{ "best_metric": 2.5881814793461846, "best_model_checkpoint": "/mnt/geminiszgmcephfs/geminicephfs/wx-dc-plt-hpc/yaoozhou/projects/ms-swift/output/grpo_geoloc_en_015_entity/v0-20250727-132257/checkpoint-1400", "epoch": 0.19170860292355618, "eval_steps": 200, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.0, "completions/mean_length": 175.3541717529297, "completions/min_length": 100.0, "epoch": 0.00011981787682722263, "grad_norm": 1.7581468060783427, "kl": 0.0, "learning_rate": 1.1904761904761903e-08, "loss": 1.8129746592876472e-07, "memory(GiB)": 131.24, "reward": 2.3585939407348633, "reward_std": 0.3267194330692291, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.27209389209747314, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4815104007720947, "rewards/GeoVisalEntityMatch2ORM/std": 0.1283111423254013, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 1, "train_speed(iter/s)": 0.00672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.0, "completions/mean_length": 178.75, "completions/min_length": 75.0, "epoch": 0.00023963575365444525, "grad_norm": 1.7157084964165181, "kl": 0.0, "learning_rate": 2.3809523809523807e-08, "loss": 7.351239901254303e-07, "memory(GiB)": 149.9, "reward": 2.1992740631103516, "reward_std": 0.18445752561092377, "rewards/GeoLocAccuracyV2ORM/mean": 0.793749988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.3606463372707367, "rewards/GeoVisalEntityMatch2ORM/mean": 0.41594070196151733, "rewards/GeoVisalEntityMatch2ORM/std": 0.18779349327087402, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 2, "train_speed(iter/s)": 0.01144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.0, "completions/mean_length": 195.4166717529297, "completions/min_length": 86.0, "epoch": 0.0003594536304816679, "grad_norm": 1.7143790746927603, "kl": 0.0007833190029487014, "learning_rate": 3.571428571428571e-08, "loss": 1.385807991027832e-06, "memory(GiB)": 149.9, "reward": 1.804193377494812, "reward_std": 0.27644339203834534, "rewards/GeoLocAccuracyV2ORM/mean": 0.7020833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.4560653567314148, "rewards/GeoVisalEntityMatch2ORM/mean": 0.15419338643550873, "rewards/GeoVisalEntityMatch2ORM/std": 0.14132963120937347, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 3, "train_speed(iter/s)": 0.014866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 349.0, "completions/mean_length": 190.89584350585938, "completions/min_length": 88.0, "epoch": 0.0004792715073088905, "grad_norm": 1.5834985046591261, "kl": 0.0007023159705568105, "learning_rate": 4.7619047619047613e-08, "loss": 1.276532884730841e-06, "memory(GiB)": 149.9, "reward": 1.989699125289917, "reward_std": 0.26875272393226624, "rewards/GeoLocAccuracyV2ORM/mean": 0.7416666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.38707178831100464, "rewards/GeoVisalEntityMatch2ORM/mean": 0.25844907760620117, "rewards/GeoVisalEntityMatch2ORM/std": 0.1551644653081894, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 4, "train_speed(iter/s)": 0.017593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.0, "completions/mean_length": 168.23959350585938, "completions/min_length": 86.0, "epoch": 0.0005990893841361131, "grad_norm": 1.7594523311686259, "kl": 0.0007273690134752542, "learning_rate": 5.9523809523809515e-08, "loss": 1.1647741757769836e-06, "memory(GiB)": 149.9, "reward": 2.4003472328186035, "reward_std": 0.2918039560317993, "rewards/GeoLocAccuracyV2ORM/mean": 0.9604166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.19165712594985962, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4607639014720917, "rewards/GeoVisalEntityMatch2ORM/std": 0.1479908674955368, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 5, "train_speed(iter/s)": 0.018974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.0, "completions/mean_length": 204.36459350585938, "completions/min_length": 114.0, "epoch": 0.0007189072609633358, "grad_norm": 1.6947686330141463, "kl": 0.000758356269216165, "learning_rate": 7.142857142857142e-08, "loss": 9.387731552124023e-07, "memory(GiB)": 149.9, "reward": 1.9049031734466553, "reward_std": 0.46533069014549255, "rewards/GeoLocAccuracyV2ORM/mean": 0.5604166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.41813603043556213, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3861532211303711, "rewards/GeoVisalEntityMatch2ORM/std": 0.23472388088703156, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 6, "train_speed(iter/s)": 0.020067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.0, "completions/mean_length": 174.625, "completions/min_length": 94.0, "epoch": 0.0008387251377905584, "grad_norm": 1.7179689289441105, "kl": 0.0006929673254489899, "learning_rate": 8.333333333333333e-08, "loss": 1.0182460528085358e-06, "memory(GiB)": 149.9, "reward": 2.1612374782562256, "reward_std": 0.44080662727355957, "rewards/GeoLocAccuracyV2ORM/mean": 0.7875000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.3836802542209625, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4154040515422821, "rewards/GeoVisalEntityMatch2ORM/std": 0.20723789930343628, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 7, "train_speed(iter/s)": 0.021601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.0, "completions/mean_length": 174.125, "completions/min_length": 96.0, "epoch": 0.000958543014617781, "grad_norm": 1.7874616880989653, "kl": 0.0007318794087041169, "learning_rate": 9.523809523809523e-08, "loss": 8.617838602731354e-07, "memory(GiB)": 149.9, "reward": 2.099931240081787, "reward_std": 0.4606480002403259, "rewards/GeoLocAccuracyV2ORM/mean": 0.8374999761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.3545939028263092, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3145143389701843, "rewards/GeoVisalEntityMatch2ORM/std": 0.1417779177427292, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336147725582123, "step": 8, "train_speed(iter/s)": 0.022921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 373.0, "completions/mean_length": 197.58334350585938, "completions/min_length": 95.0, "epoch": 0.0010783608914450035, "grad_norm": 1.772691331142176, "kl": 0.0007295976975001395, "learning_rate": 1.0714285714285713e-07, "loss": 7.798274737069733e-07, "memory(GiB)": 149.9, "reward": 1.8786334991455078, "reward_std": 0.4196438789367676, "rewards/GeoLocAccuracyV2ORM/mean": 0.6541666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.43381214141845703, "rewards/GeoVisalEntityMatch2ORM/mean": 0.2557167708873749, "rewards/GeoVisalEntityMatch2ORM/std": 0.19443608820438385, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 9, "train_speed(iter/s)": 0.024035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.0, "completions/mean_length": 190.125, "completions/min_length": 98.0, "epoch": 0.0011981787682722263, "grad_norm": 1.5457907063520793, "kl": 0.0007257974648382515, "learning_rate": 1.1904761904761903e-07, "loss": 1.0095536708831787e-06, "memory(GiB)": 149.9, "reward": 2.175520896911621, "reward_std": 0.4576098918914795, "rewards/GeoLocAccuracyV2ORM/mean": 0.7562500834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.41569533944129944, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4817708432674408, "rewards/GeoVisalEntityMatch2ORM/std": 0.32965385913848877, "rewards/MathFormat/mean": 0.9375, "rewards/MathFormat/std": 0.2433321177959442, "step": 10, "train_speed(iter/s)": 0.024983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.0, "completions/mean_length": 183.7916717529297, "completions/min_length": 86.0, "epoch": 0.0013179966450994488, "grad_norm": 1.6017997980503407, "kl": 0.0006859465211164206, "learning_rate": 1.3095238095238095e-07, "loss": 8.990367632577545e-07, "memory(GiB)": 149.9, "reward": 2.2125868797302246, "reward_std": 0.38113653659820557, "rewards/GeoLocAccuracyV2ORM/mean": 0.7833333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4120977520942688, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4292535185813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.2149493247270584, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 11, "train_speed(iter/s)": 0.025342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 391.0, "completions/mean_length": 199.875, "completions/min_length": 124.0, "epoch": 0.0014378145219266715, "grad_norm": 1.6192708119506003, "kl": 0.0007228805334307253, "learning_rate": 1.4285714285714285e-07, "loss": 7.102887025212112e-07, "memory(GiB)": 149.91, "reward": 2.0848565101623535, "reward_std": 0.34816378355026245, "rewards/GeoLocAccuracyV2ORM/mean": 0.8187500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.34589701890945435, "rewards/GeoVisalEntityMatch2ORM/mean": 0.30777305364608765, "rewards/GeoVisalEntityMatch2ORM/std": 0.13172149658203125, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 12, "train_speed(iter/s)": 0.026079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.0, "completions/mean_length": 173.5, "completions/min_length": 87.0, "epoch": 0.001557632398753894, "grad_norm": 1.7229542165597667, "kl": 0.0007288863998837769, "learning_rate": 1.5476190476190475e-07, "loss": 1.1573235951800598e-06, "memory(GiB)": 149.91, "reward": 2.0706441402435303, "reward_std": 0.2313312441110611, "rewards/GeoLocAccuracyV2ORM/mean": 0.7562500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.37298583984375, "rewards/GeoVisalEntityMatch2ORM/mean": 0.32481062412261963, "rewards/GeoVisalEntityMatch2ORM/std": 0.14179018139839172, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 13, "train_speed(iter/s)": 0.026777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.0, "completions/mean_length": 184.61459350585938, "completions/min_length": 97.0, "epoch": 0.0016774502755811168, "grad_norm": 1.7646055450525646, "kl": 0.000725048070307821, "learning_rate": 1.6666666666666665e-07, "loss": 9.78509660853888e-07, "memory(GiB)": 149.91, "reward": 1.9592303037643433, "reward_std": 0.3775780200958252, "rewards/GeoLocAccuracyV2ORM/mean": 0.6312500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.42384517192840576, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3592303395271301, "rewards/GeoVisalEntityMatch2ORM/std": 0.252228707075119, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 14, "train_speed(iter/s)": 0.027409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.0, "completions/mean_length": 194.34375, "completions/min_length": 91.0, "epoch": 0.0017972681524083393, "grad_norm": 1.7287765177888839, "kl": 0.0007686361495871097, "learning_rate": 1.7857142857142858e-07, "loss": 8.841356020639068e-07, "memory(GiB)": 149.91, "reward": 1.964211344718933, "reward_std": 0.40976881980895996, "rewards/GeoLocAccuracyV2ORM/mean": 0.6729166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.4477967619895935, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3329613208770752, "rewards/GeoVisalEntityMatch2ORM/std": 0.14686527848243713, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087526738643646, "step": 15, "train_speed(iter/s)": 0.027932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.0, "completions/mean_length": 207.20834350585938, "completions/min_length": 88.0, "epoch": 0.001917086029235562, "grad_norm": 1.5821394499686423, "kl": 0.000746031990274787, "learning_rate": 1.9047619047619045e-07, "loss": 9.971361123461975e-07, "memory(GiB)": 149.91, "reward": 1.7285341024398804, "reward_std": 0.4301055073738098, "rewards/GeoLocAccuracyV2ORM/mean": 0.5104166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.48546865582466125, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3118675947189331, "rewards/GeoVisalEntityMatch2ORM/std": 0.1401132196187973, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930106818675995, "step": 16, "train_speed(iter/s)": 0.027994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 403.0, "completions/mean_length": 195.14584350585938, "completions/min_length": 104.0, "epoch": 0.0020369039060627848, "grad_norm": 1.7101101689608207, "kl": 0.0007092972518876195, "learning_rate": 2.0238095238095238e-07, "loss": 6.835907697677612e-07, "memory(GiB)": 149.91, "reward": 2.028141736984253, "reward_std": 0.42931345105171204, "rewards/GeoLocAccuracyV2ORM/mean": 0.7437500357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.40492039918899536, "rewards/GeoVisalEntityMatch2ORM/mean": 0.29480820894241333, "rewards/GeoVisalEntityMatch2ORM/std": 0.19839759171009064, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 17, "train_speed(iter/s)": 0.028441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.0, "completions/mean_length": 165.89584350585938, "completions/min_length": 87.0, "epoch": 0.002156721782890007, "grad_norm": 1.7222059649178196, "kl": 0.0006932299584150314, "learning_rate": 2.1428571428571426e-07, "loss": 1.3137857877154602e-06, "memory(GiB)": 149.91, "reward": 2.107870578765869, "reward_std": 0.18980836868286133, "rewards/GeoLocAccuracyV2ORM/mean": 0.78125, "rewards/GeoLocAccuracyV2ORM/std": 0.3613753318786621, "rewards/GeoVisalEntityMatch2ORM/mean": 0.337037056684494, "rewards/GeoVisalEntityMatch2ORM/std": 0.19643785059452057, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 18, "train_speed(iter/s)": 0.028885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/mean_length": 197.09375, "completions/min_length": 104.0, "epoch": 0.00227653965971723, "grad_norm": 1.6590728373896326, "kl": 0.0007320639560930431, "learning_rate": 2.2619047619047619e-07, "loss": 1.3361375295062317e-06, "memory(GiB)": 149.91, "reward": 1.7783068418502808, "reward_std": 0.3316125273704529, "rewards/GeoLocAccuracyV2ORM/mean": 0.37916669249534607, "rewards/GeoLocAccuracyV2ORM/std": 0.4190004765987396, "rewards/GeoVisalEntityMatch2ORM/mean": 0.45122355222702026, "rewards/GeoVisalEntityMatch2ORM/std": 0.22141997516155243, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 19, "train_speed(iter/s)": 0.029305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.0, "completions/mean_length": 171.0729217529297, "completions/min_length": 87.0, "epoch": 0.0023963575365444525, "grad_norm": 1.8246293336864596, "kl": 0.0006629404088016599, "learning_rate": 2.3809523809523806e-07, "loss": 1.194576498164679e-06, "memory(GiB)": 149.91, "reward": 2.0148236751556396, "reward_std": 0.29947328567504883, "rewards/GeoLocAccuracyV2ORM/mean": 0.8041666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.3635835349559784, "rewards/GeoVisalEntityMatch2ORM/mean": 0.2106570452451706, "rewards/GeoVisalEntityMatch2ORM/std": 0.16927123069763184, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 20, "train_speed(iter/s)": 0.02969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.0, "completions/mean_length": 163.1666717529297, "completions/min_length": 99.0, "epoch": 0.0025161754133716753, "grad_norm": 1.7979943495456203, "kl": 0.0006868165219202638, "learning_rate": 2.5e-07, "loss": 9.437402468392975e-07, "memory(GiB)": 149.91, "reward": 2.284970283508301, "reward_std": 0.2580674886703491, "rewards/GeoLocAccuracyV2ORM/mean": 0.90625, "rewards/GeoLocAccuracyV2ORM/std": 0.2930106818675995, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3891369104385376, "rewards/GeoVisalEntityMatch2ORM/std": 0.19548027217388153, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 21, "train_speed(iter/s)": 0.030031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 350.0, "completions/mean_length": 177.46875, "completions/min_length": 91.0, "epoch": 0.0026359932901988976, "grad_norm": 1.704791323034548, "kl": 0.0007367988582700491, "learning_rate": 2.619047619047619e-07, "loss": 5.563100557992584e-07, "memory(GiB)": 149.91, "reward": 1.728116750717163, "reward_std": 0.33947432041168213, "rewards/GeoLocAccuracyV2ORM/mean": 0.5500000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.4486236274242401, "rewards/GeoVisalEntityMatch2ORM/mean": 0.21978339552879333, "rewards/GeoVisalEntityMatch2ORM/std": 0.17860101163387299, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087526738643646, "step": 22, "train_speed(iter/s)": 0.030502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.0, "completions/mean_length": 185.875, "completions/min_length": 94.0, "epoch": 0.0027558111670261203, "grad_norm": 1.7636233129260799, "kl": 0.0007401038601528853, "learning_rate": 2.7380952380952385e-07, "loss": 1.1076530199716217e-06, "memory(GiB)": 149.91, "reward": 2.232422113418579, "reward_std": 0.30640122294425964, "rewards/GeoLocAccuracyV2ORM/mean": 0.8791667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.32246309518814087, "rewards/GeoVisalEntityMatch2ORM/mean": 0.37408876419067383, "rewards/GeoVisalEntityMatch2ORM/std": 0.21205584704875946, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 23, "train_speed(iter/s)": 0.030246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.0, "completions/mean_length": 193.6354217529297, "completions/min_length": 93.0, "epoch": 0.002875629043853343, "grad_norm": 1.6030691240592525, "kl": 0.0007341611490119249, "learning_rate": 2.857142857142857e-07, "loss": 7.82310962677002e-07, "memory(GiB)": 149.91, "reward": 1.9395755529403687, "reward_std": 0.3054536283016205, "rewards/GeoLocAccuracyV2ORM/mean": 0.5562500357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.46989643573760986, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4041587710380554, "rewards/GeoVisalEntityMatch2ORM/std": 0.22572855651378632, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 24, "train_speed(iter/s)": 0.030538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.0, "completions/mean_length": 183.8854217529297, "completions/min_length": 84.0, "epoch": 0.0029954469206805653, "grad_norm": 1.8714888422547544, "kl": 0.0007430327241308987, "learning_rate": 2.976190476190476e-07, "loss": 1.092751858777774e-06, "memory(GiB)": 149.91, "reward": 1.9186840057373047, "reward_std": 0.1638755053281784, "rewards/GeoLocAccuracyV2ORM/mean": 0.6062500476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.4046603739261627, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3228505253791809, "rewards/GeoVisalEntityMatch2ORM/std": 0.17567868530750275, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 25, "train_speed(iter/s)": 0.030786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.0, "completions/mean_length": 198.4791717529297, "completions/min_length": 99.0, "epoch": 0.003115264797507788, "grad_norm": 1.639045368986647, "kl": 0.0007541380182374269, "learning_rate": 3.095238095238095e-07, "loss": 8.568167686462402e-07, "memory(GiB)": 149.91, "reward": 2.035590410232544, "reward_std": 0.495903879404068, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906978726387024, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3064236342906952, "rewards/GeoVisalEntityMatch2ORM/std": 0.20519381761550903, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 26, "train_speed(iter/s)": 0.031016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.0, "completions/mean_length": 177.78125, "completions/min_length": 85.0, "epoch": 0.003235082674335011, "grad_norm": 1.8700098436125294, "kl": 0.0007389492820948362, "learning_rate": 3.2142857142857145e-07, "loss": 9.834766387939453e-07, "memory(GiB)": 154.56, "reward": 1.946303367614746, "reward_std": 0.3673256039619446, "rewards/GeoLocAccuracyV2ORM/mean": 0.5708333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.4164806604385376, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3858867287635803, "rewards/GeoVisalEntityMatch2ORM/std": 0.21846729516983032, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 27, "train_speed(iter/s)": 0.031119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.0, "completions/mean_length": 168.90625, "completions/min_length": 89.0, "epoch": 0.0033549005511622335, "grad_norm": 1.749366249322508, "kl": 0.0008063678396865726, "learning_rate": 3.333333333333333e-07, "loss": 1.430511474609375e-06, "memory(GiB)": 154.56, "reward": 2.1007442474365234, "reward_std": 0.25205379724502563, "rewards/GeoLocAccuracyV2ORM/mean": 0.8812500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.2895777225494385, "rewards/GeoVisalEntityMatch2ORM/mean": 0.2299107164144516, "rewards/GeoVisalEntityMatch2ORM/std": 0.15423116087913513, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 28, "train_speed(iter/s)": 0.031331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 354.0, "completions/mean_length": 197.25, "completions/min_length": 94.0, "epoch": 0.003474718427989456, "grad_norm": 1.6642263199020073, "kl": 0.000844427733682096, "learning_rate": 3.452380952380952e-07, "loss": 9.98377799987793e-07, "memory(GiB)": 154.56, "reward": 2.0547618865966797, "reward_std": 0.2707064747810364, "rewards/GeoLocAccuracyV2ORM/mean": 0.7041667699813843, "rewards/GeoLocAccuracyV2ORM/std": 0.40416690707206726, "rewards/GeoVisalEntityMatch2ORM/mean": 0.37142857909202576, "rewards/GeoVisalEntityMatch2ORM/std": 0.1963461935520172, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 29, "train_speed(iter/s)": 0.031527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 374.0, "completions/mean_length": 217.5729217529297, "completions/min_length": 108.0, "epoch": 0.0035945363048166786, "grad_norm": 1.6854988048518846, "kl": 0.0009047830244526267, "learning_rate": 3.5714285714285716e-07, "loss": 1.003344891614688e-06, "memory(GiB)": 154.56, "reward": 1.8308614492416382, "reward_std": 0.4611669182777405, "rewards/GeoLocAccuracyV2ORM/mean": 0.5166666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.42418137192726135, "rewards/GeoVisalEntityMatch2ORM/mean": 0.36627814173698425, "rewards/GeoVisalEntityMatch2ORM/std": 0.16248910129070282, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 30, "train_speed(iter/s)": 0.031388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.0, "completions/mean_length": 176.8125, "completions/min_length": 93.0, "epoch": 0.0037143541816439013, "grad_norm": 1.668518105739412, "kl": 0.0009356598893646151, "learning_rate": 3.6904761904761906e-07, "loss": 6.60618184156192e-07, "memory(GiB)": 154.56, "reward": 2.1884796619415283, "reward_std": 0.3437362313270569, "rewards/GeoLocAccuracyV2ORM/mean": 0.8895833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.2957521080970764, "rewards/GeoVisalEntityMatch2ORM/mean": 0.340563029050827, "rewards/GeoVisalEntityMatch2ORM/std": 0.18387968838214874, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087526738643646, "step": 31, "train_speed(iter/s)": 0.031584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.0, "completions/mean_length": 166.73959350585938, "completions/min_length": 92.0, "epoch": 0.003834172058471124, "grad_norm": 1.8798836191595358, "kl": 0.0008874457562342286, "learning_rate": 3.809523809523809e-07, "loss": 1.3013681154916412e-06, "memory(GiB)": 154.56, "reward": 2.353625535964966, "reward_std": 0.19326967000961304, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3744588792324066, "rewards/GeoVisalEntityMatch2ORM/std": 0.14001324772834778, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 32, "train_speed(iter/s)": 0.031743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.0, "completions/mean_length": 174.11459350585938, "completions/min_length": 82.0, "epoch": 0.003953989935298346, "grad_norm": 1.83988150021088, "kl": 0.0009149688121397048, "learning_rate": 3.928571428571428e-07, "loss": 1.0902683698077453e-06, "memory(GiB)": 154.56, "reward": 2.1270835399627686, "reward_std": 0.23169833421707153, "rewards/GeoLocAccuracyV2ORM/mean": 0.7625000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.38422855734825134, "rewards/GeoVisalEntityMatch2ORM/mean": 0.375, "rewards/GeoVisalEntityMatch2ORM/std": 0.1919429749250412, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 33, "train_speed(iter/s)": 0.031543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.0, "completions/mean_length": 184.71875, "completions/min_length": 94.0, "epoch": 0.0040738078121255695, "grad_norm": 1.791313568763152, "kl": 0.0010893746512010694, "learning_rate": 4.0476190476190476e-07, "loss": 7.897615432739258e-07, "memory(GiB)": 154.56, "reward": 2.1131324768066406, "reward_std": 0.3596246838569641, "rewards/GeoLocAccuracyV2ORM/mean": 0.8708333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.32956644892692566, "rewards/GeoVisalEntityMatch2ORM/mean": 0.2735491394996643, "rewards/GeoVisalEntityMatch2ORM/std": 0.18965497612953186, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 34, "train_speed(iter/s)": 0.031681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.0, "completions/mean_length": 198.0625, "completions/min_length": 104.0, "epoch": 0.004193625688952792, "grad_norm": 1.6055974709843692, "kl": 0.0010668168542906642, "learning_rate": 4.1666666666666667e-07, "loss": 1.1759500466723694e-06, "memory(GiB)": 154.56, "reward": 2.153125286102295, "reward_std": 0.41466090083122253, "rewards/GeoLocAccuracyV2ORM/mean": 0.7895833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.3908099830150604, "rewards/GeoVisalEntityMatch2ORM/mean": 0.39479169249534607, "rewards/GeoVisalEntityMatch2ORM/std": 0.15886513888835907, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 35, "train_speed(iter/s)": 0.031454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/mean_length": 196.8229217529297, "completions/min_length": 107.0, "epoch": 0.004313443565780014, "grad_norm": 1.789152033715451, "kl": 0.0012499213335104287, "learning_rate": 4.285714285714285e-07, "loss": 1.5149514638324035e-06, "memory(GiB)": 154.56, "reward": 1.7642858028411865, "reward_std": 0.3759126663208008, "rewards/GeoLocAccuracyV2ORM/mean": 0.543749988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.4730222225189209, "rewards/GeoVisalEntityMatch2ORM/mean": 0.23095238208770752, "rewards/GeoVisalEntityMatch2ORM/std": 0.1507129967212677, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 36, "train_speed(iter/s)": 0.031602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.0, "completions/mean_length": 172.0729217529297, "completions/min_length": 98.0, "epoch": 0.004433261442607237, "grad_norm": 1.8000041780246447, "kl": 0.0012955335550941527, "learning_rate": 4.4047619047619047e-07, "loss": 2.625087972774054e-06, "memory(GiB)": 154.56, "reward": 2.4814815521240234, "reward_std": 0.14352922141551971, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.49189814925193787, "rewards/GeoVisalEntityMatch2ORM/std": 0.2989698648452759, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 37, "train_speed(iter/s)": 0.03176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.0, "completions/mean_length": 209.5416717529297, "completions/min_length": 106.0, "epoch": 0.00455307931943446, "grad_norm": 1.535556607295948, "kl": 0.001382035727147013, "learning_rate": 4.5238095238095237e-07, "loss": 1.7633041125009186e-06, "memory(GiB)": 154.56, "reward": 2.0784101486206055, "reward_std": 0.31646886467933655, "rewards/GeoLocAccuracyV2ORM/mean": 0.6270833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.46667447686195374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4825769066810608, "rewards/GeoVisalEntityMatch2ORM/std": 0.2226075530052185, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 38, "train_speed(iter/s)": 0.03179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.0, "completions/mean_length": 162.48959350585938, "completions/min_length": 93.0, "epoch": 0.004672897196261682, "grad_norm": 1.7140280114539705, "kl": 0.00144655053736642, "learning_rate": 4.6428571428571427e-07, "loss": 1.773238182067871e-06, "memory(GiB)": 154.56, "reward": 2.195862293243408, "reward_std": 0.2411021888256073, "rewards/GeoLocAccuracyV2ORM/mean": 0.9104167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.28115519881248474, "rewards/GeoVisalEntityMatch2ORM/mean": 0.2958622872829437, "rewards/GeoVisalEntityMatch2ORM/std": 0.15968717634677887, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 39, "train_speed(iter/s)": 0.031614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.0, "completions/mean_length": 166.78125, "completions/min_length": 107.0, "epoch": 0.004792715073088905, "grad_norm": 1.79006379527013, "kl": 0.0017114334623329341, "learning_rate": 4.761904761904761e-07, "loss": 1.7782052736947662e-06, "memory(GiB)": 159.84, "reward": 2.3145835399627686, "reward_std": 0.34146952629089355, "rewards/GeoLocAccuracyV2ORM/mean": 0.8979166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.2872662842273712, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4270833432674408, "rewards/GeoVisalEntityMatch2ORM/std": 0.27156490087509155, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 40, "train_speed(iter/s)": 0.031532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 185.52084350585938, "completions/min_length": 96.0, "epoch": 0.004912532949916127, "grad_norm": 1.5583830403579055, "kl": 0.001731376105453819, "learning_rate": 4.880952380952381e-07, "loss": 1.969436880244757e-06, "memory(GiB)": 159.84, "reward": 2.244246244430542, "reward_std": 0.3710552155971527, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3427187502384186, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4109126925468445, "rewards/GeoVisalEntityMatch2ORM/std": 0.1466943621635437, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 41, "train_speed(iter/s)": 0.031587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.0, "completions/mean_length": 172.17709350585938, "completions/min_length": 82.0, "epoch": 0.0050323508267433505, "grad_norm": 1.8221064403013878, "kl": 0.002109266584739089, "learning_rate": 5e-07, "loss": 2.5692086182971252e-06, "memory(GiB)": 159.84, "reward": 2.188541889190674, "reward_std": 0.33253246545791626, "rewards/GeoLocAccuracyV2ORM/mean": 0.8479166030883789, "rewards/GeoLocAccuracyV2ORM/std": 0.34427013993263245, "rewards/GeoVisalEntityMatch2ORM/mean": 0.35104167461395264, "rewards/GeoVisalEntityMatch2ORM/std": 0.1419447958469391, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 42, "train_speed(iter/s)": 0.031706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.0, "completions/mean_length": 188.0, "completions/min_length": 110.0, "epoch": 0.005152168703570573, "grad_norm": 1.6564406260552202, "kl": 0.0022712050704285502, "learning_rate": 5.119047619047619e-07, "loss": 2.5754175112524536e-06, "memory(GiB)": 159.84, "reward": 2.201305389404297, "reward_std": 0.1581868827342987, "rewards/GeoLocAccuracyV2ORM/mean": 0.7333333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.44073307514190674, "rewards/GeoVisalEntityMatch2ORM/mean": 0.46797215938568115, "rewards/GeoVisalEntityMatch2ORM/std": 0.17381145060062408, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 43, "train_speed(iter/s)": 0.031839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.0, "completions/mean_length": 173.5625, "completions/min_length": 91.0, "epoch": 0.005271986580397795, "grad_norm": 1.8076737846748299, "kl": 0.0022422191686928272, "learning_rate": 5.238095238095238e-07, "loss": 2.4586915969848633e-06, "memory(GiB)": 159.84, "reward": 2.1750869750976562, "reward_std": 0.3201717734336853, "rewards/GeoLocAccuracyV2ORM/mean": 0.8708333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.3029388189315796, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3250868320465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.18786463141441345, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 44, "train_speed(iter/s)": 0.03197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 436.0, "completions/mean_length": 195.4375, "completions/min_length": 100.0, "epoch": 0.005391804457225018, "grad_norm": 1.7414855979757458, "kl": 0.0026320208562538028, "learning_rate": 5.357142857142857e-07, "loss": 2.8014183044433594e-06, "memory(GiB)": 159.84, "reward": 2.1659059524536133, "reward_std": 0.21400481462478638, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.21521101891994476, "rewards/GeoVisalEntityMatch2ORM/mean": 0.21590611338615417, "rewards/GeoVisalEntityMatch2ORM/std": 0.15635822713375092, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 45, "train_speed(iter/s)": 0.032074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.0, "completions/mean_length": 170.30209350585938, "completions/min_length": 100.0, "epoch": 0.005511622334052241, "grad_norm": 1.657464913717482, "kl": 0.003119782777503133, "learning_rate": 5.476190476190477e-07, "loss": 3.4421682357788086e-06, "memory(GiB)": 159.84, "reward": 2.288541793823242, "reward_std": 0.2990618944168091, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.3195391595363617, "rewards/GeoVisalEntityMatch2ORM/mean": 0.41354167461395264, "rewards/GeoVisalEntityMatch2ORM/std": 0.16233967244625092, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 46, "train_speed(iter/s)": 0.032188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.0, "completions/mean_length": 182.39584350585938, "completions/min_length": 99.0, "epoch": 0.005631440210879463, "grad_norm": 1.6010199956487376, "kl": 0.0024017203832045197, "learning_rate": 5.595238095238095e-07, "loss": 2.8908252716064453e-06, "memory(GiB)": 159.84, "reward": 2.301909923553467, "reward_std": 0.3039586842060089, "rewards/GeoLocAccuracyV2ORM/mean": 0.8416666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3204164206981659, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4706597328186035, "rewards/GeoVisalEntityMatch2ORM/std": 0.20844219624996185, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 47, "train_speed(iter/s)": 0.032309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/mean_length": 185.14584350585938, "completions/min_length": 91.0, "epoch": 0.005751258087706686, "grad_norm": 1.6244707095000035, "kl": 0.0028634765185415745, "learning_rate": 5.714285714285714e-07, "loss": 3.183881517543341e-06, "memory(GiB)": 159.84, "reward": 2.086458444595337, "reward_std": 0.31366753578186035, "rewards/GeoLocAccuracyV2ORM/mean": 0.6645833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.4490877687931061, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4322916865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.1987433135509491, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 48, "train_speed(iter/s)": 0.032426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 388.0, "completions/mean_length": 200.7291717529297, "completions/min_length": 121.0, "epoch": 0.005871075964533908, "grad_norm": 1.7320187379471905, "kl": 0.0024698697961866856, "learning_rate": 5.833333333333334e-07, "loss": 2.4201970063586487e-06, "memory(GiB)": 159.85, "reward": 1.8364953994750977, "reward_std": 0.22414100170135498, "rewards/GeoLocAccuracyV2ORM/mean": 0.5520833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.49989035725593567, "rewards/GeoVisalEntityMatch2ORM/mean": 0.2844122052192688, "rewards/GeoVisalEntityMatch2ORM/std": 0.15132880210876465, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 49, "train_speed(iter/s)": 0.032349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.0, "completions/mean_length": 171.28125, "completions/min_length": 95.0, "epoch": 0.005990893841361131, "grad_norm": 1.6947193085987329, "kl": 0.0031521241180598736, "learning_rate": 5.952380952380952e-07, "loss": 4.361073479230981e-06, "memory(GiB)": 165.42, "reward": 2.176785945892334, "reward_std": 0.1936403214931488, "rewards/GeoLocAccuracyV2ORM/mean": 0.7979166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.36418014764785767, "rewards/GeoVisalEntityMatch2ORM/mean": 0.37886905670166016, "rewards/GeoVisalEntityMatch2ORM/std": 0.15254296362400055, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 50, "train_speed(iter/s)": 0.032172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.0, "completions/mean_length": 174.15625, "completions/min_length": 107.0, "epoch": 0.006110711718188354, "grad_norm": 1.6196258702823163, "kl": 0.003416712745092809, "learning_rate": 6.071428571428571e-07, "loss": 4.3138861656188965e-06, "memory(GiB)": 165.42, "reward": 2.433333396911621, "reward_std": 0.16205298900604248, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.45000001788139343, "rewards/GeoVisalEntityMatch2ORM/std": 0.3357805013656616, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 51, "train_speed(iter/s)": 0.032271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.0, "completions/mean_length": 168.70834350585938, "completions/min_length": 105.0, "epoch": 0.006230529595015576, "grad_norm": 1.8887269302419787, "kl": 0.003613047651015222, "learning_rate": 6.19047619047619e-07, "loss": 4.236896984366467e-06, "memory(GiB)": 165.42, "reward": 2.310925006866455, "reward_std": 0.11018089950084686, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.319258451461792, "rewards/GeoVisalEntityMatch2ORM/std": 0.1702468991279602, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 52, "train_speed(iter/s)": 0.032369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 368.0, "completions/mean_length": 203.65625, "completions/min_length": 120.0, "epoch": 0.006350347471842799, "grad_norm": 1.6096402243848424, "kl": 0.003548738663084805, "learning_rate": 6.309523809523809e-07, "loss": 4.708766937255859e-06, "memory(GiB)": 165.42, "reward": 1.792184829711914, "reward_std": 0.262237548828125, "rewards/GeoLocAccuracyV2ORM/mean": 0.4166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.46941545605659485, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3755180239677429, "rewards/GeoVisalEntityMatch2ORM/std": 0.15732590854167938, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 53, "train_speed(iter/s)": 0.032384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.0, "completions/mean_length": 190.17709350585938, "completions/min_length": 118.0, "epoch": 0.006470165348670022, "grad_norm": 1.7674011441910553, "kl": 0.004421151010319591, "learning_rate": 6.428571428571429e-07, "loss": 4.738569259643555e-06, "memory(GiB)": 165.42, "reward": 2.018981695175171, "reward_std": 0.28824156522750854, "rewards/GeoLocAccuracyV2ORM/mean": 0.6354166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4838397204875946, "rewards/GeoVisalEntityMatch2ORM/mean": 0.383564829826355, "rewards/GeoVisalEntityMatch2ORM/std": 0.19244995713233948, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 54, "train_speed(iter/s)": 0.032436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.0, "completions/mean_length": 182.21875, "completions/min_length": 98.0, "epoch": 0.006589983225497244, "grad_norm": 1.717339889440152, "kl": 0.005359603324905038, "learning_rate": 6.547619047619047e-07, "loss": 6.141762241895776e-06, "memory(GiB)": 165.42, "reward": 2.117534637451172, "reward_std": 0.3023472726345062, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3646675944328308, "rewards/GeoVisalEntityMatch2ORM/mean": 0.32586807012557983, "rewards/GeoVisalEntityMatch2ORM/std": 0.1641007661819458, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 55, "train_speed(iter/s)": 0.032365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 365.0, "completions/mean_length": 210.34375, "completions/min_length": 113.0, "epoch": 0.006709801102324467, "grad_norm": 1.5866599772173402, "kl": 0.004566613119095564, "learning_rate": 6.666666666666666e-07, "loss": 4.932284355163574e-06, "memory(GiB)": 165.42, "reward": 2.149566173553467, "reward_std": 0.2987133264541626, "rewards/GeoLocAccuracyV2ORM/mean": 0.7229166030883789, "rewards/GeoLocAccuracyV2ORM/std": 0.4268561005592346, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4266493320465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.19076943397521973, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 56, "train_speed(iter/s)": 0.032457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 196.86459350585938, "completions/min_length": 113.0, "epoch": 0.006829618979151689, "grad_norm": 1.769370242090966, "kl": 0.005360053153708577, "learning_rate": 6.785714285714286e-07, "loss": 6.128102540969849e-06, "memory(GiB)": 165.42, "reward": 2.6046133041381836, "reward_std": 0.15195152163505554, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6212798357009888, "rewards/GeoVisalEntityMatch2ORM/std": 0.2418520152568817, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 57, "train_speed(iter/s)": 0.03255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.0, "completions/mean_length": 181.125, "completions/min_length": 111.0, "epoch": 0.006949436855978912, "grad_norm": 1.7072657895224528, "kl": 0.006655579665675759, "learning_rate": 6.904761904761904e-07, "loss": 7.048249244689941e-06, "memory(GiB)": 165.42, "reward": 2.0025670528411865, "reward_std": 0.23363575339317322, "rewards/GeoLocAccuracyV2ORM/mean": 0.6750000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.45445162057876587, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3275669813156128, "rewards/GeoVisalEntityMatch2ORM/std": 0.1829240918159485, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 58, "train_speed(iter/s)": 0.032625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/mean_length": 187.1666717529297, "completions/min_length": 120.0, "epoch": 0.007069254732806135, "grad_norm": 1.6137069675054219, "kl": 0.006707850843667984, "learning_rate": 7.023809523809523e-07, "loss": 7.035831913526636e-06, "memory(GiB)": 165.42, "reward": 2.340451717376709, "reward_std": 0.2953152358531952, "rewards/GeoLocAccuracyV2ORM/mean": 0.8958333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.2783094346523285, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4446180760860443, "rewards/GeoVisalEntityMatch2ORM/std": 0.19140133261680603, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 59, "train_speed(iter/s)": 0.032712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.0, "completions/mean_length": 206.5, "completions/min_length": 123.0, "epoch": 0.007189072609633357, "grad_norm": 1.5338788433003203, "kl": 0.007577173411846161, "learning_rate": 7.142857142857143e-07, "loss": 7.874022230680566e-06, "memory(GiB)": 165.42, "reward": 2.1688342094421387, "reward_std": 0.344182550907135, "rewards/GeoLocAccuracyV2ORM/mean": 0.8937500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.2987033426761627, "rewards/GeoVisalEntityMatch2ORM/mean": 0.2855008542537689, "rewards/GeoVisalEntityMatch2ORM/std": 0.16732093691825867, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 60, "train_speed(iter/s)": 0.032783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.0, "completions/mean_length": 185.1875, "completions/min_length": 114.0, "epoch": 0.00730889048646058, "grad_norm": 1.7084007005694668, "kl": 0.007568043423816562, "learning_rate": 7.261904761904761e-07, "loss": 8.64267349243164e-06, "memory(GiB)": 165.42, "reward": 2.062037229537964, "reward_std": 0.1027362272143364, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.31203705072402954, "rewards/GeoVisalEntityMatch2ORM/std": 0.12637975811958313, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 61, "train_speed(iter/s)": 0.032856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.0, "completions/mean_length": 208.09375, "completions/min_length": 122.0, "epoch": 0.007428708363287803, "grad_norm": 1.6180242884427227, "kl": 0.007990859914571047, "learning_rate": 7.380952380952381e-07, "loss": 8.694827556610107e-06, "memory(GiB)": 165.76, "reward": 2.322068691253662, "reward_std": 0.258143812417984, "rewards/GeoLocAccuracyV2ORM/mean": 0.9458333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.21221472322940826, "rewards/GeoVisalEntityMatch2ORM/mean": 0.38665199279785156, "rewards/GeoVisalEntityMatch2ORM/std": 0.18348032236099243, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 62, "train_speed(iter/s)": 0.032925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 333.0, "completions/mean_length": 209.625, "completions/min_length": 129.0, "epoch": 0.007548526240115025, "grad_norm": 1.6287911624626736, "kl": 0.007761480053886771, "learning_rate": 7.5e-07, "loss": 8.38624964671908e-06, "memory(GiB)": 165.76, "reward": 2.1193418502807617, "reward_std": 0.23806244134902954, "rewards/GeoLocAccuracyV2ORM/mean": 0.7229167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.3853854537010193, "rewards/GeoVisalEntityMatch2ORM/mean": 0.40684184432029724, "rewards/GeoVisalEntityMatch2ORM/std": 0.16147074103355408, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 63, "train_speed(iter/s)": 0.032995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.0, "completions/mean_length": 202.71875, "completions/min_length": 127.0, "epoch": 0.007668344116942248, "grad_norm": 1.5864662853361178, "kl": 0.00875511672347784, "learning_rate": 7.619047619047618e-07, "loss": 9.325643986812793e-06, "memory(GiB)": 165.76, "reward": 2.101835250854492, "reward_std": 0.23912425339221954, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3018353283405304, "rewards/GeoVisalEntityMatch2ORM/std": 0.1146685853600502, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 64, "train_speed(iter/s)": 0.032852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.0, "completions/mean_length": 229.96875, "completions/min_length": 123.0, "epoch": 0.00778816199376947, "grad_norm": 1.5288203587235907, "kl": 0.00882829912006855, "learning_rate": 7.738095238095238e-07, "loss": 8.794168934400659e-06, "memory(GiB)": 165.76, "reward": 1.9835481643676758, "reward_std": 0.26194190979003906, "rewards/GeoLocAccuracyV2ORM/mean": 0.8062500357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.34756675362586975, "rewards/GeoVisalEntityMatch2ORM/mean": 0.17729829251766205, "rewards/GeoVisalEntityMatch2ORM/std": 0.1619838923215866, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 65, "train_speed(iter/s)": 0.032915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 384.0, "completions/mean_length": 222.89584350585938, "completions/min_length": 141.0, "epoch": 0.007907979870596693, "grad_norm": 1.5296085093359766, "kl": 0.009489203337579966, "learning_rate": 7.857142857142856e-07, "loss": 1.0438263416290283e-05, "memory(GiB)": 165.76, "reward": 2.323111057281494, "reward_std": 0.24801242351531982, "rewards/GeoLocAccuracyV2ORM/mean": 0.9125000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.2592499256134033, "rewards/GeoVisalEntityMatch2ORM/mean": 0.41061097383499146, "rewards/GeoVisalEntityMatch2ORM/std": 0.1482621282339096, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 66, "train_speed(iter/s)": 0.032967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.0, "completions/mean_length": 193.11459350585938, "completions/min_length": 102.0, "epoch": 0.008027797747423916, "grad_norm": 1.5737158697166138, "kl": 0.010443256236612797, "learning_rate": 7.976190476190476e-07, "loss": 1.103430986404419e-05, "memory(GiB)": 165.76, "reward": 2.308940887451172, "reward_std": 0.17163051664829254, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357589185237885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.32977432012557983, "rewards/GeoVisalEntityMatch2ORM/std": 0.21250644326210022, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 67, "train_speed(iter/s)": 0.033031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 367.0, "completions/mean_length": 220.55209350585938, "completions/min_length": 107.0, "epoch": 0.008147615624251139, "grad_norm": 1.7074207876577776, "kl": 0.009552914649248123, "learning_rate": 8.095238095238095e-07, "loss": 1.0341405868530273e-05, "memory(GiB)": 165.76, "reward": 2.157860040664673, "reward_std": 0.31251394748687744, "rewards/GeoLocAccuracyV2ORM/mean": 0.8145833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.3424306809902191, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3432765007019043, "rewards/GeoVisalEntityMatch2ORM/std": 0.15940909087657928, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 68, "train_speed(iter/s)": 0.033094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 362.0, "completions/mean_length": 211.14584350585938, "completions/min_length": 125.0, "epoch": 0.00826743350107836, "grad_norm": 1.6188129559316222, "kl": 0.01189803658053279, "learning_rate": 8.214285714285713e-07, "loss": 1.2074908227077685e-05, "memory(GiB)": 165.76, "reward": 2.4745898246765137, "reward_std": 0.16924312710762024, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.48500633239746094, "rewards/GeoVisalEntityMatch2ORM/std": 0.24134904146194458, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 69, "train_speed(iter/s)": 0.033142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 392.0, "completions/mean_length": 222.2916717529297, "completions/min_length": 130.0, "epoch": 0.008387251377905584, "grad_norm": 1.532902677468884, "kl": 0.012799838092178106, "learning_rate": 8.333333333333333e-07, "loss": 1.36643648147583e-05, "memory(GiB)": 165.76, "reward": 2.088477611541748, "reward_std": 0.2865196168422699, "rewards/GeoLocAccuracyV2ORM/mean": 0.5604166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.4835132956504822, "rewards/GeoVisalEntityMatch2ORM/mean": 0.528060793876648, "rewards/GeoVisalEntityMatch2ORM/std": 0.15923555195331573, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 70, "train_speed(iter/s)": 0.032996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 393.0, "completions/mean_length": 210.03125, "completions/min_length": 105.0, "epoch": 0.008507069254732807, "grad_norm": 1.5918257484962008, "kl": 0.013972037937492132, "learning_rate": 8.452380952380952e-07, "loss": 1.4178454875946045e-05, "memory(GiB)": 165.76, "reward": 1.9668114185333252, "reward_std": 0.28023892641067505, "rewards/GeoLocAccuracyV2ORM/mean": 0.5895833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.4442567229270935, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3772280216217041, "rewards/GeoVisalEntityMatch2ORM/std": 0.16006295382976532, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 71, "train_speed(iter/s)": 0.033054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.0, "completions/mean_length": 248.6979217529297, "completions/min_length": 151.0, "epoch": 0.008626887131560028, "grad_norm": 1.396143518914335, "kl": 0.013660897966474295, "learning_rate": 8.57142857142857e-07, "loss": 1.439948937331792e-05, "memory(GiB)": 165.76, "reward": 2.3348801136016846, "reward_std": 0.20197856426239014, "rewards/GeoLocAccuracyV2ORM/mean": 0.9458333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.21221472322940826, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3890467584133148, "rewards/GeoVisalEntityMatch2ORM/std": 0.1459609568119049, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 72, "train_speed(iter/s)": 0.033098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/mean_length": 259.84375, "completions/min_length": 110.0, "epoch": 0.008746705008387251, "grad_norm": 1.5751941851976772, "kl": 0.017634215764701366, "learning_rate": 8.69047619047619e-07, "loss": 1.7931064576259814e-05, "memory(GiB)": 165.76, "reward": 2.134248733520508, "reward_std": 0.33876460790634155, "rewards/GeoLocAccuracyV2ORM/mean": 0.7354166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.4180101156234741, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3988320827484131, "rewards/GeoVisalEntityMatch2ORM/std": 0.21251380443572998, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 73, "train_speed(iter/s)": 0.033013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.0, "completions/mean_length": 245.2916717529297, "completions/min_length": 144.0, "epoch": 0.008866522885214475, "grad_norm": 1.5190951887401083, "kl": 0.019781431183218956, "learning_rate": 8.809523809523809e-07, "loss": 2.002219480345957e-05, "memory(GiB)": 165.76, "reward": 2.23587965965271, "reward_std": 0.26883164048194885, "rewards/GeoLocAccuracyV2ORM/mean": 0.8187500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.36944127082824707, "rewards/GeoVisalEntityMatch2ORM/mean": 0.41712963581085205, "rewards/GeoVisalEntityMatch2ORM/std": 0.1314280778169632, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 74, "train_speed(iter/s)": 0.03307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 407.0, "completions/mean_length": 267.0, "completions/min_length": 134.0, "epoch": 0.008986340762041696, "grad_norm": 1.438795378987653, "kl": 0.02144565060734749, "learning_rate": 8.928571428571428e-07, "loss": 2.2269785404205322e-05, "memory(GiB)": 165.76, "reward": 2.073148250579834, "reward_std": 0.28739139437675476, "rewards/GeoLocAccuracyV2ORM/mean": 0.6666666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3964757025241852, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4064815044403076, "rewards/GeoVisalEntityMatch2ORM/std": 0.15860456228256226, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 75, "train_speed(iter/s)": 0.033109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 365.0, "completions/mean_length": 277.88543701171875, "completions/min_length": 178.0, "epoch": 0.00910615863886892, "grad_norm": 1.3622712018257448, "kl": 0.021255861967802048, "learning_rate": 9.047619047619047e-07, "loss": 2.119193595717661e-05, "memory(GiB)": 165.76, "reward": 2.2101356983184814, "reward_std": 0.1531040370464325, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482285737991333, "rewards/GeoVisalEntityMatch2ORM/mean": 0.41013556718826294, "rewards/GeoVisalEntityMatch2ORM/std": 0.2194642424583435, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 76, "train_speed(iter/s)": 0.032979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 274.65625, "completions/min_length": 140.0, "epoch": 0.009225976515696142, "grad_norm": 1.4490490478925608, "kl": 0.02229056227952242, "learning_rate": 9.166666666666665e-07, "loss": 2.282609602843877e-05, "memory(GiB)": 165.76, "reward": 2.3545243740081787, "reward_std": 0.2963886857032776, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.3553105890750885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.556607723236084, "rewards/GeoVisalEntityMatch2ORM/std": 0.1578005999326706, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 77, "train_speed(iter/s)": 0.033018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 414.0, "completions/mean_length": 290.6458435058594, "completions/min_length": 126.0, "epoch": 0.009345794392523364, "grad_norm": 1.3648016403088996, "kl": 0.024389872327446938, "learning_rate": 9.285714285714285e-07, "loss": 2.559026142989751e-05, "memory(GiB)": 165.76, "reward": 2.1191182136535645, "reward_std": 0.3831455409526825, "rewards/GeoLocAccuracyV2ORM/mean": 0.7979167699813843, "rewards/GeoLocAccuracyV2ORM/std": 0.36418014764785767, "rewards/GeoVisalEntityMatch2ORM/mean": 0.35245126485824585, "rewards/GeoVisalEntityMatch2ORM/std": 0.142270028591156, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 78, "train_speed(iter/s)": 0.03292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 390.0, "completions/mean_length": 304.8125, "completions/min_length": 161.0, "epoch": 0.009465612269350587, "grad_norm": 1.3073450075737025, "kl": 0.025748773477971554, "learning_rate": 9.404761904761904e-07, "loss": 2.617575228214264e-05, "memory(GiB)": 165.76, "reward": 2.2743372917175293, "reward_std": 0.3475509583950043, "rewards/GeoLocAccuracyV2ORM/mean": 0.9020833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.27529093623161316, "rewards/GeoVisalEntityMatch2ORM/mean": 0.39308714866638184, "rewards/GeoVisalEntityMatch2ORM/std": 0.21471957862377167, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 79, "train_speed(iter/s)": 0.032789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/mean_length": 310.09375, "completions/min_length": 204.0, "epoch": 0.00958543014617781, "grad_norm": 1.2523260109586456, "kl": 0.02499673143029213, "learning_rate": 9.523809523809522e-07, "loss": 2.549092096160166e-05, "memory(GiB)": 165.76, "reward": 2.2134838104248047, "reward_std": 0.24682655930519104, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.39363351464271545, "rewards/GeoVisalEntityMatch2ORM/mean": 0.41348379850387573, "rewards/GeoVisalEntityMatch2ORM/std": 0.1266457438468933, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 80, "train_speed(iter/s)": 0.03283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 293.8125, "completions/min_length": 122.0, "epoch": 0.009705248023005032, "grad_norm": 1.3318795754738748, "kl": 0.03009121585637331, "learning_rate": 9.642857142857142e-07, "loss": 3.183633089065552e-05, "memory(GiB)": 165.76, "reward": 2.4051051139831543, "reward_std": 0.21385729312896729, "rewards/GeoLocAccuracyV2ORM/mean": 0.9604166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.19165712594985962, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4551050066947937, "rewards/GeoVisalEntityMatch2ORM/std": 0.13101567327976227, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 81, "train_speed(iter/s)": 0.032873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 374.0, "completions/mean_length": 276.5208435058594, "completions/min_length": 141.0, "epoch": 0.009825065899832255, "grad_norm": 1.346491733272906, "kl": 0.035830337554216385, "learning_rate": 9.761904761904762e-07, "loss": 3.64979132427834e-05, "memory(GiB)": 165.76, "reward": 2.5635294914245605, "reward_std": 0.15113583207130432, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5635293126106262, "rewards/GeoVisalEntityMatch2ORM/std": 0.1623048186302185, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 82, "train_speed(iter/s)": 0.03292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 325.46875, "completions/min_length": 172.0, "epoch": 0.009944883776659478, "grad_norm": 1.2359098244849651, "kl": 0.03138995077461004, "learning_rate": 9.88095238095238e-07, "loss": 3.259132427047007e-05, "memory(GiB)": 165.76, "reward": 2.408475399017334, "reward_std": 0.27074137330055237, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916667699813843, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713991165161, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5272253751754761, "rewards/GeoVisalEntityMatch2ORM/std": 0.12574785947799683, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 83, "train_speed(iter/s)": 0.032961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 337.91668701171875, "completions/min_length": 185.0, "epoch": 0.010064701653486701, "grad_norm": 1.2914457359308846, "kl": 0.03205036371946335, "learning_rate": 1e-06, "loss": 3.29663380398415e-05, "memory(GiB)": 165.76, "reward": 2.3788774013519287, "reward_std": 0.2352948635816574, "rewards/GeoLocAccuracyV2ORM/mean": 0.9000000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.26596397161483765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47887730598449707, "rewards/GeoVisalEntityMatch2ORM/std": 0.17069901525974274, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 84, "train_speed(iter/s)": 0.032987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 341.7395935058594, "completions/min_length": 248.0, "epoch": 0.010184519530313923, "grad_norm": 1.2792905516923276, "kl": 0.03369907848536968, "learning_rate": 9.999999638532406e-07, "loss": 3.482898318907246e-05, "memory(GiB)": 165.76, "reward": 2.114285945892334, "reward_std": 0.17083534598350525, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906975746154785, "rewards/GeoVisalEntityMatch2ORM/mean": 0.36428576707839966, "rewards/GeoVisalEntityMatch2ORM/std": 0.14598087966442108, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 85, "train_speed(iter/s)": 0.033009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 340.66668701171875, "completions/min_length": 227.0, "epoch": 0.010304337407141146, "grad_norm": 1.2198074854102323, "kl": 0.03754807636141777, "learning_rate": 9.99999855412968e-07, "loss": 3.812213981291279e-05, "memory(GiB)": 165.76, "reward": 2.35550594329834, "reward_std": 0.2362847924232483, "rewards/GeoLocAccuracyV2ORM/mean": 0.8895833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.28115519881248474, "rewards/GeoVisalEntityMatch2ORM/mean": 0.46592265367507935, "rewards/GeoVisalEntityMatch2ORM/std": 0.15069766342639923, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 86, "train_speed(iter/s)": 0.033039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/mean_length": 352.875, "completions/min_length": 246.0, "epoch": 0.010424155283968369, "grad_norm": 1.2297075148289398, "kl": 0.038078175857663155, "learning_rate": 9.999996746791972e-07, "loss": 3.853688758681528e-05, "memory(GiB)": 165.76, "reward": 2.3645834922790527, "reward_std": 0.2571934461593628, "rewards/GeoLocAccuracyV2ORM/mean": 0.9354166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.23304806649684906, "rewards/GeoVisalEntityMatch2ORM/mean": 0.42916667461395264, "rewards/GeoVisalEntityMatch2ORM/std": 0.16071386635303497, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 87, "train_speed(iter/s)": 0.032964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 509.0, "completions/mean_length": 320.59375, "completions/min_length": 208.0, "epoch": 0.01054397316079559, "grad_norm": 1.2584400450600033, "kl": 0.037754252552986145, "learning_rate": 9.999994216519552e-07, "loss": 3.8245074392762035e-05, "memory(GiB)": 165.76, "reward": 2.233184576034546, "reward_std": 0.33720555901527405, "rewards/GeoLocAccuracyV2ORM/mean": 0.7354166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.4180101156234741, "rewards/GeoVisalEntityMatch2ORM/mean": 0.633184552192688, "rewards/GeoVisalEntityMatch2ORM/std": 0.2513973116874695, "rewards/MathFormat/mean": 0.8645833730697632, "rewards/MathFormat/std": 0.34396424889564514, "step": 88, "train_speed(iter/s)": 0.03285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 476.0, "completions/mean_length": 330.66668701171875, "completions/min_length": 163.0, "epoch": 0.010663791037622813, "grad_norm": 1.1936941925050741, "kl": 0.043771892786026, "learning_rate": 9.999990963312778e-07, "loss": 4.564225673675537e-05, "memory(GiB)": 165.76, "reward": 2.4331846237182617, "reward_std": 0.17582005262374878, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.46443456411361694, "rewards/GeoVisalEntityMatch2ORM/std": 0.11235196143388748, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 89, "train_speed(iter/s)": 0.032814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 343.35418701171875, "completions/min_length": 240.0, "epoch": 0.010783608914450037, "grad_norm": 1.2293452934872788, "kl": 0.04369460418820381, "learning_rate": 9.999986987172128e-07, "loss": 4.377092045615427e-05, "memory(GiB)": 165.76, "reward": 2.6394965648651123, "reward_std": 0.11947444081306458, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6499132513999939, "rewards/GeoVisalEntityMatch2ORM/std": 0.13404139876365662, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 90, "train_speed(iter/s)": 0.032705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 434.0, "completions/mean_length": 332.8958435058594, "completions/min_length": 216.0, "epoch": 0.010903426791277258, "grad_norm": 1.2743937229680251, "kl": 0.04828941449522972, "learning_rate": 9.99998228809817e-07, "loss": 4.921605432173237e-05, "memory(GiB)": 165.76, "reward": 2.3148436546325684, "reward_std": 0.2537420392036438, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2404673844575882, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4231770932674408, "rewards/GeoVisalEntityMatch2ORM/std": 0.19882036745548248, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 91, "train_speed(iter/s)": 0.032638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 521.0, "completions/mean_length": 353.6770935058594, "completions/min_length": 238.0, "epoch": 0.011023244668104481, "grad_norm": 1.2532643419925418, "kl": 0.04518351890146732, "learning_rate": 9.999976866091588e-07, "loss": 4.555781924864277e-05, "memory(GiB)": 165.76, "reward": 2.348206043243408, "reward_std": 0.19102522730827332, "rewards/GeoLocAccuracyV2ORM/mean": 0.9729167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.15250827372074127, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3752893805503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.19385042786598206, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 92, "train_speed(iter/s)": 0.032708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.0, "completions/mean_length": 390.94793701171875, "completions/min_length": 262.0, "epoch": 0.011143062544931704, "grad_norm": 1.1502996321280765, "kl": 0.04601626843214035, "learning_rate": 9.999970721153163e-07, "loss": 4.663815343519673e-05, "memory(GiB)": 165.76, "reward": 2.2246031761169434, "reward_std": 0.1978263556957245, "rewards/GeoLocAccuracyV2ORM/mean": 0.8416666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3204163908958435, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3829365372657776, "rewards/GeoVisalEntityMatch2ORM/std": 0.1490044742822647, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 93, "train_speed(iter/s)": 0.032748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 451.0, "completions/mean_length": 361.0, "completions/min_length": 257.0, "epoch": 0.011262880421758926, "grad_norm": 1.1724889570480348, "kl": 0.049310872331261635, "learning_rate": 9.999963853283784e-07, "loss": 5.0352267862763256e-05, "memory(GiB)": 165.76, "reward": 2.2471065521240234, "reward_std": 0.2556188702583313, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.40824830532073975, "rewards/GeoVisalEntityMatch2ORM/mean": 0.48668980598449707, "rewards/GeoVisalEntityMatch2ORM/std": 0.1356932669878006, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 94, "train_speed(iter/s)": 0.032679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 552.0, "completions/mean_length": 372.35418701171875, "completions/min_length": 222.0, "epoch": 0.011382698298586149, "grad_norm": 1.1852228522316215, "kl": 0.05587015114724636, "learning_rate": 9.999956262484445e-07, "loss": 5.568563938140869e-05, "memory(GiB)": 165.76, "reward": 2.1084201335906982, "reward_std": 0.4318831264972687, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3746342957019806, "rewards/GeoVisalEntityMatch2ORM/mean": 0.44175347685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.21843574941158295, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.3746342957019806, "step": 95, "train_speed(iter/s)": 0.032577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 510.0, "completions/mean_length": 374.72918701171875, "completions/min_length": 259.0, "epoch": 0.011502516175413372, "grad_norm": 1.2027853761951937, "kl": 0.05721244961023331, "learning_rate": 9.999947948756245e-07, "loss": 5.783637607237324e-05, "memory(GiB)": 165.76, "reward": 2.2950398921966553, "reward_std": 0.184457927942276, "rewards/GeoLocAccuracyV2ORM/mean": 0.8395833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.3252461850643158, "rewards/GeoVisalEntityMatch2ORM/mean": 0.45545634627342224, "rewards/GeoVisalEntityMatch2ORM/std": 0.17167481780052185, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 96, "train_speed(iter/s)": 0.032597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 524.0, "completions/mean_length": 401.97918701171875, "completions/min_length": 293.0, "epoch": 0.011622334052240594, "grad_norm": 1.0725168004315957, "kl": 0.050963269546628, "learning_rate": 9.999938912100383e-07, "loss": 5.225340646575205e-05, "memory(GiB)": 165.76, "reward": 2.3614988327026367, "reward_std": 0.08890531957149506, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3614989221096039, "rewards/GeoVisalEntityMatch2ORM/std": 0.1423180103302002, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 97, "train_speed(iter/s)": 0.032629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 552.0, "completions/mean_length": 409.66668701171875, "completions/min_length": 274.0, "epoch": 0.011742151929067817, "grad_norm": 1.0234912219114678, "kl": 0.05104006826877594, "learning_rate": 9.999929152518167e-07, "loss": 5.122522634337656e-05, "memory(GiB)": 165.76, "reward": 2.465538263320923, "reward_std": 0.3546692430973053, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087528228759766, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5384548902511597, "rewards/GeoVisalEntityMatch2ORM/std": 0.2327793389558792, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 98, "train_speed(iter/s)": 0.032658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/mean_length": 365.25, "completions/min_length": 264.0, "epoch": 0.01186196980589504, "grad_norm": 1.176892927624131, "kl": 0.05701450631022453, "learning_rate": 9.99991867001101e-07, "loss": 5.710124969482422e-05, "memory(GiB)": 165.76, "reward": 2.121969699859619, "reward_std": 0.21923908591270447, "rewards/GeoLocAccuracyV2ORM/mean": 0.6833333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.3932768404483795, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4386363923549652, "rewards/GeoVisalEntityMatch2ORM/std": 0.1436581313610077, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 99, "train_speed(iter/s)": 0.032663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/mean_length": 386.4895935058594, "completions/min_length": 249.0, "epoch": 0.011981787682722261, "grad_norm": 1.025586324412284, "kl": 0.06090938672423363, "learning_rate": 9.999907464580422e-07, "loss": 6.20459541096352e-05, "memory(GiB)": 165.76, "reward": 2.4486191272735596, "reward_std": 0.14508545398712158, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4569523334503174, "rewards/GeoVisalEntityMatch2ORM/std": 0.2199670523405075, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 100, "train_speed(iter/s)": 0.032693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/mean_length": 383.84375, "completions/min_length": 252.0, "epoch": 0.012101605559549485, "grad_norm": 1.081661900661042, "kl": 0.06034421548247337, "learning_rate": 9.999895536228029e-07, "loss": 6.151696288725361e-05, "memory(GiB)": 165.76, "reward": 2.5211639404296875, "reward_std": 0.15131539106369019, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5294973850250244, "rewards/GeoVisalEntityMatch2ORM/std": 0.188433438539505, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 101, "train_speed(iter/s)": 0.032721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 514.0, "completions/mean_length": 413.54168701171875, "completions/min_length": 282.0, "epoch": 0.012221423436376708, "grad_norm": 1.0441191341076577, "kl": 0.06011896952986717, "learning_rate": 9.999882884955552e-07, "loss": 6.11313953413628e-05, "memory(GiB)": 165.76, "reward": 2.200260639190674, "reward_std": 0.34147682785987854, "rewards/GeoLocAccuracyV2ORM/mean": 0.71875, "rewards/GeoLocAccuracyV2ORM/std": 0.45196935534477234, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5127604007720947, "rewards/GeoVisalEntityMatch2ORM/std": 0.25635412335395813, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 102, "train_speed(iter/s)": 0.032735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 542.0, "completions/mean_length": 399.65625, "completions/min_length": 258.0, "epoch": 0.01234124131320393, "grad_norm": 1.0840784341928476, "kl": 0.06428797543048859, "learning_rate": 9.999869510764822e-07, "loss": 6.504108750959858e-05, "memory(GiB)": 165.76, "reward": 2.1324996948242188, "reward_std": 0.32688602805137634, "rewards/GeoLocAccuracyV2ORM/mean": 0.7041666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.39361125230789185, "rewards/GeoVisalEntityMatch2ORM/mean": 0.43874964118003845, "rewards/GeoVisalEntityMatch2ORM/std": 0.21109271049499512, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 103, "train_speed(iter/s)": 0.032764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17708333333333334, "completions/max_length": 506.0, "completions/mean_length": 386.71875, "completions/min_length": 289.0, "epoch": 0.012461059190031152, "grad_norm": 3.2463527767602547, "kl": 1.6185881160199642, "learning_rate": 9.999855413657773e-07, "loss": 0.0016188746085390449, "memory(GiB)": 165.76, "reward": 2.1195311546325684, "reward_std": 0.5425702929496765, "rewards/GeoLocAccuracyV2ORM/mean": 0.7708333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.42250296473503113, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5257812738418579, "rewards/GeoVisalEntityMatch2ORM/std": 0.15858972072601318, "rewards/MathFormat/mean": 0.8229166865348816, "rewards/MathFormat/std": 0.3837431073188782, "step": 104, "train_speed(iter/s)": 0.032665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 506.0, "completions/mean_length": 398.84375, "completions/min_length": 272.0, "epoch": 0.012580877066858375, "grad_norm": 1.1180263353940083, "kl": 0.0719018392264843, "learning_rate": 9.999840593636444e-07, "loss": 7.288157939910889e-05, "memory(GiB)": 165.76, "reward": 1.6981315612792969, "reward_std": 0.4858349561691284, "rewards/GeoLocAccuracyV2ORM/mean": 0.5625, "rewards/GeoLocAccuracyV2ORM/std": 0.4986824691295624, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5731316208839417, "rewards/GeoVisalEntityMatch2ORM/std": 0.22828951478004456, "rewards/MathFormat/mean": 0.5625, "rewards/MathFormat/std": 0.4986824691295624, "step": 105, "train_speed(iter/s)": 0.032596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4270833333333333, "completions/max_length": 506.0, "completions/mean_length": 432.28125, "completions/min_length": 310.0, "epoch": 0.012700694943685599, "grad_norm": 1.158424443996119, "kl": 0.089426189661026, "learning_rate": 9.999825050702975e-07, "loss": 9.016941476147622e-05, "memory(GiB)": 165.76, "reward": 1.5935434103012085, "reward_std": 0.6370090246200562, "rewards/GeoLocAccuracyV2ORM/mean": 0.5166666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.4972909390926361, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4935433268547058, "rewards/GeoVisalEntityMatch2ORM/std": 0.2382013350725174, "rewards/MathFormat/mean": 0.5833333730697632, "rewards/MathFormat/std": 0.4955946207046509, "step": 106, "train_speed(iter/s)": 0.032507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 552.0, "completions/mean_length": 448.4375, "completions/min_length": 298.0, "epoch": 0.01282051282051282, "grad_norm": 1.0408069624439305, "kl": 0.06851721554994583, "learning_rate": 9.999808784859613e-07, "loss": 6.918981671333313e-05, "memory(GiB)": 165.76, "reward": 2.2136995792388916, "reward_std": 0.34870052337646484, "rewards/GeoLocAccuracyV2ORM/mean": 0.78125, "rewards/GeoLocAccuracyV2ORM/std": 0.383971631526947, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4949495196342468, "rewards/GeoVisalEntityMatch2ORM/std": 0.17229300737380981, "rewards/MathFormat/mean": 0.9375, "rewards/MathFormat/std": 0.2433321326971054, "step": 107, "train_speed(iter/s)": 0.032532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 507.0, "completions/mean_length": 411.29168701171875, "completions/min_length": 283.0, "epoch": 0.012940330697340043, "grad_norm": 1.0975471045935719, "kl": 0.13978362083435059, "learning_rate": 9.999791796108714e-07, "loss": 0.00013963630772195756, "memory(GiB)": 165.76, "reward": 2.2163195610046387, "reward_std": 0.2007865607738495, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5392361879348755, "rewards/GeoVisalEntityMatch2ORM/std": 0.3289692997932434, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 108, "train_speed(iter/s)": 0.032558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 514.0, "completions/mean_length": 435.19793701171875, "completions/min_length": 314.0, "epoch": 0.013060148574167266, "grad_norm": 1.0802241272627657, "kl": 0.07337841764092445, "learning_rate": 9.999774084452733e-07, "loss": 7.337331771850586e-05, "memory(GiB)": 165.76, "reward": 2.404017925262451, "reward_std": 0.275050550699234, "rewards/GeoLocAccuracyV2ORM/mean": 0.8229166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.37260934710502625, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6748511791229248, "rewards/GeoVisalEntityMatch2ORM/std": 0.24548794329166412, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930106818675995, "step": 109, "train_speed(iter/s)": 0.032578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 552.0, "completions/mean_length": 437.3958435058594, "completions/min_length": 283.0, "epoch": 0.013179966450994488, "grad_norm": 1.0826468346258835, "kl": 0.07065572589635849, "learning_rate": 9.999755649894226e-07, "loss": 7.132689643185586e-05, "memory(GiB)": 165.76, "reward": 2.550520896911621, "reward_std": 0.42091482877731323, "rewards/GeoLocAccuracyV2ORM/mean": 0.9020833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2901829183101654, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7213541865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.24025696516036987, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136448979377747, "step": 110, "train_speed(iter/s)": 0.032599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 552.0, "completions/mean_length": 453.125, "completions/min_length": 320.0, "epoch": 0.013299784327821711, "grad_norm": 1.0669812221698336, "kl": 0.07102721929550171, "learning_rate": 9.999736492435865e-07, "loss": 7.17143266228959e-05, "memory(GiB)": 165.76, "reward": 2.388227701187134, "reward_std": 0.24003201723098755, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336147725582123, "rewards/GeoVisalEntityMatch2ORM/mean": 0.46114420890808105, "rewards/GeoVisalEntityMatch2ORM/std": 0.1597190499305725, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 111, "train_speed(iter/s)": 0.032651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2604166666666667, "completions/max_length": 529.0, "completions/mean_length": 440.3958435058594, "completions/min_length": 338.0, "epoch": 0.013419602204648934, "grad_norm": 1.182597165978976, "kl": 0.07127491384744644, "learning_rate": 9.999716612080416e-07, "loss": 7.069607818266377e-05, "memory(GiB)": 165.76, "reward": 2.0412700176239014, "reward_std": 0.8283802270889282, "rewards/GeoLocAccuracyV2ORM/mean": 0.7208333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4490731358528137, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5808532238006592, "rewards/GeoVisalEntityMatch2ORM/std": 0.2724136710166931, "rewards/MathFormat/mean": 0.7395833730697632, "rewards/MathFormat/std": 0.4411657452583313, "step": 112, "train_speed(iter/s)": 0.032587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 400.0520935058594, "completions/min_length": 308.0, "epoch": 0.013539420081476156, "grad_norm": 1.0832226893463945, "kl": 0.07767760381102562, "learning_rate": 9.999696008830757e-07, "loss": 7.832050323486328e-05, "memory(GiB)": 165.76, "reward": 2.461458444595337, "reward_std": 0.2124001681804657, "rewards/GeoLocAccuracyV2ORM/mean": 0.9708333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.16410309076309204, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5114583373069763, "rewards/GeoVisalEntityMatch2ORM/std": 0.09656566381454468, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 113, "train_speed(iter/s)": 0.032602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 552.0, "completions/mean_length": 450.38543701171875, "completions/min_length": 346.0, "epoch": 0.013659237958303379, "grad_norm": 1.1680323004094035, "kl": 0.07172419130802155, "learning_rate": 9.99967468268986e-07, "loss": 7.153551268856972e-05, "memory(GiB)": 165.76, "reward": 1.7925223112106323, "reward_std": 0.6849711537361145, "rewards/GeoLocAccuracyV2ORM/mean": 0.5062500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.4714619219303131, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4112723469734192, "rewards/GeoVisalEntityMatch2ORM/std": 0.15095588564872742, "rewards/MathFormat/mean": 0.875, "rewards/MathFormat/std": 0.33245497941970825, "step": 114, "train_speed(iter/s)": 0.032619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17708333333333334, "completions/max_length": 531.0, "completions/mean_length": 420.60418701171875, "completions/min_length": 326.0, "epoch": 0.013779055835130602, "grad_norm": 1.1010640746287068, "kl": 0.08103320002555847, "learning_rate": 9.999652633660818e-07, "loss": 8.117407560348511e-05, "memory(GiB)": 165.76, "reward": 1.8184027671813965, "reward_std": 0.3356388211250305, "rewards/GeoLocAccuracyV2ORM/mean": 0.6375000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.445149302482605, "rewards/GeoVisalEntityMatch2ORM/mean": 0.35798609256744385, "rewards/GeoVisalEntityMatch2ORM/std": 0.12516607344150543, "rewards/MathFormat/mean": 0.8229166865348816, "rewards/MathFormat/std": 0.3837430775165558, "step": 115, "train_speed(iter/s)": 0.032567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 400.8645935058594, "completions/min_length": 278.0, "epoch": 0.013898873711957823, "grad_norm": 1.0913959312628705, "kl": 0.06828466802835464, "learning_rate": 9.99962986174681e-07, "loss": 6.932144606253132e-05, "memory(GiB)": 165.76, "reward": 2.404919147491455, "reward_std": 0.26010334491729736, "rewards/GeoLocAccuracyV2ORM/mean": 0.8812500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.2895777225494385, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5340856909751892, "rewards/GeoVisalEntityMatch2ORM/std": 0.1836915910243988, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 116, "train_speed(iter/s)": 0.032617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 510.0, "completions/mean_length": 399.82293701171875, "completions/min_length": 304.0, "epoch": 0.014018691588785047, "grad_norm": 1.13707339082728, "kl": 0.06928468495607376, "learning_rate": 9.999606366951135e-07, "loss": 6.960829341551289e-05, "memory(GiB)": 165.76, "reward": 2.1084201335906982, "reward_std": 0.34143275022506714, "rewards/GeoLocAccuracyV2ORM/mean": 0.84375, "rewards/GeoLocAccuracyV2ORM/std": 0.3649982213973999, "rewards/GeoVisalEntityMatch2ORM/mean": 0.42092013359069824, "rewards/GeoVisalEntityMatch2ORM/std": 0.23903435468673706, "rewards/MathFormat/mean": 0.84375, "rewards/MathFormat/std": 0.3649982213973999, "step": 117, "train_speed(iter/s)": 0.032541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 524.0, "completions/mean_length": 380.72918701171875, "completions/min_length": 272.0, "epoch": 0.01413850946561227, "grad_norm": 1.093130087660283, "kl": 0.07509016245603561, "learning_rate": 9.999582149277185e-07, "loss": 7.521237421315163e-05, "memory(GiB)": 165.76, "reward": 2.4668736457824707, "reward_std": 0.22478285431861877, "rewards/GeoLocAccuracyV2ORM/mean": 0.9375, "rewards/GeoLocAccuracyV2ORM/std": 0.22536519169807434, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5293734073638916, "rewards/GeoVisalEntityMatch2ORM/std": 0.16561079025268555, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 118, "train_speed(iter/s)": 0.032557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 552.0, "completions/mean_length": 418.84375, "completions/min_length": 327.0, "epoch": 0.014258327342439491, "grad_norm": 1.0772399460073532, "kl": 0.06080085225403309, "learning_rate": 9.999557208728465e-07, "loss": 6.184975791256875e-05, "memory(GiB)": 165.76, "reward": 2.3396825790405273, "reward_std": 0.2701668441295624, "rewards/GeoLocAccuracyV2ORM/mean": 0.9708333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.16410307586193085, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3896825909614563, "rewards/GeoVisalEntityMatch2ORM/std": 0.16770485043525696, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 119, "train_speed(iter/s)": 0.032576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 539.0, "completions/mean_length": 391.35418701171875, "completions/min_length": 261.0, "epoch": 0.014378145219266714, "grad_norm": 1.1473993982358164, "kl": 0.10456019267439842, "learning_rate": 9.999531545308585e-07, "loss": 0.00010179728269577026, "memory(GiB)": 165.76, "reward": 2.28967022895813, "reward_std": 0.43603408336639404, "rewards/GeoLocAccuracyV2ORM/mean": 0.8375000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.3545939028263092, "rewards/GeoVisalEntityMatch2ORM/mean": 0.48342013359069824, "rewards/GeoVisalEntityMatch2ORM/std": 0.19732308387756348, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 120, "train_speed(iter/s)": 0.032529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 542.0, "completions/mean_length": 370.8333435058594, "completions/min_length": 279.0, "epoch": 0.014497963096093937, "grad_norm": 1.1412862660222505, "kl": 0.06943594291806221, "learning_rate": 9.999505159021245e-07, "loss": 7.094691682141274e-05, "memory(GiB)": 165.76, "reward": 2.573516368865967, "reward_std": 0.15316496789455414, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5839331150054932, "rewards/GeoVisalEntityMatch2ORM/std": 0.24940550327301025, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 121, "train_speed(iter/s)": 0.032539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/mean_length": 378.91668701171875, "completions/min_length": 254.0, "epoch": 0.01461778097292116, "grad_norm": 1.2277284101230963, "kl": 0.07255682349205017, "learning_rate": 9.999478049870269e-07, "loss": 7.317464041989297e-05, "memory(GiB)": 165.76, "reward": 2.428443431854248, "reward_std": 0.3117160201072693, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5617766380310059, "rewards/GeoVisalEntityMatch2ORM/std": 0.16259486973285675, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 122, "train_speed(iter/s)": 0.032569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 375.3645935058594, "completions/min_length": 252.0, "epoch": 0.014737598849748382, "grad_norm": 1.1004434200146276, "kl": 0.06435265019536018, "learning_rate": 9.99945021785957e-07, "loss": 6.492435932159424e-05, "memory(GiB)": 165.76, "reward": 2.137847423553467, "reward_std": 0.2629048228263855, "rewards/GeoLocAccuracyV2ORM/mean": 0.6916667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.44192561507225037, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4565972685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.1898798644542694, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 123, "train_speed(iter/s)": 0.032596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 359.69793701171875, "completions/min_length": 224.0, "epoch": 0.014857416726575605, "grad_norm": 1.1197893649249042, "kl": 0.07005186751484871, "learning_rate": 9.99942166299318e-07, "loss": 7.187326991697773e-05, "memory(GiB)": 165.76, "reward": 2.6693577766418457, "reward_std": 0.11204437911510468, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6776909828186035, "rewards/GeoVisalEntityMatch2ORM/std": 0.17559629678726196, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 124, "train_speed(iter/s)": 0.03261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 369.51043701171875, "completions/min_length": 273.0, "epoch": 0.014977234603402828, "grad_norm": 1.1240155401557972, "kl": 0.07112998142838478, "learning_rate": 9.999392385275221e-07, "loss": 7.270028436323628e-05, "memory(GiB)": 165.76, "reward": 2.513241767883301, "reward_std": 0.14717331528663635, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357589185237885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.534075140953064, "rewards/GeoVisalEntityMatch2ORM/std": 0.12157278507947922, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 125, "train_speed(iter/s)": 0.032639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 380.54168701171875, "completions/min_length": 281.0, "epoch": 0.01509705248023005, "grad_norm": 1.0539085898620795, "kl": 0.07997442781925201, "learning_rate": 9.99936238470993e-07, "loss": 8.063018321990967e-05, "memory(GiB)": 165.76, "reward": 2.3934895992279053, "reward_std": 0.221689373254776, "rewards/GeoLocAccuracyV2ORM/mean": 0.9312499761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.23001714050769806, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47265625, "rewards/GeoVisalEntityMatch2ORM/std": 0.2142762690782547, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 126, "train_speed(iter/s)": 0.032657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 365.0833435058594, "completions/min_length": 251.0, "epoch": 0.015216870357057273, "grad_norm": 1.152781527926737, "kl": 0.06826126575469971, "learning_rate": 9.999331661301642e-07, "loss": 6.941954779904336e-05, "memory(GiB)": 165.76, "reward": 2.4854745864868164, "reward_std": 0.11757856607437134, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.485474556684494, "rewards/GeoVisalEntityMatch2ORM/std": 0.27626854181289673, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 127, "train_speed(iter/s)": 0.032685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/mean_length": 371.90625, "completions/min_length": 286.0, "epoch": 0.015336688233884496, "grad_norm": 1.2412006882437485, "kl": 0.07808782160282135, "learning_rate": 9.9993002150548e-07, "loss": 7.837762677809224e-05, "memory(GiB)": 165.76, "reward": 2.563657522201538, "reward_std": 0.12122585624456406, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5636574029922485, "rewards/GeoVisalEntityMatch2ORM/std": 0.15165948867797852, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 128, "train_speed(iter/s)": 0.032716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 362.5, "completions/min_length": 270.0, "epoch": 0.015456506110711718, "grad_norm": 1.1206895531410468, "kl": 0.085781529545784, "learning_rate": 9.999268045973952e-07, "loss": 8.611878001829609e-05, "memory(GiB)": 165.76, "reward": 2.2280051708221436, "reward_std": 0.09822505712509155, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47800514101982117, "rewards/GeoVisalEntityMatch2ORM/std": 0.19000451266765594, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 129, "train_speed(iter/s)": 0.03274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/mean_length": 373.54168701171875, "completions/min_length": 280.0, "epoch": 0.01557632398753894, "grad_norm": 1.1599054814467422, "kl": 0.0727030485868454, "learning_rate": 9.99923515406375e-07, "loss": 7.417052984237671e-05, "memory(GiB)": 165.76, "reward": 2.4124934673309326, "reward_std": 0.17503473162651062, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4333266615867615, "rewards/GeoVisalEntityMatch2ORM/std": 0.17249774932861328, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 130, "train_speed(iter/s)": 0.032762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 552.0, "completions/mean_length": 409.38543701171875, "completions/min_length": 326.0, "epoch": 0.015696141864366162, "grad_norm": 1.0804902962256624, "kl": 0.0735611841082573, "learning_rate": 9.999201539328944e-07, "loss": 7.45604484109208e-05, "memory(GiB)": 165.76, "reward": 2.270089626312256, "reward_std": 0.2876441478729248, "rewards/GeoLocAccuracyV2ORM/mean": 0.78125, "rewards/GeoLocAccuracyV2ORM/std": 0.3728446960449219, "rewards/GeoVisalEntityMatch2ORM/mean": 0.520089328289032, "rewards/GeoVisalEntityMatch2ORM/std": 0.22537574172019958, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 131, "train_speed(iter/s)": 0.032783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 480.0, "completions/mean_length": 350.9583435058594, "completions/min_length": 249.0, "epoch": 0.015815959741193385, "grad_norm": 1.2119780781542335, "kl": 0.07966183125972748, "learning_rate": 9.999167201774403e-07, "loss": 8.150687062880024e-05, "memory(GiB)": 165.76, "reward": 2.477083206176758, "reward_std": 0.08701664209365845, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47708332538604736, "rewards/GeoVisalEntityMatch2ORM/std": 0.21777383983135223, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 132, "train_speed(iter/s)": 0.032814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 507.0, "completions/mean_length": 365.8125, "completions/min_length": 256.0, "epoch": 0.01593577761802061, "grad_norm": 1.2034377989011642, "kl": 0.08124516159296036, "learning_rate": 9.999132141405084e-07, "loss": 8.183717727661133e-05, "memory(GiB)": 165.76, "reward": 2.4488468170166016, "reward_std": 0.11711788922548294, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4488467574119568, "rewards/GeoVisalEntityMatch2ORM/std": 0.1378740817308426, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 133, "train_speed(iter/s)": 0.032841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 486.0, "completions/mean_length": 376.0625, "completions/min_length": 297.0, "epoch": 0.016055595494847832, "grad_norm": 1.169769776868012, "kl": 0.08274271711707115, "learning_rate": 9.999096358226063e-07, "loss": 8.331611752510071e-05, "memory(GiB)": 165.76, "reward": 2.580894947052002, "reward_std": 0.11853963136672974, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.580894947052002, "rewards/GeoVisalEntityMatch2ORM/std": 0.12373041361570358, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 134, "train_speed(iter/s)": 0.032843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 422.90625, "completions/min_length": 290.0, "epoch": 0.016175413371675055, "grad_norm": 1.0523177162560258, "kl": 0.08067120984196663, "learning_rate": 9.999059852242507e-07, "loss": 8.126472675940022e-05, "memory(GiB)": 165.76, "reward": 2.2740700244903564, "reward_std": 0.2825610041618347, "rewards/GeoLocAccuracyV2ORM/mean": 0.6625000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.4024268686771393, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6219866275787354, "rewards/GeoVisalEntityMatch2ORM/std": 0.2365548461675644, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 135, "train_speed(iter/s)": 0.032895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 538.0, "completions/mean_length": 364.26043701171875, "completions/min_length": 286.0, "epoch": 0.016295231248502278, "grad_norm": 1.179085773757121, "kl": 0.08000852912664413, "learning_rate": 9.999022623459699e-07, "loss": 8.098284888546914e-05, "memory(GiB)": 165.76, "reward": 2.048003673553467, "reward_std": 0.27540189027786255, "rewards/GeoLocAccuracyV2ORM/mean": 0.5875000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.49145326018333435, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4605035185813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.2191665768623352, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 136, "train_speed(iter/s)": 0.032917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 410.91668701171875, "completions/min_length": 306.0, "epoch": 0.016415049125329498, "grad_norm": 1.0074647302243749, "kl": 0.07510080561041832, "learning_rate": 9.99898467188302e-07, "loss": 7.588168227812275e-05, "memory(GiB)": 165.76, "reward": 2.338653326034546, "reward_std": 0.25748127698898315, "rewards/GeoLocAccuracyV2ORM/mean": 0.7979166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.3524289131164551, "rewards/GeoVisalEntityMatch2ORM/mean": 0.551153302192688, "rewards/GeoVisalEntityMatch2ORM/std": 0.18084514141082764, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 137, "train_speed(iter/s)": 0.032937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 483.0, "completions/mean_length": 382.0, "completions/min_length": 280.0, "epoch": 0.01653486700215672, "grad_norm": 1.1368123498411986, "kl": 0.08804232627153397, "learning_rate": 9.998945997517955e-07, "loss": 8.90493392944336e-05, "memory(GiB)": 165.76, "reward": 2.4504342079162598, "reward_std": 0.283162921667099, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.21521101891994476, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5316840410232544, "rewards/GeoVisalEntityMatch2ORM/std": 0.24740943312644958, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 138, "train_speed(iter/s)": 0.032943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 516.0, "completions/mean_length": 414.28125, "completions/min_length": 320.0, "epoch": 0.016654684878983944, "grad_norm": 1.061514140598551, "kl": 0.07792757079005241, "learning_rate": 9.998906600370102e-07, "loss": 7.9326331615448e-05, "memory(GiB)": 165.76, "reward": 2.2921626567840576, "reward_std": 0.12144751846790314, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.2921627163887024, "rewards/GeoVisalEntityMatch2ORM/std": 0.21421706676483154, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 139, "train_speed(iter/s)": 0.032962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.0, "completions/mean_length": 390.3020935058594, "completions/min_length": 299.0, "epoch": 0.016774502755811167, "grad_norm": 1.108830045519758, "kl": 0.08302151784300804, "learning_rate": 9.998866480445152e-07, "loss": 8.409470319747925e-05, "memory(GiB)": 165.76, "reward": 2.2134921550750732, "reward_std": 0.16391852498054504, "rewards/GeoLocAccuracyV2ORM/mean": 0.7333333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.44073304533958435, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4801587462425232, "rewards/GeoVisalEntityMatch2ORM/std": 0.2590900659561157, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 140, "train_speed(iter/s)": 0.032982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 414.90625, "completions/min_length": 306.0, "epoch": 0.01689432063263839, "grad_norm": 1.2155931953466599, "kl": 0.0847674235701561, "learning_rate": 9.998825637748908e-07, "loss": 8.500119292875752e-05, "memory(GiB)": 165.76, "reward": 2.3595657348632812, "reward_std": 0.19411982595920563, "rewards/GeoLocAccuracyV2ORM/mean": 0.9645833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.1716662347316742, "rewards/GeoVisalEntityMatch2ORM/mean": 0.405398964881897, "rewards/GeoVisalEntityMatch2ORM/std": 0.2156507968902588, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 141, "train_speed(iter/s)": 0.033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 375.8125, "completions/min_length": 284.0, "epoch": 0.017014138509465614, "grad_norm": 1.1108148980387158, "kl": 0.09027177840471268, "learning_rate": 9.998784072287276e-07, "loss": 9.14980992092751e-05, "memory(GiB)": 165.76, "reward": 2.4978010654449463, "reward_std": 0.22305533289909363, "rewards/GeoLocAccuracyV2ORM/mean": 0.9729167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.15250827372074127, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5353009700775146, "rewards/GeoVisalEntityMatch2ORM/std": 0.16657662391662598, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 142, "train_speed(iter/s)": 0.03302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/mean_length": 389.75, "completions/min_length": 254.0, "epoch": 0.017133956386292833, "grad_norm": 1.0794839767991598, "kl": 0.08250810578465462, "learning_rate": 9.998741784066263e-07, "loss": 8.290757978102192e-05, "memory(GiB)": 165.76, "reward": 2.3659725189208984, "reward_std": 0.15792688727378845, "rewards/GeoLocAccuracyV2ORM/mean": 0.8833333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.28383341431617737, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4826389253139496, "rewards/GeoVisalEntityMatch2ORM/std": 0.17728747427463531, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 143, "train_speed(iter/s)": 0.033071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 550.0, "completions/mean_length": 398.69793701171875, "completions/min_length": 290.0, "epoch": 0.017253774263120056, "grad_norm": 1.0870797564047618, "kl": 0.08797532320022583, "learning_rate": 9.998698773091986e-07, "loss": 8.815030741970986e-05, "memory(GiB)": 165.76, "reward": 2.196110725402832, "reward_std": 0.13989609479904175, "rewards/GeoLocAccuracyV2ORM/mean": 0.8145833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.3424306809902191, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3815273642539978, "rewards/GeoVisalEntityMatch2ORM/std": 0.14833882451057434, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 144, "train_speed(iter/s)": 0.033091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 389.3125, "completions/min_length": 302.0, "epoch": 0.01737359213994728, "grad_norm": 1.073602376300606, "kl": 0.2181444726884365, "learning_rate": 9.998655039370665e-07, "loss": 0.00018736100173555315, "memory(GiB)": 165.76, "reward": 2.445486068725586, "reward_std": 0.19486483931541443, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003036379814148, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47465282678604126, "rewards/GeoVisalEntityMatch2ORM/std": 0.12244082242250443, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 145, "train_speed(iter/s)": 0.033111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 397.9270935058594, "completions/min_length": 293.0, "epoch": 0.017493410016774503, "grad_norm": 1.1498141666837944, "kl": 0.09391996636986732, "learning_rate": 9.998610582908617e-07, "loss": 9.483595931669697e-05, "memory(GiB)": 165.76, "reward": 2.386375665664673, "reward_std": 0.2541843056678772, "rewards/GeoLocAccuracyV2ORM/mean": 0.8958333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.2783094346523285, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5113756656646729, "rewards/GeoVisalEntityMatch2ORM/std": 0.24021807312965393, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 146, "train_speed(iter/s)": 0.033126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 496.0, "completions/mean_length": 391.32293701171875, "completions/min_length": 314.0, "epoch": 0.017613227893601726, "grad_norm": 1.1011544873172576, "kl": 0.1432654671370983, "learning_rate": 9.998565403712277e-07, "loss": 0.0001407414674758911, "memory(GiB)": 165.76, "reward": 2.500868320465088, "reward_std": 0.2858317494392395, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336149215698242, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6050347685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.13931356370449066, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 147, "train_speed(iter/s)": 0.033083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/mean_length": 372.4270935058594, "completions/min_length": 291.0, "epoch": 0.01773304577042895, "grad_norm": 1.1539754011891226, "kl": 0.09875631332397461, "learning_rate": 9.998519501788173e-07, "loss": 9.892880916595459e-05, "memory(GiB)": 165.76, "reward": 2.501851797103882, "reward_std": 0.11233986914157867, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5018519163131714, "rewards/GeoVisalEntityMatch2ORM/std": 0.17942705750465393, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 148, "train_speed(iter/s)": 0.033098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17708333333333334, "completions/max_length": 552.0, "completions/mean_length": 405.2395935058594, "completions/min_length": 318.0, "epoch": 0.017852863647256172, "grad_norm": 1.1512564022000362, "kl": 0.27744951099157333, "learning_rate": 9.998472877142945e-07, "loss": 0.00027027976466342807, "memory(GiB)": 165.76, "reward": 2.0026516914367676, "reward_std": 0.4280818700790405, "rewards/GeoLocAccuracyV2ORM/mean": 0.65625, "rewards/GeoLocAccuracyV2ORM/std": 0.4502192437648773, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5234848856925964, "rewards/GeoVisalEntityMatch2ORM/std": 0.18071061372756958, "rewards/MathFormat/mean": 0.8229166865348816, "rewards/MathFormat/std": 0.3837431073188782, "step": 149, "train_speed(iter/s)": 0.03303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 400.72918701171875, "completions/min_length": 297.0, "epoch": 0.017972681524083392, "grad_norm": 1.083326221844633, "kl": 0.1044602207839489, "learning_rate": 9.99842552978333e-07, "loss": 0.00010597705841064453, "memory(GiB)": 165.76, "reward": 2.524676561355591, "reward_std": 0.18094095587730408, "rewards/GeoLocAccuracyV2ORM/mean": 0.9645833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.1716662347316742, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5600931644439697, "rewards/GeoVisalEntityMatch2ORM/std": 0.15000806748867035, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 150, "train_speed(iter/s)": 0.033047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 360.29168701171875, "completions/min_length": 281.0, "epoch": 0.018092499400910615, "grad_norm": 1.183454629686085, "kl": 0.10763290151953697, "learning_rate": 9.998377459716177e-07, "loss": 0.00010936459148069844, "memory(GiB)": 165.76, "reward": 2.5677084922790527, "reward_std": 0.14937886595726013, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5843750238418579, "rewards/GeoVisalEntityMatch2ORM/std": 0.17409642040729523, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 151, "train_speed(iter/s)": 0.032992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 488.0, "completions/mean_length": 394.79168701171875, "completions/min_length": 292.0, "epoch": 0.01821231727773784, "grad_norm": 1.117727463062857, "kl": 0.10348107293248177, "learning_rate": 9.998328666948437e-07, "loss": 0.00010561446833889931, "memory(GiB)": 165.76, "reward": 2.560515880584717, "reward_std": 0.16928860545158386, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.58134925365448, "rewards/GeoVisalEntityMatch2ORM/std": 0.1775483340024948, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 152, "train_speed(iter/s)": 0.032996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 524.0, "completions/mean_length": 412.9895935058594, "completions/min_length": 301.0, "epoch": 0.01833213515456506, "grad_norm": 1.1489418483388427, "kl": 0.10869323089718819, "learning_rate": 9.998279151487162e-07, "loss": 0.00011055668437620625, "memory(GiB)": 165.76, "reward": 2.465712070465088, "reward_std": 0.261497437953949, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.21521103382110596, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5261284708976746, "rewards/GeoVisalEntityMatch2ORM/std": 0.36037591099739075, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 153, "train_speed(iter/s)": 0.03301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 523.0, "completions/mean_length": 403.01043701171875, "completions/min_length": 270.0, "epoch": 0.018451953031392285, "grad_norm": 1.1015018723558587, "kl": 0.1060301661491394, "learning_rate": 9.998228913339513e-07, "loss": 0.00010574857878964394, "memory(GiB)": 165.76, "reward": 2.348784923553467, "reward_std": 0.302168071269989, "rewards/GeoLocAccuracyV2ORM/mean": 0.9229167699813843, "rewards/GeoLocAccuracyV2ORM/std": 0.24169538915157318, "rewards/GeoVisalEntityMatch2ORM/mean": 0.43628472089767456, "rewards/GeoVisalEntityMatch2ORM/std": 0.17339621484279633, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 154, "train_speed(iter/s)": 0.032966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17708333333333334, "completions/max_length": 506.0, "completions/mean_length": 401.32293701171875, "completions/min_length": 312.0, "epoch": 0.018571770908219508, "grad_norm": 1.1523341368188995, "kl": 0.12196343019604683, "learning_rate": 9.998177952512755e-07, "loss": 0.00012237331247888505, "memory(GiB)": 165.76, "reward": 2.167645215988159, "reward_std": 0.4976774752140045, "rewards/GeoLocAccuracyV2ORM/mean": 0.8145833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.38851383328437805, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5301452875137329, "rewards/GeoVisalEntityMatch2ORM/std": 0.3901083171367645, "rewards/MathFormat/mean": 0.8229166865348816, "rewards/MathFormat/std": 0.3837431073188782, "step": 155, "train_speed(iter/s)": 0.032901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 394.3020935058594, "completions/min_length": 288.0, "epoch": 0.018691588785046728, "grad_norm": 1.1627468713528566, "kl": 0.19329619407653809, "learning_rate": 9.998126269014254e-07, "loss": 0.00018137507140636444, "memory(GiB)": 165.76, "reward": 2.3651621341705322, "reward_std": 0.28701281547546387, "rewards/GeoLocAccuracyV2ORM/mean": 0.8125, "rewards/GeoLocAccuracyV2ORM/std": 0.34678977727890015, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5734953880310059, "rewards/GeoVisalEntityMatch2ORM/std": 0.2264215499162674, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 156, "train_speed(iter/s)": 0.032909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 552.0, "completions/mean_length": 430.2395935058594, "completions/min_length": 335.0, "epoch": 0.01881140666187395, "grad_norm": 1.2202429009490925, "kl": 0.26692577823996544, "learning_rate": 9.998073862851482e-07, "loss": 0.0002538611588533968, "memory(GiB)": 165.76, "reward": 1.851008653640747, "reward_std": 0.5366746783256531, "rewards/GeoLocAccuracyV2ORM/mean": 0.6354166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4838397204875946, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3301752805709839, "rewards/GeoVisalEntityMatch2ORM/std": 0.14036749303340912, "rewards/MathFormat/mean": 0.8854166865348816, "rewards/MathFormat/std": 0.3201904594898224, "step": 157, "train_speed(iter/s)": 0.032868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 513.0, "completions/mean_length": 410.3020935058594, "completions/min_length": 329.0, "epoch": 0.018931224538701174, "grad_norm": 1.0573887727787756, "kl": 0.11616449803113937, "learning_rate": 9.99802073403202e-07, "loss": 0.00011642277240753174, "memory(GiB)": 165.76, "reward": 2.237401008605957, "reward_std": 0.25631827116012573, "rewards/GeoLocAccuracyV2ORM/mean": 0.7645833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3805755078792572, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5040674805641174, "rewards/GeoVisalEntityMatch2ORM/std": 0.22932171821594238, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 158, "train_speed(iter/s)": 0.032884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 505.0, "completions/mean_length": 395.21875, "completions/min_length": 280.0, "epoch": 0.019051042415528397, "grad_norm": 1.1455525826226483, "kl": 0.2422649748623371, "learning_rate": 9.997966882563547e-07, "loss": 0.0002281355409650132, "memory(GiB)": 165.76, "reward": 2.5389161109924316, "reward_std": 0.2141212522983551, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5805826187133789, "rewards/GeoVisalEntityMatch2ORM/std": 0.13679079711437225, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 159, "train_speed(iter/s)": 0.032901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 468.0, "completions/mean_length": 389.38543701171875, "completions/min_length": 289.0, "epoch": 0.01917086029235562, "grad_norm": 1.1211328423759472, "kl": 0.41224104166030884, "learning_rate": 9.99791230845385e-07, "loss": 0.0004002625937573612, "memory(GiB)": 165.76, "reward": 2.179863452911377, "reward_std": 0.4119586944580078, "rewards/GeoLocAccuracyV2ORM/mean": 0.8833333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.3120953440666199, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3798633813858032, "rewards/GeoVisalEntityMatch2ORM/std": 0.15408213436603546, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783623337745667, "step": 160, "train_speed(iter/s)": 0.032862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 506.0, "completions/mean_length": 403.79168701171875, "completions/min_length": 306.0, "epoch": 0.019290678169182843, "grad_norm": 1.1171896567790836, "kl": 0.18368085101246834, "learning_rate": 9.997857011710817e-07, "loss": 0.00017437587666790932, "memory(GiB)": 165.76, "reward": 2.3905065059661865, "reward_std": 0.295685350894928, "rewards/GeoLocAccuracyV2ORM/mean": 0.9375, "rewards/GeoLocAccuracyV2ORM/std": 0.22536519169807434, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4738396108150482, "rewards/GeoVisalEntityMatch2ORM/std": 0.1640271246433258, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 161, "train_speed(iter/s)": 0.032868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 458.0, "completions/mean_length": 376.38543701171875, "completions/min_length": 290.0, "epoch": 0.019410496046010063, "grad_norm": 1.0792648121933017, "kl": 0.19797548651695251, "learning_rate": 9.997800992342447e-07, "loss": 0.00018396353698335588, "memory(GiB)": 165.76, "reward": 2.6414434909820557, "reward_std": 0.26349520683288574, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357589185237885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6726934313774109, "rewards/GeoVisalEntityMatch2ORM/std": 0.24491387605667114, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 162, "train_speed(iter/s)": 0.032848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/mean_length": 389.96875, "completions/min_length": 302.0, "epoch": 0.019530313922837286, "grad_norm": 1.245644857444007, "kl": 0.10912362486124039, "learning_rate": 9.99774425035684e-07, "loss": 0.00011050949979107827, "memory(GiB)": 165.76, "reward": 2.1315104961395264, "reward_std": 0.1661374866962433, "rewards/GeoLocAccuracyV2ORM/mean": 0.856249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.31214454770088196, "rewards/GeoVisalEntityMatch2ORM/mean": 0.27526041865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.23457831144332886, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 163, "train_speed(iter/s)": 0.032894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 542.0, "completions/mean_length": 416.125, "completions/min_length": 334.0, "epoch": 0.01965013179966451, "grad_norm": 2.3803137637855345, "kl": 4.466970801353455, "learning_rate": 9.997686785762198e-07, "loss": 0.004216674715280533, "memory(GiB)": 165.76, "reward": 2.3427910804748535, "reward_std": 0.3496699333190918, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.35143813490867615, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5927910208702087, "rewards/GeoVisalEntityMatch2ORM/std": 0.14150945842266083, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783626317977905, "step": 164, "train_speed(iter/s)": 0.032852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 496.0, "completions/mean_length": 391.9375, "completions/min_length": 304.0, "epoch": 0.019769949676491733, "grad_norm": 1.1047584831365291, "kl": 0.11343758553266525, "learning_rate": 9.99762859856683e-07, "loss": 0.00011510153854032978, "memory(GiB)": 165.76, "reward": 2.659606695175171, "reward_std": 0.14677177369594574, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6846064925193787, "rewards/GeoVisalEntityMatch2ORM/std": 0.1067856177687645, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 165, "train_speed(iter/s)": 0.032843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 396.85418701171875, "completions/min_length": 277.0, "epoch": 0.019889767553318956, "grad_norm": 1.1185257755472786, "kl": 0.44717487692832947, "learning_rate": 9.997569688779148e-07, "loss": 0.00038655599928461015, "memory(GiB)": 165.76, "reward": 2.0870683193206787, "reward_std": 0.3632388114929199, "rewards/GeoLocAccuracyV2ORM/mean": 0.6166666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.45111945271492004, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4912348687648773, "rewards/GeoVisalEntityMatch2ORM/std": 0.12886828184127808, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 166, "train_speed(iter/s)": 0.032856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 507.0, "completions/mean_length": 419.2083435058594, "completions/min_length": 309.0, "epoch": 0.02000958543014618, "grad_norm": 1.0895162451264986, "kl": 0.12021288648247719, "learning_rate": 9.997510056407673e-07, "loss": 0.0001197954043163918, "memory(GiB)": 165.76, "reward": 2.0171875953674316, "reward_std": 0.2902821898460388, "rewards/GeoLocAccuracyV2ORM/mean": 0.6395833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.46122926473617554, "rewards/GeoVisalEntityMatch2ORM/mean": 0.440104216337204, "rewards/GeoVisalEntityMatch2ORM/std": 0.21357740461826324, "rewards/MathFormat/mean": 0.9375, "rewards/MathFormat/std": 0.2433321326971054, "step": 167, "train_speed(iter/s)": 0.032859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 506.0, "completions/mean_length": 400.01043701171875, "completions/min_length": 306.0, "epoch": 0.020129403306973402, "grad_norm": 1.0971634213666404, "kl": 0.11584628373384476, "learning_rate": 9.997449701461022e-07, "loss": 0.00011563301086425781, "memory(GiB)": 165.76, "reward": 2.3562231063842773, "reward_std": 0.24832773208618164, "rewards/GeoLocAccuracyV2ORM/mean": 0.9020833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.27529093623161316, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4853896498680115, "rewards/GeoVisalEntityMatch2ORM/std": 0.1357029229402542, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 168, "train_speed(iter/s)": 0.032872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 445.0, "completions/mean_length": 380.9895935058594, "completions/min_length": 295.0, "epoch": 0.020249221183800622, "grad_norm": 1.1895552143294013, "kl": 0.11822203546762466, "learning_rate": 9.997388623947926e-07, "loss": 0.00011844436812680215, "memory(GiB)": 165.76, "reward": 2.503926992416382, "reward_std": 0.12665605545043945, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5039269328117371, "rewards/GeoVisalEntityMatch2ORM/std": 0.1512146294116974, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 169, "train_speed(iter/s)": 0.032887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 387.10418701171875, "completions/min_length": 273.0, "epoch": 0.020369039060627845, "grad_norm": 1.1052986291194515, "kl": 0.3309033066034317, "learning_rate": 9.997326823877214e-07, "loss": 0.0002984876628033817, "memory(GiB)": 165.76, "reward": 2.29296875, "reward_std": 0.24003174901008606, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.20364010334014893, "rewards/GeoVisalEntityMatch2ORM/mean": 0.35546875, "rewards/GeoVisalEntityMatch2ORM/std": 0.22718846797943115, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 170, "train_speed(iter/s)": 0.032899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/mean_length": 365.3125, "completions/min_length": 264.0, "epoch": 0.020488856937455068, "grad_norm": 1.1523627952582782, "kl": 0.11914395168423653, "learning_rate": 9.997264301257824e-07, "loss": 0.00011987736070295796, "memory(GiB)": 165.76, "reward": 2.6315104961395264, "reward_std": 0.12735149264335632, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6398437023162842, "rewards/GeoVisalEntityMatch2ORM/std": 0.22282803058624268, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 171, "train_speed(iter/s)": 0.032911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 378.6770935058594, "completions/min_length": 299.0, "epoch": 0.02060867481428229, "grad_norm": 1.150088497906956, "kl": 0.11125807836651802, "learning_rate": 9.99720105609879e-07, "loss": 0.00011232992255827412, "memory(GiB)": 165.76, "reward": 2.3973422050476074, "reward_std": 0.10407604277133942, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6473420858383179, "rewards/GeoVisalEntityMatch2ORM/std": 0.13073234260082245, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 172, "train_speed(iter/s)": 0.032925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 413.0, "completions/mean_length": 341.7083435058594, "completions/min_length": 278.0, "epoch": 0.020728492691109515, "grad_norm": 1.2204824742167661, "kl": 0.15706771612167358, "learning_rate": 9.997137088409264e-07, "loss": 0.00015505155897699296, "memory(GiB)": 165.76, "reward": 2.3647589683532715, "reward_std": 0.2991529703140259, "rewards/GeoLocAccuracyV2ORM/mean": 0.8375000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.32999202609062195, "rewards/GeoVisalEntityMatch2ORM/mean": 0.548092246055603, "rewards/GeoVisalEntityMatch2ORM/std": 0.13593420386314392, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 173, "train_speed(iter/s)": 0.032888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/mean_length": 361.40625, "completions/min_length": 250.0, "epoch": 0.020848310567936738, "grad_norm": 1.193666873431895, "kl": 0.12445623427629471, "learning_rate": 9.997072398198492e-07, "loss": 0.0001261550933122635, "memory(GiB)": 165.76, "reward": 2.426612377166748, "reward_std": 0.23707014322280884, "rewards/GeoLocAccuracyV2ORM/mean": 0.9145833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.26871198415756226, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5120288133621216, "rewards/GeoVisalEntityMatch2ORM/std": 0.23019380867481232, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 174, "train_speed(iter/s)": 0.0329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 362.2708435058594, "completions/min_length": 283.0, "epoch": 0.020968128444763957, "grad_norm": 1.1614453948440093, "kl": 0.1166774109005928, "learning_rate": 9.997006985475824e-07, "loss": 0.000117773815873079, "memory(GiB)": 165.76, "reward": 2.3540096282958984, "reward_std": 0.11044473946094513, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3623429238796234, "rewards/GeoVisalEntityMatch2ORM/std": 0.15807902812957764, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 175, "train_speed(iter/s)": 0.032918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/mean_length": 344.2708435058594, "completions/min_length": 246.0, "epoch": 0.02108794632159118, "grad_norm": 1.2513064149623738, "kl": 0.12654400616884232, "learning_rate": 9.99694085025072e-07, "loss": 0.00012739500380121171, "memory(GiB)": 165.76, "reward": 2.625868320465088, "reward_std": 0.13555604219436646, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6362847685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.20028114318847656, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 176, "train_speed(iter/s)": 0.032935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/mean_length": 343.22918701171875, "completions/min_length": 260.0, "epoch": 0.021207764198418404, "grad_norm": 1.2271389956842704, "kl": 0.11822939291596413, "learning_rate": 9.996873992532744e-07, "loss": 0.00011921550321858376, "memory(GiB)": 165.76, "reward": 2.300297737121582, "reward_std": 0.234386146068573, "rewards/GeoLocAccuracyV2ORM/mean": 0.8999999761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.28135013580322266, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4002976417541504, "rewards/GeoVisalEntityMatch2ORM/std": 0.3089914917945862, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 177, "train_speed(iter/s)": 0.032956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 368.32293701171875, "completions/min_length": 285.0, "epoch": 0.021327582075245627, "grad_norm": 1.1131923576284255, "kl": 0.11789602041244507, "learning_rate": 9.996806412331563e-07, "loss": 0.00011865794658660889, "memory(GiB)": 165.76, "reward": 2.3285300731658936, "reward_std": 0.19148260354995728, "rewards/GeoLocAccuracyV2ORM/mean": 0.9104167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.28115519881248474, "rewards/GeoVisalEntityMatch2ORM/mean": 0.41811344027519226, "rewards/GeoVisalEntityMatch2ORM/std": 0.3284824788570404, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 178, "train_speed(iter/s)": 0.032975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 358.5208435058594, "completions/min_length": 288.0, "epoch": 0.02144739995207285, "grad_norm": 1.2198133982036745, "kl": 0.11386172845959663, "learning_rate": 9.996738109656944e-07, "loss": 0.00011585156607907265, "memory(GiB)": 165.76, "reward": 2.441840171813965, "reward_std": 0.15251854062080383, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.608506977558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.2506398856639862, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 179, "train_speed(iter/s)": 0.03299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 472.0, "completions/mean_length": 362.53125, "completions/min_length": 285.0, "epoch": 0.021567217828900073, "grad_norm": 1.141262319838932, "kl": 0.1538114920258522, "learning_rate": 9.996669084518766e-07, "loss": 0.00015324479318223894, "memory(GiB)": 165.76, "reward": 2.3793065547943115, "reward_std": 0.37582576274871826, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5772231817245483, "rewards/GeoVisalEntityMatch2ORM/std": 0.12358938902616501, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136448979377747, "step": 180, "train_speed(iter/s)": 0.032933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 350.57293701171875, "completions/min_length": 280.0, "epoch": 0.021687035705727293, "grad_norm": 1.101979814751518, "kl": 0.12547863647341728, "learning_rate": 9.99659933692701e-07, "loss": 0.0001271367073059082, "memory(GiB)": 165.76, "reward": 2.6332671642303467, "reward_std": 0.1921522617340088, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6541005373001099, "rewards/GeoVisalEntityMatch2ORM/std": 0.11245155334472656, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 181, "train_speed(iter/s)": 0.032947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 353.3125, "completions/min_length": 276.0, "epoch": 0.021806853582554516, "grad_norm": 1.2153442693727812, "kl": 0.11824673786759377, "learning_rate": 9.996528866891758e-07, "loss": 0.00011972089851042256, "memory(GiB)": 165.76, "reward": 2.387152671813965, "reward_std": 0.20011650025844574, "rewards/GeoLocAccuracyV2ORM/mean": 0.8958333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.2783094346523285, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4913194179534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.2257958948612213, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 182, "train_speed(iter/s)": 0.032963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 345.51043701171875, "completions/min_length": 266.0, "epoch": 0.02192667145938174, "grad_norm": 1.2767015713842749, "kl": 0.13873561471700668, "learning_rate": 9.9964576744232e-07, "loss": 0.00013931840658187866, "memory(GiB)": 165.76, "reward": 2.415914535522461, "reward_std": 0.2625925540924072, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583332896232605, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5575810670852661, "rewards/GeoVisalEntityMatch2ORM/std": 0.16972078382968903, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 183, "train_speed(iter/s)": 0.032983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 327.3020935058594, "completions/min_length": 242.0, "epoch": 0.022046489336208962, "grad_norm": 1.2222908142775903, "kl": 0.13053575158119202, "learning_rate": 9.99638575953163e-07, "loss": 0.0001324365584878251, "memory(GiB)": 165.76, "reward": 2.5580978393554688, "reward_std": 0.10581746697425842, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5580977201461792, "rewards/GeoVisalEntityMatch2ORM/std": 0.15178294479846954, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 184, "train_speed(iter/s)": 0.033004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 484.0, "completions/mean_length": 367.85418701171875, "completions/min_length": 293.0, "epoch": 0.022166307213036186, "grad_norm": 1.1284469865756883, "kl": 0.13126567751169205, "learning_rate": 9.996313122227447e-07, "loss": 0.0001317088899668306, "memory(GiB)": 165.76, "reward": 2.367708444595337, "reward_std": 0.2611429691314697, "rewards/GeoLocAccuracyV2ORM/mean": 0.9541666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.19783920049667358, "rewards/GeoVisalEntityMatch2ORM/mean": 0.43437501788139343, "rewards/GeoVisalEntityMatch2ORM/std": 0.26120293140411377, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 185, "train_speed(iter/s)": 0.03297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 348.4583435058594, "completions/min_length": 261.0, "epoch": 0.02228612508986341, "grad_norm": 1.2083303951553725, "kl": 0.12059367820620537, "learning_rate": 9.99623976252115e-07, "loss": 0.00012131284165661782, "memory(GiB)": 165.76, "reward": 2.4701390266418457, "reward_std": 0.13181382417678833, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47013887763023376, "rewards/GeoVisalEntityMatch2ORM/std": 0.1990315318107605, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 186, "train_speed(iter/s)": 0.03301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 339.57293701171875, "completions/min_length": 255.0, "epoch": 0.022405942966690632, "grad_norm": 1.2532420706623126, "kl": 0.13518604636192322, "learning_rate": 9.996165680423348e-07, "loss": 0.00013556084013544023, "memory(GiB)": 165.76, "reward": 2.295461416244507, "reward_std": 0.2066386640071869, "rewards/GeoLocAccuracyV2ORM/mean": 0.7541667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.42893174290657043, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5412946939468384, "rewards/GeoVisalEntityMatch2ORM/std": 0.18406301736831665, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 187, "train_speed(iter/s)": 0.033026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 447.0, "completions/mean_length": 363.1458435058594, "completions/min_length": 268.0, "epoch": 0.02252576084351785, "grad_norm": 1.1711157850426723, "kl": 0.12426747009158134, "learning_rate": 9.996090875944754e-07, "loss": 0.00012413784861564636, "memory(GiB)": 165.76, "reward": 2.4440600872039795, "reward_std": 0.27697521448135376, "rewards/GeoLocAccuracyV2ORM/mean": 0.9437500834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.22043615579605103, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5315600633621216, "rewards/GeoVisalEntityMatch2ORM/std": 0.2517721652984619, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 188, "train_speed(iter/s)": 0.032992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 338.4270935058594, "completions/min_length": 244.0, "epoch": 0.022645578720345075, "grad_norm": 1.150724209574523, "kl": 0.11959667131304741, "learning_rate": 9.99601534909618e-07, "loss": 0.00012086828792234883, "memory(GiB)": 165.76, "reward": 2.6202259063720703, "reward_std": 0.11838121712207794, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.620225727558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.18749947845935822, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 189, "train_speed(iter/s)": 0.033001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 344.35418701171875, "completions/min_length": 224.0, "epoch": 0.022765396597172298, "grad_norm": 1.1134496171303714, "kl": 0.12471951171755791, "learning_rate": 9.99593909988855e-07, "loss": 0.00012600857007782906, "memory(GiB)": 165.76, "reward": 2.6135079860687256, "reward_std": 0.15288010239601135, "rewards/GeoLocAccuracyV2ORM/mean": 0.9625000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.18194852769374847, "rewards/GeoVisalEntityMatch2ORM/mean": 0.651007890701294, "rewards/GeoVisalEntityMatch2ORM/std": 0.24853956699371338, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 190, "train_speed(iter/s)": 0.032968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 352.875, "completions/min_length": 294.0, "epoch": 0.02288521447399952, "grad_norm": 1.1684315662756792, "kl": 0.1406562328338623, "learning_rate": 9.995862128332887e-07, "loss": 0.00014116367674432695, "memory(GiB)": 165.76, "reward": 2.0991952419281006, "reward_std": 0.17716023325920105, "rewards/GeoLocAccuracyV2ORM/mean": 0.53125, "rewards/GeoLocAccuracyV2ORM/std": 0.5016420483589172, "rewards/GeoVisalEntityMatch2ORM/mean": 0.567945122718811, "rewards/GeoVisalEntityMatch2ORM/std": 0.1908804327249527, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 191, "train_speed(iter/s)": 0.032985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 340.59375, "completions/min_length": 245.0, "epoch": 0.023005032350826744, "grad_norm": 1.2267630411477475, "kl": 0.12412898242473602, "learning_rate": 9.995784434440318e-07, "loss": 0.00012478730059228837, "memory(GiB)": 165.76, "reward": 2.519791603088379, "reward_std": 0.11556856334209442, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5197917222976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.15541517734527588, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 192, "train_speed(iter/s)": 0.032999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 455.0, "completions/mean_length": 345.8958435058594, "completions/min_length": 253.0, "epoch": 0.023124850227653967, "grad_norm": 1.129092397032442, "kl": 0.15555129945278168, "learning_rate": 9.99570601822208e-07, "loss": 0.0001554687914904207, "memory(GiB)": 165.76, "reward": 2.4131579399108887, "reward_std": 0.242501363158226, "rewards/GeoLocAccuracyV2ORM/mean": 0.7895833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.35702845454216003, "rewards/GeoVisalEntityMatch2ORM/mean": 0.633991003036499, "rewards/GeoVisalEntityMatch2ORM/std": 0.3162245750427246, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 193, "train_speed(iter/s)": 0.032951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/mean_length": 337.9375, "completions/min_length": 234.0, "epoch": 0.023244668104481187, "grad_norm": 1.139838283832511, "kl": 0.13302572816610336, "learning_rate": 9.995626879689508e-07, "loss": 0.0001347661018371582, "memory(GiB)": 165.76, "reward": 2.5805556774139404, "reward_std": 0.17311078310012817, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6138889193534851, "rewards/GeoVisalEntityMatch2ORM/std": 0.15662124752998352, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 194, "train_speed(iter/s)": 0.032969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/mean_length": 358.94793701171875, "completions/min_length": 295.0, "epoch": 0.02336448598130841, "grad_norm": 1.2352307301917937, "kl": 0.14572254568338394, "learning_rate": 9.995547018854048e-07, "loss": 0.00014675161219201982, "memory(GiB)": 165.76, "reward": 2.539248466491699, "reward_std": 0.16888189315795898, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6059151887893677, "rewards/GeoVisalEntityMatch2ORM/std": 0.22704602777957916, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 195, "train_speed(iter/s)": 0.032989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 347.25, "completions/min_length": 274.0, "epoch": 0.023484303858135634, "grad_norm": 1.1138639386725353, "kl": 0.15684469789266586, "learning_rate": 9.995466435727245e-07, "loss": 0.00015804171562194824, "memory(GiB)": 165.76, "reward": 2.183420181274414, "reward_std": 0.19983115792274475, "rewards/GeoLocAccuracyV2ORM/mean": 0.6708333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.4687983989715576, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5125868320465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.15875519812107086, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 196, "train_speed(iter/s)": 0.033006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.0, "completions/mean_length": 337.13543701171875, "completions/min_length": 262.0, "epoch": 0.023604121734962857, "grad_norm": 1.173884131379965, "kl": 0.15551241487264633, "learning_rate": 9.995385130320748e-07, "loss": 0.00015689930296503007, "memory(GiB)": 165.76, "reward": 2.6319446563720703, "reward_std": 0.10694562643766403, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6319444179534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.250073105096817, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 197, "train_speed(iter/s)": 0.033018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 359.9270935058594, "completions/min_length": 302.0, "epoch": 0.02372393961179008, "grad_norm": 1.224095673633789, "kl": 0.15131313353776932, "learning_rate": 9.995303102646315e-07, "loss": 0.0001515944895800203, "memory(GiB)": 165.76, "reward": 2.449463367462158, "reward_std": 0.217128723859787, "rewards/GeoLocAccuracyV2ORM/mean": 0.8937500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.2987033426761627, "rewards/GeoVisalEntityMatch2ORM/mean": 0.555713415145874, "rewards/GeoVisalEntityMatch2ORM/std": 0.16407926380634308, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 198, "train_speed(iter/s)": 0.033032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 343.0625, "completions/min_length": 243.0, "epoch": 0.023843757488617303, "grad_norm": 1.1766374266899489, "kl": 0.15658879280090332, "learning_rate": 9.995220352715807e-07, "loss": 0.000157967209815979, "memory(GiB)": 165.76, "reward": 2.573784828186035, "reward_std": 0.0929383784532547, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5737847685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.20200739800930023, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 199, "train_speed(iter/s)": 0.033047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/mean_length": 376.8645935058594, "completions/min_length": 315.0, "epoch": 0.023963575365444523, "grad_norm": 1.179826478570859, "kl": 0.15406740456819534, "learning_rate": 9.995136880541186e-07, "loss": 0.00015552590775769204, "memory(GiB)": 165.76, "reward": 2.1886327266693115, "reward_std": 0.114253468811512, "rewards/GeoLocAccuracyV2ORM/mean": 0.7520833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4321254789829254, "rewards/GeoVisalEntityMatch2ORM/mean": 0.43654927611351013, "rewards/GeoVisalEntityMatch2ORM/std": 0.17010077834129333, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 200, "train_speed(iter/s)": 0.033059 }, { "epoch": 0.023963575365444523, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.004464285714285714, "eval_completions/max_length": 444.2797619047619, "eval_completions/mean_length": 362.11645416986374, "eval_completions/min_length": 283.7261904761905, "eval_kl": 0.18960397810276067, "eval_loss": 0.0001882244978332892, "eval_reward": 2.404718205332756, "eval_reward_std": 0.17760096738735834, "eval_rewards/GeoLocAccuracyV2ORM/mean": 0.8916418782124916, "eval_rewards/GeoLocAccuracyV2ORM/std": 0.16082749520206735, "eval_rewards/GeoVisalEntityMatch2ORM/mean": 0.5174165571197158, "eval_rewards/GeoVisalEntityMatch2ORM/std": 0.16082556576778492, "eval_rewards/MathFormat/mean": 0.9956597231683277, "eval_rewards/MathFormat/std": 0.012497775700120698, "eval_runtime": 1752.4094, "eval_samples_per_second": 0.192, "eval_steps_per_second": 0.005, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 350.38543701171875, "completions/min_length": 256.0, "epoch": 0.024083393242271746, "grad_norm": 1.1799846090113275, "kl": 0.1645374894142151, "learning_rate": 9.995052686134524e-07, "loss": 0.00016507382679264992, "memory(GiB)": 165.76, "reward": 2.554067373275757, "reward_std": 0.15289631485939026, "rewards/GeoLocAccuracyV2ORM/mean": 0.9729167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.15250827372074127, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5811508297920227, "rewards/GeoVisalEntityMatch2ORM/std": 0.2379811853170395, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 201, "train_speed(iter/s)": 0.025495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 365.2708435058594, "completions/min_length": 262.0, "epoch": 0.02420321111909897, "grad_norm": 1.2143624564954243, "kl": 0.15256501734256744, "learning_rate": 9.994967769507992e-07, "loss": 0.00015380482363980263, "memory(GiB)": 165.76, "reward": 2.605902910232544, "reward_std": 0.12972459197044373, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6059027910232544, "rewards/GeoVisalEntityMatch2ORM/std": 0.13625052571296692, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 202, "train_speed(iter/s)": 0.025532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 445.0, "completions/mean_length": 343.72918701171875, "completions/min_length": 224.0, "epoch": 0.024323028995926192, "grad_norm": 1.184010637711422, "kl": 0.14787112176418304, "learning_rate": 9.994882130673868e-07, "loss": 0.0001497467455919832, "memory(GiB)": 165.76, "reward": 2.5250000953674316, "reward_std": 0.13244283199310303, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5416667461395264, "rewards/GeoVisalEntityMatch2ORM/std": 0.1335475891828537, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 203, "train_speed(iter/s)": 0.025572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/mean_length": 366.13543701171875, "completions/min_length": 274.0, "epoch": 0.024442846872753415, "grad_norm": 1.1405835983138106, "kl": 0.1500900611281395, "learning_rate": 9.994795769644535e-07, "loss": 0.0001514380273874849, "memory(GiB)": 165.76, "reward": 2.672255277633667, "reward_std": 0.11372627317905426, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6722553968429565, "rewards/GeoVisalEntityMatch2ORM/std": 0.2273254692554474, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 204, "train_speed(iter/s)": 0.025609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 352.57293701171875, "completions/min_length": 253.0, "epoch": 0.02456266474958064, "grad_norm": 1.238704079384479, "kl": 0.15611816942691803, "learning_rate": 9.99470868643248e-07, "loss": 0.00015781819820404053, "memory(GiB)": 165.76, "reward": 2.6041667461395264, "reward_std": 0.15239137411117554, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6145833730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.17795540392398834, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 205, "train_speed(iter/s)": 0.025645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/mean_length": 361.1145935058594, "completions/min_length": 264.0, "epoch": 0.02468248262640786, "grad_norm": 1.1876101829825354, "kl": 0.16449365764856339, "learning_rate": 9.994620881050294e-07, "loss": 0.00016535943723283708, "memory(GiB)": 165.76, "reward": 2.2697713375091553, "reward_std": 0.17306585609912872, "rewards/GeoLocAccuracyV2ORM/mean": 0.9416667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.20909158885478973, "rewards/GeoVisalEntityMatch2ORM/mean": 0.32810473442077637, "rewards/GeoVisalEntityMatch2ORM/std": 0.17367760837078094, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 206, "train_speed(iter/s)": 0.025683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 350.96875, "completions/min_length": 279.0, "epoch": 0.02480230050323508, "grad_norm": 1.1531807769150142, "kl": 0.14776237308979034, "learning_rate": 9.994532353510672e-07, "loss": 0.00014780214405618608, "memory(GiB)": 165.76, "reward": 2.4131531715393066, "reward_std": 0.11650325357913971, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.41315311193466187, "rewards/GeoVisalEntityMatch2ORM/std": 0.17908552289009094, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 207, "train_speed(iter/s)": 0.025721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 366.26043701171875, "completions/min_length": 267.0, "epoch": 0.024922118380062305, "grad_norm": 1.019609044881977, "kl": 0.13923931121826172, "learning_rate": 9.994443103826413e-07, "loss": 0.00014015163469593972, "memory(GiB)": 165.76, "reward": 2.3851799964904785, "reward_std": 0.09549171477556229, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.38517993688583374, "rewards/GeoVisalEntityMatch2ORM/std": 0.26704344153404236, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 208, "train_speed(iter/s)": 0.02575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/mean_length": 342.6145935058594, "completions/min_length": 226.0, "epoch": 0.025041936256889528, "grad_norm": 1.243987767763416, "kl": 0.15456148236989975, "learning_rate": 9.994353132010424e-07, "loss": 0.000156017646077089, "memory(GiB)": 165.76, "reward": 2.594034194946289, "reward_std": 0.21033668518066406, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6273674368858337, "rewards/GeoVisalEntityMatch2ORM/std": 0.22771969437599182, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 209, "train_speed(iter/s)": 0.025786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 354.4583435058594, "completions/min_length": 246.0, "epoch": 0.02516175413371675, "grad_norm": 1.2123934770718143, "kl": 0.15427450090646744, "learning_rate": 9.99426243807571e-07, "loss": 0.00015599033213220537, "memory(GiB)": 165.76, "reward": 2.560811758041382, "reward_std": 0.09973088651895523, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5608116388320923, "rewards/GeoVisalEntityMatch2ORM/std": 0.20359255373477936, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 210, "train_speed(iter/s)": 0.025823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 362.85418701171875, "completions/min_length": 293.0, "epoch": 0.025281572010543974, "grad_norm": 1.185629136814069, "kl": 0.1515854001045227, "learning_rate": 9.99417102203539e-07, "loss": 0.00015253698802553117, "memory(GiB)": 165.76, "reward": 2.6292576789855957, "reward_std": 0.1506698578596115, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6292576789855957, "rewards/GeoVisalEntityMatch2ORM/std": 0.18720805644989014, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 211, "train_speed(iter/s)": 0.025859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 360.35418701171875, "completions/min_length": 284.0, "epoch": 0.025401389887371197, "grad_norm": 1.1634842883460115, "kl": 0.1667819619178772, "learning_rate": 9.994078883902676e-07, "loss": 0.00016712149954400957, "memory(GiB)": 165.76, "reward": 2.4983272552490234, "reward_std": 0.1682712882757187, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5316603779792786, "rewards/GeoVisalEntityMatch2ORM/std": 0.25897616147994995, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 212, "train_speed(iter/s)": 0.025889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/mean_length": 369.53125, "completions/min_length": 245.0, "epoch": 0.025521207764198417, "grad_norm": 1.1926015394454548, "kl": 0.16739486157894135, "learning_rate": 9.993986023690892e-07, "loss": 0.00016812235116958618, "memory(GiB)": 165.76, "reward": 2.041121006011963, "reward_std": 0.21831150352954865, "rewards/GeoLocAccuracyV2ORM/mean": 0.6812500357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.396381676197052, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3598710894584656, "rewards/GeoVisalEntityMatch2ORM/std": 0.22057384252548218, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 213, "train_speed(iter/s)": 0.025924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 358.0, "completions/min_length": 290.0, "epoch": 0.02564102564102564, "grad_norm": 1.178482510810919, "kl": 0.15690536051988602, "learning_rate": 9.993892441413466e-07, "loss": 0.00015721222735010087, "memory(GiB)": 165.76, "reward": 2.3099124431610107, "reward_std": 0.13384565711021423, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3182457387447357, "rewards/GeoVisalEntityMatch2ORM/std": 0.15022054314613342, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 214, "train_speed(iter/s)": 0.025959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 427.0, "completions/mean_length": 357.2708435058594, "completions/min_length": 255.0, "epoch": 0.025760843517852863, "grad_norm": 1.2871286048793906, "kl": 0.28438758105039597, "learning_rate": 9.993798137083923e-07, "loss": 0.0002791981096379459, "memory(GiB)": 165.76, "reward": 2.1286211013793945, "reward_std": 0.5424324870109558, "rewards/GeoLocAccuracyV2ORM/mean": 0.65625, "rewards/GeoLocAccuracyV2ORM/std": 0.45021921396255493, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5661211013793945, "rewards/GeoVisalEntityMatch2ORM/std": 0.23551470041275024, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930106818675995, "step": 215, "train_speed(iter/s)": 0.025965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 459.0, "completions/mean_length": 378.125, "completions/min_length": 253.0, "epoch": 0.025880661394680086, "grad_norm": 1.2443351034753523, "kl": 0.6693469882011414, "learning_rate": 9.993703110715907e-07, "loss": 0.0006467700004577637, "memory(GiB)": 165.76, "reward": 2.1192917823791504, "reward_std": 0.37682563066482544, "rewards/GeoLocAccuracyV2ORM/mean": 0.6375000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.43558794260025024, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5963748693466187, "rewards/GeoVisalEntityMatch2ORM/std": 0.1538323163986206, "rewards/MathFormat/mean": 0.8854166865348816, "rewards/MathFormat/std": 0.3201904594898224, "step": 216, "train_speed(iter/s)": 0.025964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.0, "completions/mean_length": 369.9375, "completions/min_length": 260.0, "epoch": 0.02600047927150731, "grad_norm": 1.1707597339623335, "kl": 0.14087402820587158, "learning_rate": 9.99360736232315e-07, "loss": 0.00014240792370401323, "memory(GiB)": 165.76, "reward": 2.4860119819641113, "reward_std": 0.20189714431762695, "rewards/GeoLocAccuracyV2ORM/mean": 0.9604166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.19165712594985962, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5255953073501587, "rewards/GeoVisalEntityMatch2ORM/std": 0.22654621303081512, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 217, "train_speed(iter/s)": 0.025997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 483.0, "completions/mean_length": 378.41668701171875, "completions/min_length": 303.0, "epoch": 0.026120297148334533, "grad_norm": 1.1422078829352997, "kl": 0.1516221985220909, "learning_rate": 9.993510891919502e-07, "loss": 0.0001522153615951538, "memory(GiB)": 165.76, "reward": 2.3953375816345215, "reward_std": 0.37708884477615356, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3547917604446411, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5515873432159424, "rewards/GeoVisalEntityMatch2ORM/std": 0.1649361103773117, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 218, "train_speed(iter/s)": 0.026026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 483.0, "completions/mean_length": 387.96875, "completions/min_length": 272.0, "epoch": 0.026240115025161753, "grad_norm": 1.1083146944070297, "kl": 0.16060787439346313, "learning_rate": 9.993413699518905e-07, "loss": 0.00016119579959195107, "memory(GiB)": 165.76, "reward": 2.33493709564209, "reward_std": 0.3104146122932434, "rewards/GeoLocAccuracyV2ORM/mean": 0.89166659116745, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713991165161, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4432705342769623, "rewards/GeoVisalEntityMatch2ORM/std": 0.16966891288757324, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 219, "train_speed(iter/s)": 0.026059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/mean_length": 351.28125, "completions/min_length": 236.0, "epoch": 0.026359932901988976, "grad_norm": 1.226854373355716, "kl": 0.16306977719068527, "learning_rate": 9.993315785135416e-07, "loss": 0.000163910910487175, "memory(GiB)": 165.76, "reward": 2.479403495788574, "reward_std": 0.11277014017105103, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47940343618392944, "rewards/GeoVisalEntityMatch2ORM/std": 0.22616544365882874, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 220, "train_speed(iter/s)": 0.026089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 364.69793701171875, "completions/min_length": 243.0, "epoch": 0.0264797507788162, "grad_norm": 1.0536168457741248, "kl": 0.15788129717111588, "learning_rate": 9.99321714878319e-07, "loss": 0.0001590301690157503, "memory(GiB)": 165.76, "reward": 2.2763891220092773, "reward_std": 0.12828905880451202, "rewards/GeoLocAccuracyV2ORM/mean": 0.7250000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.4431941509246826, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5513888597488403, "rewards/GeoVisalEntityMatch2ORM/std": 0.28040045499801636, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 221, "train_speed(iter/s)": 0.026132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 493.0, "completions/mean_length": 371.75, "completions/min_length": 259.0, "epoch": 0.026599568655643422, "grad_norm": 1.798160039413565, "kl": 1.3230874836444855, "learning_rate": 9.993117790476494e-07, "loss": 0.0011628320207819343, "memory(GiB)": 165.76, "reward": 2.4437737464904785, "reward_std": 0.3742659091949463, "rewards/GeoLocAccuracyV2ORM/mean": 0.8125, "rewards/GeoLocAccuracyV2ORM/std": 0.3814791142940521, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6521070003509521, "rewards/GeoVisalEntityMatch2ORM/std": 0.13134978711605072, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 222, "train_speed(iter/s)": 0.026138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/mean_length": 357.0833435058594, "completions/min_length": 242.0, "epoch": 0.026719386532470645, "grad_norm": 1.0959989160732082, "kl": 0.1807810664176941, "learning_rate": 9.993017710229684e-07, "loss": 0.00018194815493188798, "memory(GiB)": 165.76, "reward": 2.3728063106536865, "reward_std": 0.21933455765247345, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.43947288393974304, "rewards/GeoVisalEntityMatch2ORM/std": 0.14150331914424896, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 223, "train_speed(iter/s)": 0.02617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 356.16668701171875, "completions/min_length": 300.0, "epoch": 0.02683920440929787, "grad_norm": 1.073198071551365, "kl": 0.16637496650218964, "learning_rate": 9.99291690805724e-07, "loss": 0.00016672413039486855, "memory(GiB)": 165.76, "reward": 2.4834201335906982, "reward_std": 0.15706053376197815, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.33245497941970825, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6584201455116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.22213612496852875, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 224, "train_speed(iter/s)": 0.026205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 354.57293701171875, "completions/min_length": 259.0, "epoch": 0.02695902228612509, "grad_norm": 1.1210039825336524, "kl": 0.16871359944343567, "learning_rate": 9.99281538397373e-07, "loss": 0.00016988813877105713, "memory(GiB)": 165.76, "reward": 2.2918403148651123, "reward_std": 0.17927347123622894, "rewards/GeoLocAccuracyV2ORM/mean": 0.9645833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.1716662347316742, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3272569477558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.12528091669082642, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 225, "train_speed(iter/s)": 0.026211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 349.875, "completions/min_length": 279.0, "epoch": 0.02707884016295231, "grad_norm": 1.1154300648604891, "kl": 0.16776041686534882, "learning_rate": 9.992713137993839e-07, "loss": 0.0001690313220024109, "memory(GiB)": 165.76, "reward": 2.470980644226074, "reward_std": 0.18045413494110107, "rewards/GeoLocAccuracyV2ORM/mean": 0.8999999761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.26596397161483765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5709806680679321, "rewards/GeoVisalEntityMatch2ORM/std": 0.1884620189666748, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 226, "train_speed(iter/s)": 0.026244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 443.0, "completions/mean_length": 360.3020935058594, "completions/min_length": 256.0, "epoch": 0.027198658039779534, "grad_norm": 1.0533097243702356, "kl": 0.15676233172416687, "learning_rate": 9.992610170132343e-07, "loss": 0.00015914192772470415, "memory(GiB)": 165.76, "reward": 2.3804893493652344, "reward_std": 0.08526600897312164, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.580489456653595, "rewards/GeoVisalEntityMatch2ORM/std": 0.1552884727716446, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 227, "train_speed(iter/s)": 0.026276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 358.4270935058594, "completions/min_length": 231.0, "epoch": 0.027318475916606758, "grad_norm": 1.1349001932967995, "kl": 0.17996203154325485, "learning_rate": 9.992506480404137e-07, "loss": 0.00018045306205749512, "memory(GiB)": 165.76, "reward": 2.6044270992279053, "reward_std": 0.15054936707019806, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6148437857627869, "rewards/GeoVisalEntityMatch2ORM/std": 0.2555205523967743, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 228, "train_speed(iter/s)": 0.026309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 426.0, "completions/mean_length": 340.41668701171875, "completions/min_length": 228.0, "epoch": 0.02743829379343398, "grad_norm": 0.9707887259600315, "kl": 0.1821991577744484, "learning_rate": 9.99240206882421e-07, "loss": 0.00018334141350351274, "memory(GiB)": 165.76, "reward": 2.542757987976074, "reward_std": 0.19244937598705292, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.27209389209747314, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6448413133621216, "rewards/GeoVisalEntityMatch2ORM/std": 0.24458980560302734, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 229, "train_speed(iter/s)": 0.026315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 436.0, "completions/mean_length": 360.60418701171875, "completions/min_length": 242.0, "epoch": 0.027558111670261204, "grad_norm": 1.1011348739692428, "kl": 0.20232047885656357, "learning_rate": 9.992296935407657e-07, "loss": 0.00020153820514678955, "memory(GiB)": 165.76, "reward": 2.4121861457824707, "reward_std": 0.30549293756484985, "rewards/GeoLocAccuracyV2ORM/mean": 0.9375, "rewards/GeoLocAccuracyV2ORM/std": 0.2433321326971054, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5371858477592468, "rewards/GeoVisalEntityMatch2ORM/std": 0.16610051691532135, "rewards/MathFormat/mean": 0.9375, "rewards/MathFormat/std": 0.2433321326971054, "step": 230, "train_speed(iter/s)": 0.026311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 454.0, "completions/mean_length": 387.03125, "completions/min_length": 283.0, "epoch": 0.027677929547088427, "grad_norm": 1.1045732814497793, "kl": 0.18031878769397736, "learning_rate": 9.992191080169682e-07, "loss": 0.00018029163766186684, "memory(GiB)": 165.76, "reward": 2.4321181774139404, "reward_std": 0.41327208280563354, "rewards/GeoLocAccuracyV2ORM/mean": 0.7958332896232605, "rewards/GeoLocAccuracyV2ORM/std": 0.3794502317905426, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7404513359069824, "rewards/GeoVisalEntityMatch2ORM/std": 0.28481802344322205, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070802092552185, "step": 231, "train_speed(iter/s)": 0.026316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 376.6770935058594, "completions/min_length": 309.0, "epoch": 0.027797747423915647, "grad_norm": 0.9282417866883752, "kl": 0.15956006199121475, "learning_rate": 9.992084503125588e-07, "loss": 0.00016040231275837868, "memory(GiB)": 165.76, "reward": 2.534226417541504, "reward_std": 0.1258525550365448, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5446428656578064, "rewards/GeoVisalEntityMatch2ORM/std": 0.3215329945087433, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 232, "train_speed(iter/s)": 0.026349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 413.6458435058594, "completions/min_length": 337.0, "epoch": 0.02791756530074287, "grad_norm": 1.0141806468485095, "kl": 0.17460957169532776, "learning_rate": 9.991977204290786e-07, "loss": 0.00017514824867248535, "memory(GiB)": 165.76, "reward": 1.958717703819275, "reward_std": 0.4756767153739929, "rewards/GeoLocAccuracyV2ORM/mean": 0.5145833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.44555801153182983, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4649677872657776, "rewards/GeoVisalEntityMatch2ORM/std": 0.18044933676719666, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 233, "train_speed(iter/s)": 0.026379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 385.5833435058594, "completions/min_length": 277.0, "epoch": 0.028037383177570093, "grad_norm": 1.1439869620553258, "kl": 0.19750197231769562, "learning_rate": 9.99186918368079e-07, "loss": 0.00019863247871398926, "memory(GiB)": 165.76, "reward": 2.422916889190674, "reward_std": 0.1810714602470398, "rewards/GeoLocAccuracyV2ORM/mean": 0.8416666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3204163908958435, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5812500715255737, "rewards/GeoVisalEntityMatch2ORM/std": 0.22898198664188385, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 234, "train_speed(iter/s)": 0.026408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 460.0, "completions/mean_length": 372.22918701171875, "completions/min_length": 294.0, "epoch": 0.028157201054397316, "grad_norm": 1.1649264677727433, "kl": 0.21458394825458527, "learning_rate": 9.991760441311218e-07, "loss": 0.0002134045062121004, "memory(GiB)": 165.76, "reward": 2.1777777671813965, "reward_std": 0.2907024621963501, "rewards/GeoLocAccuracyV2ORM/mean": 0.6666666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4649089574813843, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5527777671813965, "rewards/GeoVisalEntityMatch2ORM/std": 0.1399805098772049, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087526738643646, "step": 235, "train_speed(iter/s)": 0.026412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 378.21875, "completions/min_length": 275.0, "epoch": 0.02827701893122454, "grad_norm": 1.114139880310811, "kl": 0.18235290050506592, "learning_rate": 9.991650977197792e-07, "loss": 0.00018344323325436562, "memory(GiB)": 165.76, "reward": 2.5251736640930176, "reward_std": 0.12195863574743271, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5251736640930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.12904143333435059, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 236, "train_speed(iter/s)": 0.026442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 499.0, "completions/mean_length": 403.5520935058594, "completions/min_length": 245.0, "epoch": 0.028396836808051763, "grad_norm": 1.136497639856349, "kl": 0.18198204040527344, "learning_rate": 9.991540791356342e-07, "loss": 0.00018266837287228554, "memory(GiB)": 165.76, "reward": 2.026761054992676, "reward_std": 0.38318437337875366, "rewards/GeoLocAccuracyV2ORM/mean": 0.5541666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.4979783892631531, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5038442611694336, "rewards/GeoVisalEntityMatch2ORM/std": 0.2150813490152359, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 237, "train_speed(iter/s)": 0.026454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 437.0, "completions/mean_length": 367.8645935058594, "completions/min_length": 292.0, "epoch": 0.028516654684878982, "grad_norm": 1.1431004246049918, "kl": 0.23287196457386017, "learning_rate": 9.991429883802794e-07, "loss": 0.0002319316117791459, "memory(GiB)": 165.76, "reward": 2.204080104827881, "reward_std": 0.3819330930709839, "rewards/GeoLocAccuracyV2ORM/mean": 0.6895833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.46327874064445496, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5665798783302307, "rewards/GeoVisalEntityMatch2ORM/std": 0.16449680924415588, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336147725582123, "step": 238, "train_speed(iter/s)": 0.026452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 387.2708435058594, "completions/min_length": 328.0, "epoch": 0.028636472561706205, "grad_norm": 1.1557809816474078, "kl": 0.20453030616044998, "learning_rate": 9.99131825455319e-07, "loss": 0.00020526102161966264, "memory(GiB)": 165.76, "reward": 2.313516616821289, "reward_std": 0.16047172248363495, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.17868918180465698, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3551831841468811, "rewards/GeoVisalEntityMatch2ORM/std": 0.1385788768529892, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 239, "train_speed(iter/s)": 0.026483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 392.9895935058594, "completions/min_length": 279.0, "epoch": 0.02875629043853343, "grad_norm": 1.100733221545618, "kl": 0.2063508778810501, "learning_rate": 9.991205903623665e-07, "loss": 0.00020686785865109414, "memory(GiB)": 165.76, "reward": 2.2902779579162598, "reward_std": 0.2174742966890335, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.371625155210495, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4902777671813965, "rewards/GeoVisalEntityMatch2ORM/std": 0.24924317002296448, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 240, "train_speed(iter/s)": 0.026513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 496.0, "completions/mean_length": 387.75, "completions/min_length": 324.0, "epoch": 0.028876108315360652, "grad_norm": 1.133749001933399, "kl": 0.19527489691972733, "learning_rate": 9.991092831030466e-07, "loss": 0.0001965587434824556, "memory(GiB)": 165.76, "reward": 2.4932541847229004, "reward_std": 0.1831091046333313, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5140873193740845, "rewards/GeoVisalEntityMatch2ORM/std": 0.17742259800434113, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 241, "train_speed(iter/s)": 0.026544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 383.47918701171875, "completions/min_length": 310.0, "epoch": 0.028995926192187875, "grad_norm": 1.1030657830504111, "kl": 0.20381730049848557, "learning_rate": 9.990979036789941e-07, "loss": 0.00020627926278393716, "memory(GiB)": 165.76, "reward": 2.371961832046509, "reward_std": 0.08903664350509644, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3719618022441864, "rewards/GeoVisalEntityMatch2ORM/std": 0.15442205965518951, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 242, "train_speed(iter/s)": 0.026572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 459.0, "completions/mean_length": 382.59375, "completions/min_length": 297.0, "epoch": 0.029115744069015098, "grad_norm": 1.0898950678466328, "kl": 0.19470786303281784, "learning_rate": 9.990864520918547e-07, "loss": 0.00019514685845933855, "memory(GiB)": 165.76, "reward": 2.2131946086883545, "reward_std": 0.33643922209739685, "rewards/GeoLocAccuracyV2ORM/mean": 0.7354166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.3973541557788849, "rewards/GeoVisalEntityMatch2ORM/mean": 0.48819446563720703, "rewards/GeoVisalEntityMatch2ORM/std": 0.15618762373924255, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 243, "train_speed(iter/s)": 0.026577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 379.9270935058594, "completions/min_length": 301.0, "epoch": 0.02923556194584232, "grad_norm": 1.133754859820175, "kl": 0.21730224788188934, "learning_rate": 9.990749283432836e-07, "loss": 0.00021873539662919939, "memory(GiB)": 165.76, "reward": 2.0575647354125977, "reward_std": 0.17725546658039093, "rewards/GeoLocAccuracyV2ORM/mean": 0.5812500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.46255868673324585, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4763144850730896, "rewards/GeoVisalEntityMatch2ORM/std": 0.2164158672094345, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 244, "train_speed(iter/s)": 0.026605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 389.8125, "completions/min_length": 292.0, "epoch": 0.02935537982266954, "grad_norm": 0.9315651204199851, "kl": 0.18540170788764954, "learning_rate": 9.99063332434947e-07, "loss": 0.0001866916863946244, "memory(GiB)": 165.76, "reward": 2.660590171813965, "reward_std": 0.1381758749485016, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6710070371627808, "rewards/GeoVisalEntityMatch2ORM/std": 0.23382148146629333, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 245, "train_speed(iter/s)": 0.026633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 448.0, "completions/mean_length": 390.0208435058594, "completions/min_length": 320.0, "epoch": 0.029475197699496764, "grad_norm": 1.1782355260355846, "kl": 0.30250921100378036, "learning_rate": 9.990516643685221e-07, "loss": 0.00029619288397952914, "memory(GiB)": 165.76, "reward": 2.298532485961914, "reward_std": 0.5752472877502441, "rewards/GeoLocAccuracyV2ORM/mean": 0.7562500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.41569533944129944, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7193655371665955, "rewards/GeoVisalEntityMatch2ORM/std": 0.20658709108829498, "rewards/MathFormat/mean": 0.8229166865348816, "rewards/MathFormat/std": 0.3837431073188782, "step": 246, "train_speed(iter/s)": 0.026636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/mean_length": 404.9375, "completions/min_length": 320.0, "epoch": 0.029595015576323987, "grad_norm": 1.1044334758385932, "kl": 0.20874151587486267, "learning_rate": 9.990399241456952e-07, "loss": 0.00021003186702728271, "memory(GiB)": 165.76, "reward": 2.433405876159668, "reward_std": 0.21840566396713257, "rewards/GeoLocAccuracyV2ORM/mean": 0.9416667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.20909158885478973, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4917392134666443, "rewards/GeoVisalEntityMatch2ORM/std": 0.21936318278312683, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 247, "train_speed(iter/s)": 0.026665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 391.0520935058594, "completions/min_length": 318.0, "epoch": 0.02971483345315121, "grad_norm": 1.1683077383830107, "kl": 0.2178809717297554, "learning_rate": 9.990281117681645e-07, "loss": 0.0002187788486480713, "memory(GiB)": 165.76, "reward": 2.5996816158294678, "reward_std": 0.16336315870285034, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.1786891669034958, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6413483619689941, "rewards/GeoVisalEntityMatch2ORM/std": 0.14737240970134735, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 248, "train_speed(iter/s)": 0.026693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 398.7395935058594, "completions/min_length": 332.0, "epoch": 0.029834651329978434, "grad_norm": 1.0643779292812465, "kl": 0.21917953342199326, "learning_rate": 9.990162272376374e-07, "loss": 0.0002211133687524125, "memory(GiB)": 165.76, "reward": 2.440730094909668, "reward_std": 0.15944045782089233, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.17868918180465698, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4823969304561615, "rewards/GeoVisalEntityMatch2ORM/std": 0.16911886632442474, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 249, "train_speed(iter/s)": 0.026721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 367.21875, "completions/min_length": 297.0, "epoch": 0.029954469206805657, "grad_norm": 1.1707417244398086, "kl": 0.23878216743469238, "learning_rate": 9.990042705558325e-07, "loss": 0.00023945048451423645, "memory(GiB)": 165.76, "reward": 2.6855905055999756, "reward_std": 0.1845180243253708, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003034889698029, "rewards/GeoVisalEntityMatch2ORM/mean": 0.714756965637207, "rewards/GeoVisalEntityMatch2ORM/std": 0.2985037863254547, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 250, "train_speed(iter/s)": 0.026748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 552.0, "completions/mean_length": 412.15625, "completions/min_length": 331.0, "epoch": 0.030074287083632877, "grad_norm": 1.0228373572563476, "kl": 0.20715584605932236, "learning_rate": 9.989922417244783e-07, "loss": 0.00020915022469125688, "memory(GiB)": 165.76, "reward": 2.4584450721740723, "reward_std": 0.2557046413421631, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5001116394996643, "rewards/GeoVisalEntityMatch2ORM/std": 0.17708969116210938, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 251, "train_speed(iter/s)": 0.026774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 449.0, "completions/mean_length": 374.79168701171875, "completions/min_length": 294.0, "epoch": 0.0301941049604601, "grad_norm": 1.1086963088134516, "kl": 0.36166585981845856, "learning_rate": 9.989801407453145e-07, "loss": 0.0003581047058105469, "memory(GiB)": 165.76, "reward": 2.1723380088806152, "reward_std": 0.3951306939125061, "rewards/GeoLocAccuracyV2ORM/mean": 0.8291666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.3600925803184509, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4473379850387573, "rewards/GeoVisalEntityMatch2ORM/std": 0.21933352947235107, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070802092552185, "step": 252, "train_speed(iter/s)": 0.026778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 386.1145935058594, "completions/min_length": 321.0, "epoch": 0.030313922837287323, "grad_norm": 1.1540439219508034, "kl": 0.221110001206398, "learning_rate": 9.989679676200904e-07, "loss": 0.00022202979016583413, "memory(GiB)": 165.76, "reward": 2.4025793075561523, "reward_std": 0.20258043706417084, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4692460298538208, "rewards/GeoVisalEntityMatch2ORM/std": 0.1876010149717331, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 253, "train_speed(iter/s)": 0.026806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 469.0, "completions/mean_length": 393.0208435058594, "completions/min_length": 323.0, "epoch": 0.030433740714114546, "grad_norm": 1.1289799616112808, "kl": 0.253720685839653, "learning_rate": 9.98955722350566e-07, "loss": 0.00025236979126930237, "memory(GiB)": 165.76, "reward": 2.181748867034912, "reward_std": 0.31932827830314636, "rewards/GeoLocAccuracyV2ORM/mean": 0.7354166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.38661259412765503, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4775819480419159, "rewards/GeoVisalEntityMatch2ORM/std": 0.18573088943958282, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 254, "train_speed(iter/s)": 0.026808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 396.40625, "completions/min_length": 324.0, "epoch": 0.03055355859094177, "grad_norm": 1.0698795766443374, "kl": 0.22325272113084793, "learning_rate": 9.98943404938512e-07, "loss": 0.00022379185247700661, "memory(GiB)": 165.76, "reward": 2.6176092624664307, "reward_std": 0.15785762667655945, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6342757940292358, "rewards/GeoVisalEntityMatch2ORM/std": 0.16662099957466125, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 255, "train_speed(iter/s)": 0.026846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 394.13543701171875, "completions/min_length": 319.0, "epoch": 0.030673376467768992, "grad_norm": 1.0873481095448263, "kl": 0.24381227046251297, "learning_rate": 9.989310153857094e-07, "loss": 0.000245342671405524, "memory(GiB)": 165.76, "reward": 2.338773250579834, "reward_std": 0.22013625502586365, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5387731790542603, "rewards/GeoVisalEntityMatch2ORM/std": 0.20818841457366943, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 256, "train_speed(iter/s)": 0.026872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/mean_length": 392.85418701171875, "completions/min_length": 302.0, "epoch": 0.030793194344596212, "grad_norm": 1.0655982130897632, "kl": 0.22481811046600342, "learning_rate": 9.989185536939493e-07, "loss": 0.00022628407168667763, "memory(GiB)": 165.76, "reward": 2.3689236640930176, "reward_std": 0.11940096318721771, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6189236640930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.16120198369026184, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 257, "train_speed(iter/s)": 0.026889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/mean_length": 390.8645935058594, "completions/min_length": 308.0, "epoch": 0.030913012221423435, "grad_norm": 1.052388680801606, "kl": 0.23374280333518982, "learning_rate": 9.989060198650337e-07, "loss": 0.00023445984697900712, "memory(GiB)": 165.76, "reward": 2.5642364025115967, "reward_std": 0.17287680506706238, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5809028148651123, "rewards/GeoVisalEntityMatch2ORM/std": 0.21521639823913574, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 258, "train_speed(iter/s)": 0.026914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 371.54168701171875, "completions/min_length": 274.0, "epoch": 0.03103283009825066, "grad_norm": 0.9592966441573727, "kl": 0.21832040697336197, "learning_rate": 9.988934139007747e-07, "loss": 0.00021985173225402832, "memory(GiB)": 165.76, "reward": 2.6277778148651123, "reward_std": 0.09732135385274887, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6361111402511597, "rewards/GeoVisalEntityMatch2ORM/std": 0.29910847544670105, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 259, "train_speed(iter/s)": 0.026938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 388.97918701171875, "completions/min_length": 323.0, "epoch": 0.03115264797507788, "grad_norm": 1.1274900092248261, "kl": 0.22747967392206192, "learning_rate": 9.988807358029953e-07, "loss": 0.00022779902792535722, "memory(GiB)": 165.76, "reward": 2.4214162826538086, "reward_std": 0.14915357530117035, "rewards/GeoLocAccuracyV2ORM/mean": 0.762499988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.42581191658973694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6589162349700928, "rewards/GeoVisalEntityMatch2ORM/std": 0.20170369744300842, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 260, "train_speed(iter/s)": 0.026965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 414.71875, "completions/min_length": 350.0, "epoch": 0.0312724658519051, "grad_norm": 1.035721563353762, "kl": 0.24848555773496628, "learning_rate": 9.988679855735282e-07, "loss": 0.000249519944190979, "memory(GiB)": 165.76, "reward": 2.3152198791503906, "reward_std": 0.2660120129585266, "rewards/GeoLocAccuracyV2ORM/mean": 0.7479166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.37640380859375, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5777199268341064, "rewards/GeoVisalEntityMatch2ORM/std": 0.1143006905913353, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 261, "train_speed(iter/s)": 0.02699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 408.2708435058594, "completions/min_length": 348.0, "epoch": 0.031392283728732324, "grad_norm": 1.135150278682334, "kl": 0.2480812445282936, "learning_rate": 9.988551632142171e-07, "loss": 0.0002496515808161348, "memory(GiB)": 165.76, "reward": 2.278935194015503, "reward_std": 0.3087347745895386, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3760365843772888, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4872685372829437, "rewards/GeoVisalEntityMatch2ORM/std": 0.11270183324813843, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 262, "train_speed(iter/s)": 0.027012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 509.0, "completions/mean_length": 396.25, "completions/min_length": 312.0, "epoch": 0.03151210160555955, "grad_norm": 1.1471281162665719, "kl": 0.2284272238612175, "learning_rate": 9.988422687269157e-07, "loss": 0.00022926430392544717, "memory(GiB)": 165.76, "reward": 2.426666021347046, "reward_std": 0.13201354444026947, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.43499916791915894, "rewards/GeoVisalEntityMatch2ORM/std": 0.2688619792461395, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 263, "train_speed(iter/s)": 0.027046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/mean_length": 421.8333435058594, "completions/min_length": 361.0, "epoch": 0.03163191948238677, "grad_norm": 1.044162864600662, "kl": 0.21195950359106064, "learning_rate": 9.988293021134885e-07, "loss": 0.00021272897720336914, "memory(GiB)": 165.76, "reward": 2.2882938385009766, "reward_std": 0.23887531459331512, "rewards/GeoLocAccuracyV2ORM/mean": 0.7875000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.4050341248512268, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5007936954498291, "rewards/GeoVisalEntityMatch2ORM/std": 0.24442967772483826, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 264, "train_speed(iter/s)": 0.027072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/mean_length": 394.0, "completions/min_length": 319.0, "epoch": 0.031751737359213994, "grad_norm": 1.0666707294406437, "kl": 0.2476639449596405, "learning_rate": 9.988162633758106e-07, "loss": 0.0002476423978805542, "memory(GiB)": 165.76, "reward": 2.269345283508301, "reward_std": 0.09497473388910294, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.519345223903656, "rewards/GeoVisalEntityMatch2ORM/std": 0.23968610167503357, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 265, "train_speed(iter/s)": 0.027097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 511.0, "completions/mean_length": 430.1875, "completions/min_length": 366.0, "epoch": 0.03187155523604122, "grad_norm": 1.0598286488510693, "kl": 0.3281877115368843, "learning_rate": 9.988031525157671e-07, "loss": 0.00032197684049606323, "memory(GiB)": 165.76, "reward": 2.2712674140930176, "reward_std": 0.2785147726535797, "rewards/GeoLocAccuracyV2ORM/mean": 0.7395833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.39054054021835327, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5629340410232544, "rewards/GeoVisalEntityMatch2ORM/std": 0.15943463146686554, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 266, "train_speed(iter/s)": 0.027122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 383.54168701171875, "completions/min_length": 271.0, "epoch": 0.03199137311286844, "grad_norm": 0.9517651512077561, "kl": 0.24831130355596542, "learning_rate": 9.987899695352535e-07, "loss": 0.00024888169718906283, "memory(GiB)": 165.76, "reward": 2.613157272338867, "reward_std": 0.06434165686368942, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6131572723388672, "rewards/GeoVisalEntityMatch2ORM/std": 0.255317747592926, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 267, "train_speed(iter/s)": 0.027146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 422.47918701171875, "completions/min_length": 340.0, "epoch": 0.032111190989695663, "grad_norm": 1.068120574124668, "kl": 0.24946386367082596, "learning_rate": 9.98776714436176e-07, "loss": 0.00025012841797433794, "memory(GiB)": 165.76, "reward": 2.354269504547119, "reward_std": 0.2767603099346161, "rewards/GeoLocAccuracyV2ORM/mean": 0.8833333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.29829925298690796, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4709359109401703, "rewards/GeoVisalEntityMatch2ORM/std": 0.20302149653434753, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 268, "train_speed(iter/s)": 0.02717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/mean_length": 404.8645935058594, "completions/min_length": 311.0, "epoch": 0.03223100886652289, "grad_norm": 1.1192960936073535, "kl": 0.26045122742652893, "learning_rate": 9.987633872204506e-07, "loss": 0.00026110809994861484, "memory(GiB)": 165.76, "reward": 2.551938772201538, "reward_std": 0.1182781383395195, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5519386529922485, "rewards/GeoVisalEntityMatch2ORM/std": 0.17497847974300385, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 269, "train_speed(iter/s)": 0.027193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/mean_length": 420.44793701171875, "completions/min_length": 315.0, "epoch": 0.03235082674335011, "grad_norm": 1.1128205420766009, "kl": 0.23304319381713867, "learning_rate": 9.987499878900053e-07, "loss": 0.0002347007393836975, "memory(GiB)": 165.76, "reward": 2.4039220809936523, "reward_std": 0.17487964034080505, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.1786891669034958, "rewards/GeoVisalEntityMatch2ORM/mean": 0.44558870792388916, "rewards/GeoVisalEntityMatch2ORM/std": 0.13868758082389832, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 270, "train_speed(iter/s)": 0.027218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 506.0, "completions/mean_length": 435.6875, "completions/min_length": 345.0, "epoch": 0.03247064462017733, "grad_norm": 2.09543862715984, "kl": 3.047751724720001, "learning_rate": 9.987365164467766e-07, "loss": 0.0027391042094677687, "memory(GiB)": 165.76, "reward": 2.564699172973633, "reward_std": 0.32545727491378784, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087526738643646, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6376157999038696, "rewards/GeoVisalEntityMatch2ORM/std": 0.22263334691524506, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 271, "train_speed(iter/s)": 0.02724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 506.0, "completions/mean_length": 426.8333435058594, "completions/min_length": 331.0, "epoch": 0.032590462497004556, "grad_norm": 0.888055087501724, "kl": 0.3194248303771019, "learning_rate": 9.987229728927126e-07, "loss": 0.00031129768467508256, "memory(GiB)": 165.76, "reward": 2.670734167098999, "reward_std": 0.3135888874530792, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087528228759766, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7540674209594727, "rewards/GeoVisalEntityMatch2ORM/std": 0.22282777726650238, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 272, "train_speed(iter/s)": 0.027262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/mean_length": 414.78125, "completions/min_length": 323.0, "epoch": 0.03271028037383177, "grad_norm": 1.0947416318350156, "kl": 0.23074647784233093, "learning_rate": 9.987093572297715e-07, "loss": 0.00023171368229668587, "memory(GiB)": 165.76, "reward": 2.456423759460449, "reward_std": 0.18964387476444244, "rewards/GeoLocAccuracyV2ORM/mean": 0.8500000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.3138890266418457, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6064236164093018, "rewards/GeoVisalEntityMatch2ORM/std": 0.22223438322544098, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 273, "train_speed(iter/s)": 0.027287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/mean_length": 411.1145935058594, "completions/min_length": 318.0, "epoch": 0.032830098250658996, "grad_norm": 1.09488423494258, "kl": 0.26946160197257996, "learning_rate": 9.98695669459922e-07, "loss": 0.00027118128491565585, "memory(GiB)": 165.76, "reward": 2.1085071563720703, "reward_std": 0.08615725487470627, "rewards/GeoLocAccuracyV2ORM/mean": 0.5, "rewards/GeoLocAccuracyV2ORM/std": 0.5026246905326843, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6085069179534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.11820059269666672, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 274, "train_speed(iter/s)": 0.027308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 401.0833435058594, "completions/min_length": 278.0, "epoch": 0.03294991612748622, "grad_norm": 1.0673167796836418, "kl": 0.24168487638235092, "learning_rate": 9.98681909585143e-07, "loss": 0.0002432850596960634, "memory(GiB)": 165.76, "reward": 2.488194465637207, "reward_std": 0.1904830038547516, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5090278387069702, "rewards/GeoVisalEntityMatch2ORM/std": 0.2881775498390198, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 275, "train_speed(iter/s)": 0.027316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 493.0, "completions/mean_length": 398.2708435058594, "completions/min_length": 357.0, "epoch": 0.03306973400431344, "grad_norm": 1.1746076302630015, "kl": 0.41237524151802063, "learning_rate": 9.986680776074243e-07, "loss": 0.00041337808943353593, "memory(GiB)": 165.76, "reward": 1.2052083015441895, "reward_std": 0.19103005528450012, "rewards/GeoLocAccuracyV2ORM/mean": 0.20000001788139343, "rewards/GeoLocAccuracyV2ORM/std": 0.3178877532482147, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5052083730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.12772899866104126, "rewards/MathFormat/mean": 0.5, "rewards/MathFormat/std": 0.5026246905326843, "step": 276, "train_speed(iter/s)": 0.027307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 483.0, "completions/mean_length": 417.3020935058594, "completions/min_length": 338.0, "epoch": 0.033189551881140665, "grad_norm": 0.9825729699023954, "kl": 0.3021637871861458, "learning_rate": 9.986541735287658e-07, "loss": 0.0002999169228132814, "memory(GiB)": 165.76, "reward": 2.1647322177886963, "reward_std": 0.3341423273086548, "rewards/GeoLocAccuracyV2ORM/mean": 0.6791667342185974, "rewards/GeoLocAccuracyV2ORM/std": 0.4098566174507141, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5376487970352173, "rewards/GeoVisalEntityMatch2ORM/std": 0.21314038336277008, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 277, "train_speed(iter/s)": 0.027325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 470.0, "completions/mean_length": 386.3333435058594, "completions/min_length": 298.0, "epoch": 0.03330936975796789, "grad_norm": 1.1362853574938128, "kl": 0.2403474524617195, "learning_rate": 9.986401973511774e-07, "loss": 0.00023924186825752258, "memory(GiB)": 165.76, "reward": 2.5457465648651123, "reward_std": 0.22750981152057648, "rewards/GeoLocAccuracyV2ORM/mean": 0.8645833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.30503809452056885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6915798187255859, "rewards/GeoVisalEntityMatch2ORM/std": 0.18495096266269684, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 278, "train_speed(iter/s)": 0.027327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 529.0, "completions/mean_length": 431.0833435058594, "completions/min_length": 346.0, "epoch": 0.03342918763479511, "grad_norm": 0.9056504607900959, "kl": 0.2540146932005882, "learning_rate": 9.986261490766801e-07, "loss": 0.000254608690738678, "memory(GiB)": 165.76, "reward": 2.502232313156128, "reward_std": 0.09424926340579987, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7522321939468384, "rewards/GeoVisalEntityMatch2ORM/std": 0.20425063371658325, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 279, "train_speed(iter/s)": 0.02736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 549.0, "completions/mean_length": 436.82293701171875, "completions/min_length": 352.0, "epoch": 0.033549005511622335, "grad_norm": 1.0544149994724676, "kl": 0.2391585186123848, "learning_rate": 9.986120287073054e-07, "loss": 0.000240325927734375, "memory(GiB)": 165.76, "reward": 2.4357142448425293, "reward_std": 0.12818455696105957, "rewards/GeoLocAccuracyV2ORM/mean": 0.7895833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.35702845454216003, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6461309790611267, "rewards/GeoVisalEntityMatch2ORM/std": 0.16345135867595673, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 280, "train_speed(iter/s)": 0.027382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 529.0, "completions/mean_length": 457.4270935058594, "completions/min_length": 376.0, "epoch": 0.03366882338844956, "grad_norm": 1.0363189949286247, "kl": 0.3033497557044029, "learning_rate": 9.985978362450945e-07, "loss": 0.0003028388018719852, "memory(GiB)": 165.76, "reward": 2.3149638175964355, "reward_std": 0.38647550344467163, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783626317977905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4816303253173828, "rewards/GeoVisalEntityMatch2ORM/std": 0.21843652427196503, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783626317977905, "step": 281, "train_speed(iter/s)": 0.027401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 524.0, "completions/mean_length": 402.79168701171875, "completions/min_length": 362.0, "epoch": 0.03378864126527678, "grad_norm": 1.1282314745224358, "kl": 0.8298223465681076, "learning_rate": 9.985835716921e-07, "loss": 0.0008309986442327499, "memory(GiB)": 165.76, "reward": 1.5930554866790771, "reward_std": 0.08926891535520554, "rewards/GeoLocAccuracyV2ORM/mean": 0.5, "rewards/GeoLocAccuracyV2ORM/std": 0.5026246905326843, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5930556058883667, "rewards/GeoVisalEntityMatch2ORM/std": 0.20807762444019318, "rewards/MathFormat/mean": 0.5, "rewards/MathFormat/std": 0.5026246905326843, "step": 282, "train_speed(iter/s)": 0.027392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 506.0, "completions/mean_length": 445.65625, "completions/min_length": 352.0, "epoch": 0.033908459142104004, "grad_norm": 0.9539578546050123, "kl": 0.29213449358940125, "learning_rate": 9.985692350503836e-07, "loss": 0.00029056271887384355, "memory(GiB)": 165.76, "reward": 2.4544272422790527, "reward_std": 0.4031217396259308, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087528228759766, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5377604365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.2652718722820282, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 283, "train_speed(iter/s)": 0.027412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 436.625, "completions/min_length": 324.0, "epoch": 0.03402827701893123, "grad_norm": 1.1388363138586182, "kl": 0.286954902112484, "learning_rate": 9.985548263220187e-07, "loss": 0.00028255581855773926, "memory(GiB)": 165.76, "reward": 2.5467305183410645, "reward_std": 0.24663332104682922, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336147725582123, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6092303991317749, "rewards/GeoVisalEntityMatch2ORM/std": 0.1801263391971588, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 284, "train_speed(iter/s)": 0.027434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 425.15625, "completions/min_length": 337.0, "epoch": 0.03414809489575845, "grad_norm": 1.1624215964992863, "kl": 0.2524554133415222, "learning_rate": 9.985403455090886e-07, "loss": 0.00025214007473550737, "memory(GiB)": 165.76, "reward": 2.3637733459472656, "reward_std": 0.23294593393802643, "rewards/GeoLocAccuracyV2ORM/mean": 0.8895832896232605, "rewards/GeoLocAccuracyV2ORM/std": 0.28115519881248474, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4846065044403076, "rewards/GeoVisalEntityMatch2ORM/std": 0.167499840259552, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 285, "train_speed(iter/s)": 0.027456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 552.0, "completions/mean_length": 448.38543701171875, "completions/min_length": 340.0, "epoch": 0.03426791277258567, "grad_norm": 1.0477891077814767, "kl": 0.3114538788795471, "learning_rate": 9.98525792613687e-07, "loss": 0.00030640390468761325, "memory(GiB)": 165.76, "reward": 2.3966150283813477, "reward_std": 0.1817013919353485, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4174482822418213, "rewards/GeoVisalEntityMatch2ORM/std": 0.12193255126476288, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 286, "train_speed(iter/s)": 0.027478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 491.0, "completions/mean_length": 402.84375, "completions/min_length": 322.0, "epoch": 0.03438773064941289, "grad_norm": 1.0909314595067134, "kl": 0.3844968378543854, "learning_rate": 9.985111676379179e-07, "loss": 0.00037712976336479187, "memory(GiB)": 165.76, "reward": 2.5608136653900146, "reward_std": 0.4149749279022217, "rewards/GeoLocAccuracyV2ORM/mean": 0.8520833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.3473142683506012, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8128969073295593, "rewards/GeoVisalEntityMatch2ORM/std": 0.20675766468048096, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070802092552185, "step": 287, "train_speed(iter/s)": 0.027478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 425.8020935058594, "completions/min_length": 353.0, "epoch": 0.03450754852624011, "grad_norm": 1.0884528428693252, "kl": 0.3359697461128235, "learning_rate": 9.98496470583896e-07, "loss": 0.0003252103924751282, "memory(GiB)": 165.76, "reward": 2.477083444595337, "reward_std": 0.21323707699775696, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003034889698029, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5062500238418579, "rewards/GeoVisalEntityMatch2ORM/std": 0.24966678023338318, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 288, "train_speed(iter/s)": 0.027499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 506.0, "completions/mean_length": 431.57293701171875, "completions/min_length": 330.0, "epoch": 0.034627366403067336, "grad_norm": 1.0974581168350366, "kl": 0.3524091839790344, "learning_rate": 9.984817014537463e-07, "loss": 0.00034443041658960283, "memory(GiB)": 165.76, "reward": 2.417534828186035, "reward_std": 0.35965442657470703, "rewards/GeoLocAccuracyV2ORM/mean": 0.8854166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3201904594898224, "rewards/GeoVisalEntityMatch2ORM/mean": 0.584201455116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.14975757896900177, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336146235466003, "step": 289, "train_speed(iter/s)": 0.027519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 420.10418701171875, "completions/min_length": 328.0, "epoch": 0.03474718427989456, "grad_norm": 1.0605772368071815, "kl": 0.2528531476855278, "learning_rate": 9.984668602496042e-07, "loss": 0.0002534240484237671, "memory(GiB)": 165.76, "reward": 2.446713924407959, "reward_std": 0.2518181800842285, "rewards/GeoLocAccuracyV2ORM/mean": 0.8145833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.3545134961605072, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6529638171195984, "rewards/GeoVisalEntityMatch2ORM/std": 0.1809987872838974, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 290, "train_speed(iter/s)": 0.02754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 479.0, "completions/mean_length": 405.54168701171875, "completions/min_length": 305.0, "epoch": 0.03486700215672178, "grad_norm": 1.141077281214626, "kl": 0.25696705281734467, "learning_rate": 9.984519469736157e-07, "loss": 0.00025872886180877686, "memory(GiB)": 165.76, "reward": 2.3774893283843994, "reward_std": 0.18940618634223938, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003036379814148, "rewards/GeoVisalEntityMatch2ORM/mean": 0.406655877828598, "rewards/GeoVisalEntityMatch2ORM/std": 0.13298004865646362, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 291, "train_speed(iter/s)": 0.027541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 455.0, "completions/mean_length": 396.15625, "completions/min_length": 303.0, "epoch": 0.034986820033549006, "grad_norm": 1.1419768311958722, "kl": 0.7146969437599182, "learning_rate": 9.984369616279368e-07, "loss": 0.0006952633848413825, "memory(GiB)": 165.76, "reward": 2.376297950744629, "reward_std": 0.3759106695652008, "rewards/GeoLocAccuracyV2ORM/mean": 0.856249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.32535406947135925, "rewards/GeoVisalEntityMatch2ORM/mean": 0.572131335735321, "rewards/GeoVisalEntityMatch2ORM/std": 0.17180582880973816, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 292, "train_speed(iter/s)": 0.027541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 479.0, "completions/mean_length": 395.09375, "completions/min_length": 322.0, "epoch": 0.03510663791037623, "grad_norm": 1.1738663118029742, "kl": 0.6734276413917542, "learning_rate": 9.984219042147344e-07, "loss": 0.0006584003567695618, "memory(GiB)": 165.76, "reward": 2.383556604385376, "reward_std": 0.3423548638820648, "rewards/GeoLocAccuracyV2ORM/mean": 0.8854166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3201904594898224, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6127232313156128, "rewards/GeoVisalEntityMatch2ORM/std": 0.15322597324848175, "rewards/MathFormat/mean": 0.8854166865348816, "rewards/MathFormat/std": 0.3201904594898224, "step": 293, "train_speed(iter/s)": 0.027542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 493.0, "completions/mean_length": 377.54168701171875, "completions/min_length": 313.0, "epoch": 0.03522645578720345, "grad_norm": 1.0932053994576298, "kl": 0.28238798677921295, "learning_rate": 9.984067747361856e-07, "loss": 0.00028071305132471025, "memory(GiB)": 165.76, "reward": 1.8649802207946777, "reward_std": 0.501407265663147, "rewards/GeoLocAccuracyV2ORM/mean": 0.5041667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.4992450475692749, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5795634984970093, "rewards/GeoVisalEntityMatch2ORM/std": 0.16132569313049316, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 294, "train_speed(iter/s)": 0.02753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/mean_length": 365.13543701171875, "completions/min_length": 280.0, "epoch": 0.035346273664030675, "grad_norm": 1.132038124608518, "kl": 0.24154765158891678, "learning_rate": 9.983915731944778e-07, "loss": 0.00024413566279690713, "memory(GiB)": 165.76, "reward": 2.294593334197998, "reward_std": 0.22095322608947754, "rewards/GeoLocAccuracyV2ORM/mean": 0.71875, "rewards/GeoLocAccuracyV2ORM/std": 0.44255539774894714, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5758432745933533, "rewards/GeoVisalEntityMatch2ORM/std": 0.26342642307281494, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 295, "train_speed(iter/s)": 0.027548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 354.53125, "completions/min_length": 306.0, "epoch": 0.0354660915408579, "grad_norm": 1.139978599148714, "kl": 0.2564193457365036, "learning_rate": 9.98376299591809e-07, "loss": 0.000257343053817749, "memory(GiB)": 165.76, "reward": 2.1860532760620117, "reward_std": 0.24194006621837616, "rewards/GeoLocAccuracyV2ORM/mean": 0.6270833611488342, "rewards/GeoLocAccuracyV2ORM/std": 0.4575631618499756, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5589699149131775, "rewards/GeoVisalEntityMatch2ORM/std": 0.17406880855560303, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 296, "train_speed(iter/s)": 0.027568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 412.0, "completions/mean_length": 347.01043701171875, "completions/min_length": 281.0, "epoch": 0.03558590941768512, "grad_norm": 1.1769912978167167, "kl": 0.2916223406791687, "learning_rate": 9.983609539303875e-07, "loss": 0.00029178461409173906, "memory(GiB)": 165.76, "reward": 2.525463104248047, "reward_std": 0.1044924259185791, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5254629850387573, "rewards/GeoVisalEntityMatch2ORM/std": 0.17896471917629242, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 297, "train_speed(iter/s)": 0.02759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 391.0, "completions/mean_length": 335.53125, "completions/min_length": 279.0, "epoch": 0.035705727294512345, "grad_norm": 1.1052308211922373, "kl": 0.3289885073900223, "learning_rate": 9.98345536212432e-07, "loss": 0.00032751759863458574, "memory(GiB)": 165.76, "reward": 2.4187026023864746, "reward_std": 0.3393996059894562, "rewards/GeoLocAccuracyV2ORM/mean": 0.8895833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3096616566181183, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5916193127632141, "rewards/GeoVisalEntityMatch2ORM/std": 0.12130045145750046, "rewards/MathFormat/mean": 0.9375, "rewards/MathFormat/std": 0.2433321326971054, "step": 298, "train_speed(iter/s)": 0.027581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.0, "completions/mean_length": 316.01043701171875, "completions/min_length": 262.0, "epoch": 0.03582554517133956, "grad_norm": 1.228989292484358, "kl": 0.253319188952446, "learning_rate": 9.98330046440172e-07, "loss": 0.0002541790599934757, "memory(GiB)": 165.76, "reward": 2.5405094623565674, "reward_std": 0.1236037015914917, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5405093431472778, "rewards/GeoVisalEntityMatch2ORM/std": 0.18417610228061676, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 299, "train_speed(iter/s)": 0.027603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.0, "completions/mean_length": 324.47918701171875, "completions/min_length": 237.0, "epoch": 0.035945363048166784, "grad_norm": 1.1864501557297682, "kl": 0.25989431142807007, "learning_rate": 9.98314484615847e-07, "loss": 0.00026081502437591553, "memory(GiB)": 165.76, "reward": 2.3607144355773926, "reward_std": 0.19642163813114166, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.40824830532073975, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5690476298332214, "rewards/GeoVisalEntityMatch2ORM/std": 0.18801289796829224, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 300, "train_speed(iter/s)": 0.027625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 313.8125, "completions/min_length": 206.0, "epoch": 0.03606518092499401, "grad_norm": 1.0073757244433086, "kl": 0.21860371530056, "learning_rate": 9.982988507417073e-07, "loss": 0.00022035340953152627, "memory(GiB)": 165.76, "reward": 2.690699338912964, "reward_std": 0.11722365021705627, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6990327835083008, "rewards/GeoVisalEntityMatch2ORM/std": 0.22096391022205353, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 301, "train_speed(iter/s)": 0.027647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.0, "completions/mean_length": 299.66668701171875, "completions/min_length": 236.0, "epoch": 0.03618499880182123, "grad_norm": 1.1578335988426443, "kl": 0.23761006444692612, "learning_rate": 9.982831448200127e-07, "loss": 0.00023834407329559326, "memory(GiB)": 165.76, "reward": 2.5859375, "reward_std": 0.20644420385360718, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.17868918180465698, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6276041865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.23922789096832275, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 302, "train_speed(iter/s)": 0.02765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 388.0, "completions/mean_length": 320.0208435058594, "completions/min_length": 256.0, "epoch": 0.036304816678648454, "grad_norm": 1.331239250884179, "kl": 0.28407812118530273, "learning_rate": 9.982673668530346e-07, "loss": 0.0002843216061592102, "memory(GiB)": 165.76, "reward": 2.2142913341522217, "reward_std": 0.25755012035369873, "rewards/GeoLocAccuracyV2ORM/mean": 0.7708333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.42250296473503113, "rewards/GeoVisalEntityMatch2ORM/mean": 0.44345802068710327, "rewards/GeoVisalEntityMatch2ORM/std": 0.2261480689048767, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 303, "train_speed(iter/s)": 0.027672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 294.40625, "completions/min_length": 234.0, "epoch": 0.03642463455547568, "grad_norm": 1.1917448478733845, "kl": 0.22609585523605347, "learning_rate": 9.98251516843054e-07, "loss": 0.00022676712251268327, "memory(GiB)": 165.76, "reward": 2.690972089767456, "reward_std": 0.14483588933944702, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6909722089767456, "rewards/GeoVisalEntityMatch2ORM/std": 0.19474704563617706, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 304, "train_speed(iter/s)": 0.027696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.0, "completions/mean_length": 286.2395935058594, "completions/min_length": 222.0, "epoch": 0.0365444524323029, "grad_norm": 1.3366358244937673, "kl": 0.2600920796394348, "learning_rate": 9.982355947923629e-07, "loss": 0.0002608050999697298, "memory(GiB)": 165.76, "reward": 2.4869792461395264, "reward_std": 0.11643882840871811, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4869791865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.17452026903629303, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 305, "train_speed(iter/s)": 0.02772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 364.0, "completions/mean_length": 285.0208435058594, "completions/min_length": 219.0, "epoch": 0.03666427030913012, "grad_norm": 1.1611245045101892, "kl": 0.2657855302095413, "learning_rate": 9.98219600703263e-07, "loss": 0.0002654145355336368, "memory(GiB)": 165.76, "reward": 2.518890380859375, "reward_std": 0.10168029367923737, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5188902616500854, "rewards/GeoVisalEntityMatch2ORM/std": 0.14068308472633362, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 306, "train_speed(iter/s)": 0.027743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 382.0, "completions/mean_length": 288.84375, "completions/min_length": 225.0, "epoch": 0.036784088185957346, "grad_norm": 1.2088225240594608, "kl": 0.2798045128583908, "learning_rate": 9.982035345780673e-07, "loss": 0.00028187534189783037, "memory(GiB)": 165.76, "reward": 2.3045244216918945, "reward_std": 0.117306187748909, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3646675944328308, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5128577947616577, "rewards/GeoVisalEntityMatch2ORM/std": 0.1417093724012375, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 307, "train_speed(iter/s)": 0.027767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 352.0, "completions/mean_length": 278.8125, "completions/min_length": 213.0, "epoch": 0.03690390606278457, "grad_norm": 1.34164417911193, "kl": 0.24267318844795227, "learning_rate": 9.981873964190986e-07, "loss": 0.00024340301752090454, "memory(GiB)": 165.76, "reward": 2.278067111968994, "reward_std": 0.2940681576728821, "rewards/GeoLocAccuracyV2ORM/mean": 0.7666666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.4090210795402527, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5114004611968994, "rewards/GeoVisalEntityMatch2ORM/std": 0.1181817352771759, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 308, "train_speed(iter/s)": 0.027769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.0, "completions/mean_length": 281.71875, "completions/min_length": 194.0, "epoch": 0.03702372393961179, "grad_norm": 1.364038168640657, "kl": 0.2644345313310623, "learning_rate": 9.9817118622869e-07, "loss": 0.0002643335610628128, "memory(GiB)": 165.76, "reward": 2.6115450859069824, "reward_std": 0.07575622946023941, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.611545205116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.22036442160606384, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 309, "train_speed(iter/s)": 0.02777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.0, "completions/mean_length": 276.7395935058594, "completions/min_length": 177.0, "epoch": 0.037143541816439016, "grad_norm": 1.2069024652610962, "kl": 0.26773011684417725, "learning_rate": 9.981549040091856e-07, "loss": 0.0002688567037694156, "memory(GiB)": 165.76, "reward": 2.4415509700775146, "reward_std": 0.12900526821613312, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.44155094027519226, "rewards/GeoVisalEntityMatch2ORM/std": 0.20384061336517334, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 310, "train_speed(iter/s)": 0.027793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.0, "completions/mean_length": 261.2708435058594, "completions/min_length": 199.0, "epoch": 0.03726335969326623, "grad_norm": 1.3089815371258167, "kl": 0.25823119282722473, "learning_rate": 9.981385497629395e-07, "loss": 0.0002600650186650455, "memory(GiB)": 165.76, "reward": 2.5351357460021973, "reward_std": 0.12748056650161743, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5351356267929077, "rewards/GeoVisalEntityMatch2ORM/std": 0.3073596656322479, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 311, "train_speed(iter/s)": 0.02783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.0, "completions/mean_length": 260.3020935058594, "completions/min_length": 184.0, "epoch": 0.037383177570093455, "grad_norm": 1.364625363541646, "kl": 0.2589406669139862, "learning_rate": 9.981221234923162e-07, "loss": 0.00026035806513391435, "memory(GiB)": 165.76, "reward": 2.480335235595703, "reward_std": 0.17444878816604614, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.17868918180465698, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5220019221305847, "rewards/GeoVisalEntityMatch2ORM/std": 0.2068958878517151, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 312, "train_speed(iter/s)": 0.027824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.0, "completions/mean_length": 259.96875, "completions/min_length": 171.0, "epoch": 0.03750299544692068, "grad_norm": 1.2251874152312172, "kl": 0.2460002899169922, "learning_rate": 9.981056251996912e-07, "loss": 0.0002470935578458011, "memory(GiB)": 165.76, "reward": 2.4614133834838867, "reward_std": 0.16869065165519714, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4947466254234314, "rewards/GeoVisalEntityMatch2ORM/std": 0.2066034972667694, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 313, "train_speed(iter/s)": 0.027846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.0, "completions/mean_length": 252.0104217529297, "completions/min_length": 167.0, "epoch": 0.0376228133237479, "grad_norm": 1.356480868719502, "kl": 0.2922093868255615, "learning_rate": 9.980890548874493e-07, "loss": 0.0002932797069661319, "memory(GiB)": 165.76, "reward": 2.314798355102539, "reward_std": 0.1639155149459839, "rewards/GeoLocAccuracyV2ORM/mean": 0.9125000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.2592499256134033, "rewards/GeoVisalEntityMatch2ORM/mean": 0.40229830145835876, "rewards/GeoVisalEntityMatch2ORM/std": 0.13126349449157715, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 314, "train_speed(iter/s)": 0.027869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 366.0, "completions/mean_length": 262.21875, "completions/min_length": 180.0, "epoch": 0.037742631200575125, "grad_norm": 1.4183930014009842, "kl": 0.254508838057518, "learning_rate": 9.980724125579867e-07, "loss": 0.00025525689125061035, "memory(GiB)": 165.76, "reward": 2.450496196746826, "reward_std": 0.25658443570137024, "rewards/GeoLocAccuracyV2ORM/mean": 0.8145833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.3424306809902191, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6359126567840576, "rewards/GeoVisalEntityMatch2ORM/std": 0.20262499153614044, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 315, "train_speed(iter/s)": 0.027892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.0, "completions/mean_length": 266.6875, "completions/min_length": 210.0, "epoch": 0.03786244907740235, "grad_norm": 1.284687885885107, "kl": 0.28077730536460876, "learning_rate": 9.980556982137094e-07, "loss": 0.0002821236848831177, "memory(GiB)": 165.76, "reward": 2.5821969509124756, "reward_std": 0.10137473046779633, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5821969509124756, "rewards/GeoVisalEntityMatch2ORM/std": 0.2431807518005371, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 316, "train_speed(iter/s)": 0.02788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.0, "completions/mean_length": 253.25, "completions/min_length": 186.0, "epoch": 0.03798226695422957, "grad_norm": 1.2938460767576587, "kl": 0.2833740711212158, "learning_rate": 9.980389118570345e-07, "loss": 0.0002847065625246614, "memory(GiB)": 165.76, "reward": 2.457305431365967, "reward_std": 0.1880369931459427, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4781385362148285, "rewards/GeoVisalEntityMatch2ORM/std": 0.1837504655122757, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 317, "train_speed(iter/s)": 0.027874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.0, "completions/mean_length": 267.3958435058594, "completions/min_length": 209.0, "epoch": 0.038102084831056794, "grad_norm": 1.3387812642457937, "kl": 0.3073182553052902, "learning_rate": 9.980220534903887e-07, "loss": 0.0003073873813264072, "memory(GiB)": 165.76, "reward": 2.6905035972595215, "reward_std": 0.12392326444387436, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6905032992362976, "rewards/GeoVisalEntityMatch2ORM/std": 0.15239068865776062, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 318, "train_speed(iter/s)": 0.027868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 335.0, "completions/mean_length": 271.59375, "completions/min_length": 189.0, "epoch": 0.03822190270788402, "grad_norm": 1.277680135696697, "kl": 0.2737151086330414, "learning_rate": 9.980051231162098e-07, "loss": 0.0002734959125518799, "memory(GiB)": 165.76, "reward": 2.5604662895202637, "reward_std": 0.13572755455970764, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5604662895202637, "rewards/GeoVisalEntityMatch2ORM/std": 0.21251468360424042, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 319, "train_speed(iter/s)": 0.027892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.0, "completions/mean_length": 283.125, "completions/min_length": 203.0, "epoch": 0.03834172058471124, "grad_norm": 1.313623285114828, "kl": 0.25886067748069763, "learning_rate": 9.979881207369455e-07, "loss": 0.00025964280939660966, "memory(GiB)": 165.76, "reward": 2.2616899013519287, "reward_std": 0.20077785849571228, "rewards/GeoLocAccuracyV2ORM/mean": 0.7750000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.36157551407814026, "rewards/GeoVisalEntityMatch2ORM/mean": 0.48668986558914185, "rewards/GeoVisalEntityMatch2ORM/std": 0.18474024534225464, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 320, "train_speed(iter/s)": 0.027917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 289.3958435058594, "completions/min_length": 228.0, "epoch": 0.038461538461538464, "grad_norm": 1.2289097870061771, "kl": 0.283940389752388, "learning_rate": 9.979710463550542e-07, "loss": 0.0002846618590410799, "memory(GiB)": 165.76, "reward": 2.4758970737457275, "reward_std": 0.24698545038700104, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.31344157457351685, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6092303991317749, "rewards/GeoVisalEntityMatch2ORM/std": 0.17423997819423676, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 321, "train_speed(iter/s)": 0.027951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 303.8645935058594, "completions/min_length": 220.0, "epoch": 0.03858135633836569, "grad_norm": 1.3153878046310878, "kl": 0.2506047412753105, "learning_rate": 9.979538999730047e-07, "loss": 0.0002509678597562015, "memory(GiB)": 165.76, "reward": 2.426785945892334, "reward_std": 0.2051454782485962, "rewards/GeoLocAccuracyV2ORM/mean": 0.9416666030883789, "rewards/GeoLocAccuracyV2ORM/std": 0.20909158885478973, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4851190745830536, "rewards/GeoVisalEntityMatch2ORM/std": 0.20880630612373352, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 322, "train_speed(iter/s)": 0.027973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.0, "completions/mean_length": 295.22918701171875, "completions/min_length": 223.0, "epoch": 0.03870117421519291, "grad_norm": 1.309620332117216, "kl": 0.3041731119155884, "learning_rate": 9.979366815932758e-07, "loss": 0.0003057718276977539, "memory(GiB)": 165.76, "reward": 2.454960346221924, "reward_std": 0.21218204498291016, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5049603581428528, "rewards/GeoVisalEntityMatch2ORM/std": 0.253409206867218, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 323, "train_speed(iter/s)": 0.027996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.0, "completions/mean_length": 299.15625, "completions/min_length": 251.0, "epoch": 0.038820992092020126, "grad_norm": 1.1631297887466154, "kl": 0.28156527876853943, "learning_rate": 9.979193912183576e-07, "loss": 0.00028209388256073, "memory(GiB)": 165.76, "reward": 2.5103135108947754, "reward_std": 0.10815756767988205, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5103133916854858, "rewards/GeoVisalEntityMatch2ORM/std": 0.22878549993038177, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 324, "train_speed(iter/s)": 0.028018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 362.0, "completions/mean_length": 288.6458435058594, "completions/min_length": 229.0, "epoch": 0.03894080996884735, "grad_norm": 1.29859466619355, "kl": 0.30719538033008575, "learning_rate": 9.979020288507499e-07, "loss": 0.0003080318565480411, "memory(GiB)": 165.8, "reward": 2.740219831466675, "reward_std": 0.12266532331705093, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7402199506759644, "rewards/GeoVisalEntityMatch2ORM/std": 0.21697120368480682, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 325, "train_speed(iter/s)": 0.02801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 367.0, "completions/mean_length": 300.8958435058594, "completions/min_length": 219.0, "epoch": 0.03906062784567457, "grad_norm": 1.0107580522746111, "kl": 0.28278711438179016, "learning_rate": 9.978845944929628e-07, "loss": 0.00028311461210250854, "memory(GiB)": 165.8, "reward": 2.504613161087036, "reward_std": 0.08242689073085785, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7546131014823914, "rewards/GeoVisalEntityMatch2ORM/std": 0.18758831918239594, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 326, "train_speed(iter/s)": 0.028001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 397.0, "completions/mean_length": 330.2083435058594, "completions/min_length": 258.0, "epoch": 0.039180445722501796, "grad_norm": 1.1559935401092192, "kl": 0.28868967294692993, "learning_rate": 9.978670881475172e-07, "loss": 0.0002901516854763031, "memory(GiB)": 165.8, "reward": 2.601799488067627, "reward_std": 0.10173654556274414, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6017992496490479, "rewards/GeoVisalEntityMatch2ORM/std": 0.13146299123764038, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 327, "train_speed(iter/s)": 0.028019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 426.0, "completions/mean_length": 304.03125, "completions/min_length": 211.0, "epoch": 0.03930026359932902, "grad_norm": 1.0391250404315107, "kl": 0.3113795667886734, "learning_rate": 9.978495098169443e-07, "loss": 0.00031139267957769334, "memory(GiB)": 165.8, "reward": 2.366480588912964, "reward_std": 0.20358414947986603, "rewards/GeoLocAccuracyV2ORM/mean": 0.7333333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.37910327315330505, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6331473588943481, "rewards/GeoVisalEntityMatch2ORM/std": 0.24643303453922272, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 328, "train_speed(iter/s)": 0.028017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 314.71875, "completions/min_length": 250.0, "epoch": 0.03942008147615624, "grad_norm": 1.064356330652766, "kl": 0.28358039259910583, "learning_rate": 9.97831859503786e-07, "loss": 0.0002842918038368225, "memory(GiB)": 165.8, "reward": 2.355642318725586, "reward_std": 0.08415789902210236, "rewards/GeoLocAccuracyV2ORM/mean": 0.7979166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.3524289131164551, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5577256679534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.3549138009548187, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 329, "train_speed(iter/s)": 0.028037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 407.0, "completions/mean_length": 325.875, "completions/min_length": 270.0, "epoch": 0.039539899352983465, "grad_norm": 1.2001114271087043, "kl": 0.30145157873630524, "learning_rate": 9.97814137210594e-07, "loss": 0.0003026127815246582, "memory(GiB)": 165.8, "reward": 2.5541915893554688, "reward_std": 0.12925985455513, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5541914701461792, "rewards/GeoVisalEntityMatch2ORM/std": 0.21093785762786865, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 330, "train_speed(iter/s)": 0.028036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 391.0, "completions/mean_length": 315.8125, "completions/min_length": 194.0, "epoch": 0.03965971722981069, "grad_norm": 1.1810566932463211, "kl": 0.2843121439218521, "learning_rate": 9.977963429399305e-07, "loss": 0.000284343957901001, "memory(GiB)": 165.8, "reward": 2.667299270629883, "reward_std": 0.17912626266479492, "rewards/GeoLocAccuracyV2ORM/mean": 0.9729167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.15250825881958008, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6943824291229248, "rewards/GeoVisalEntityMatch2ORM/std": 0.2258155792951584, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 331, "train_speed(iter/s)": 0.028056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 328.4583435058594, "completions/min_length": 264.0, "epoch": 0.03977953510663791, "grad_norm": 1.183407030462308, "kl": 0.3199678063392639, "learning_rate": 9.977784766943687e-07, "loss": 0.00032142549753189087, "memory(GiB)": 165.8, "reward": 2.6725692749023438, "reward_std": 0.21494734287261963, "rewards/GeoLocAccuracyV2ORM/mean": 0.9270833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.26136451959609985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7454861402511597, "rewards/GeoVisalEntityMatch2ORM/std": 0.15684162080287933, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 332, "train_speed(iter/s)": 0.028076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 391.0, "completions/mean_length": 332.625, "completions/min_length": 249.0, "epoch": 0.039899352983465135, "grad_norm": 1.097701790639125, "kl": 0.31255270540714264, "learning_rate": 9.977605384764918e-07, "loss": 0.0003130696713924408, "memory(GiB)": 165.8, "reward": 2.6639180183410645, "reward_std": 0.10386336594820023, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6639178991317749, "rewards/GeoVisalEntityMatch2ORM/std": 0.19008927047252655, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 333, "train_speed(iter/s)": 0.028096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 326.1770935058594, "completions/min_length": 195.0, "epoch": 0.04001917086029236, "grad_norm": 1.1795275941313559, "kl": 0.3089830130338669, "learning_rate": 9.977425282888932e-07, "loss": 0.0003096449072472751, "memory(GiB)": 165.8, "reward": 2.2891204357147217, "reward_std": 0.1691354513168335, "rewards/GeoLocAccuracyV2ORM/mean": 0.8229166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3370082378387451, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4662036895751953, "rewards/GeoVisalEntityMatch2ORM/std": 0.2768988013267517, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 334, "train_speed(iter/s)": 0.028117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/mean_length": 339.13543701171875, "completions/min_length": 219.0, "epoch": 0.04013898873711958, "grad_norm": 1.211731033775605, "kl": 0.28858160972595215, "learning_rate": 9.977244461341772e-07, "loss": 0.00028988844132982194, "memory(GiB)": 165.8, "reward": 2.5726852416992188, "reward_std": 0.11811558902263641, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5726852416992188, "rewards/GeoVisalEntityMatch2ORM/std": 0.2707720696926117, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 335, "train_speed(iter/s)": 0.028136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.0, "completions/mean_length": 323.7083435058594, "completions/min_length": 198.0, "epoch": 0.040258806613946804, "grad_norm": 1.2749932992355284, "kl": 0.31501734256744385, "learning_rate": 9.977062920149581e-07, "loss": 0.0003162821230944246, "memory(GiB)": 165.8, "reward": 2.2249999046325684, "reward_std": 0.2634883522987366, "rewards/GeoLocAccuracyV2ORM/mean": 0.8708333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.32956641912460327, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3541666865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.24316854774951935, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 336, "train_speed(iter/s)": 0.028142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 391.0, "completions/mean_length": 324.0208435058594, "completions/min_length": 193.0, "epoch": 0.04037862449077402, "grad_norm": 1.3671666950337769, "kl": 0.338232159614563, "learning_rate": 9.976880659338608e-07, "loss": 0.0003387629985809326, "memory(GiB)": 165.8, "reward": 2.452343702316284, "reward_std": 0.11005300283432007, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.45234379172325134, "rewards/GeoVisalEntityMatch2ORM/std": 0.2076205462217331, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 337, "train_speed(iter/s)": 0.028162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 347.66668701171875, "completions/min_length": 250.0, "epoch": 0.040498442367601244, "grad_norm": 1.33067686614034, "kl": 0.32214078307151794, "learning_rate": 9.976697678935204e-07, "loss": 0.0003240009245928377, "memory(GiB)": 165.8, "reward": 2.0497798919677734, "reward_std": 0.2586054801940918, "rewards/GeoLocAccuracyV2ORM/mean": 0.5833333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4603583812713623, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4664463400840759, "rewards/GeoVisalEntityMatch2ORM/std": 0.10371209681034088, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 338, "train_speed(iter/s)": 0.028161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/mean_length": 355.69793701171875, "completions/min_length": 225.0, "epoch": 0.04061826024442847, "grad_norm": 1.1964703071517677, "kl": 0.3232109099626541, "learning_rate": 9.976513978965829e-07, "loss": 0.00032384198857471347, "memory(GiB)": 165.8, "reward": 2.2216145992279053, "reward_std": 0.23199857771396637, "rewards/GeoLocAccuracyV2ORM/mean": 0.625, "rewards/GeoLocAccuracyV2ORM/std": 0.4507303833961487, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5966145992279053, "rewards/GeoVisalEntityMatch2ORM/std": 0.21016068756580353, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 339, "train_speed(iter/s)": 0.02818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 413.0, "completions/mean_length": 349.84375, "completions/min_length": 272.0, "epoch": 0.04073807812125569, "grad_norm": 1.2409475531553606, "kl": 0.34901711344718933, "learning_rate": 9.97632955945704e-07, "loss": 0.00034972530556842685, "memory(GiB)": 165.8, "reward": 2.480729103088379, "reward_std": 0.26118624210357666, "rewards/GeoLocAccuracyV2ORM/mean": 0.8062500357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.39304813742637634, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6744791865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.2110188603401184, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 340, "train_speed(iter/s)": 0.028197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 358.0833435058594, "completions/min_length": 303.0, "epoch": 0.04085789599808291, "grad_norm": 1.2247081659845311, "kl": 0.3551909476518631, "learning_rate": 9.976144420435505e-07, "loss": 0.00035587450838647783, "memory(GiB)": 165.8, "reward": 2.5227184295654297, "reward_std": 0.14383059740066528, "rewards/GeoLocAccuracyV2ORM/mean": 0.7895833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.35702842473983765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.733134925365448, "rewards/GeoVisalEntityMatch2ORM/std": 0.16525238752365112, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 341, "train_speed(iter/s)": 0.028216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 483.0, "completions/mean_length": 377.51043701171875, "completions/min_length": 303.0, "epoch": 0.040977713874910136, "grad_norm": 1.1334723591486642, "kl": 0.35221680998802185, "learning_rate": 9.975958561927989e-07, "loss": 0.00035075348569080234, "memory(GiB)": 165.8, "reward": 2.418837070465088, "reward_std": 0.21036916971206665, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4396701753139496, "rewards/GeoVisalEntityMatch2ORM/std": 0.22013665735721588, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 342, "train_speed(iter/s)": 0.02823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 381.41668701171875, "completions/min_length": 312.0, "epoch": 0.04109753175173736, "grad_norm": 1.1622136528107463, "kl": 0.328252375125885, "learning_rate": 9.975771983961368e-07, "loss": 0.0003301352262496948, "memory(GiB)": 165.8, "reward": 2.335416793823242, "reward_std": 0.12229776382446289, "rewards/GeoLocAccuracyV2ORM/mean": 0.5625, "rewards/GeoLocAccuracyV2ORM/std": 0.45450955629348755, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7729166746139526, "rewards/GeoVisalEntityMatch2ORM/std": 0.18182392418384552, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 343, "train_speed(iter/s)": 0.02825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/mean_length": 386.51043701171875, "completions/min_length": 318.0, "epoch": 0.04121734962856458, "grad_norm": 0.8574450931234713, "kl": 0.3350455313920975, "learning_rate": 9.975584686562615e-07, "loss": 0.00033546099439263344, "memory(GiB)": 165.8, "reward": 2.3011364936828613, "reward_std": 0.12367109954357147, "rewards/GeoLocAccuracyV2ORM/mean": 0.7291666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4371959865093231, "rewards/GeoVisalEntityMatch2ORM/mean": 0.571969747543335, "rewards/GeoVisalEntityMatch2ORM/std": 0.26510781049728394, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 344, "train_speed(iter/s)": 0.028268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 399.26043701171875, "completions/min_length": 332.0, "epoch": 0.041337167505391806, "grad_norm": 1.0735648557404305, "kl": 0.4951344281435013, "learning_rate": 9.975396669758816e-07, "loss": 0.00047244131565093994, "memory(GiB)": 165.8, "reward": 2.06130051612854, "reward_std": 0.254410982131958, "rewards/GeoLocAccuracyV2ORM/mean": 0.5, "rewards/GeoLocAccuracyV2ORM/std": 0.5026246905326843, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5717172026634216, "rewards/GeoVisalEntityMatch2ORM/std": 0.18082140386104584, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 345, "train_speed(iter/s)": 0.028285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 411.26043701171875, "completions/min_length": 348.0, "epoch": 0.04145698538221903, "grad_norm": 1.1142433959233047, "kl": 0.3665509521961212, "learning_rate": 9.97520793357715e-07, "loss": 0.00036817044019699097, "memory(GiB)": 165.8, "reward": 2.421085834503174, "reward_std": 0.20569616556167603, "rewards/GeoLocAccuracyV2ORM/mean": 0.8104166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3853854238986969, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6106692552566528, "rewards/GeoVisalEntityMatch2ORM/std": 0.13308431208133698, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 346, "train_speed(iter/s)": 0.028302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 396.22918701171875, "completions/min_length": 340.0, "epoch": 0.04157680325904625, "grad_norm": 1.1215361869951106, "kl": 0.3922899216413498, "learning_rate": 9.975018478044911e-07, "loss": 0.0003934589622076601, "memory(GiB)": 165.8, "reward": 2.3720240592956543, "reward_std": 0.17793993651866913, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.31719714403152466, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5178571939468384, "rewards/GeoVisalEntityMatch2ORM/std": 0.13234807550907135, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 347, "train_speed(iter/s)": 0.028317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 389.26043701171875, "completions/min_length": 321.0, "epoch": 0.041696621135873475, "grad_norm": 1.0283420746461867, "kl": 0.36046357452869415, "learning_rate": 9.97482830318949e-07, "loss": 0.00036107253981754184, "memory(GiB)": 165.8, "reward": 2.630516529083252, "reward_std": 0.08905155211687088, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6305164694786072, "rewards/GeoVisalEntityMatch2ORM/std": 0.17815537750720978, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 348, "train_speed(iter/s)": 0.028328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 489.0, "completions/mean_length": 405.9895935058594, "completions/min_length": 332.0, "epoch": 0.04181643901270069, "grad_norm": 1.1682585418236064, "kl": 0.8482407629489899, "learning_rate": 9.974637409038383e-07, "loss": 0.0008338069310411811, "memory(GiB)": 165.8, "reward": 2.375, "reward_std": 0.45654013752937317, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3547917604446411, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6666666865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.22002506256103516, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 349, "train_speed(iter/s)": 0.028326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/mean_length": 401.07293701171875, "completions/min_length": 351.0, "epoch": 0.041936256889527915, "grad_norm": 1.1077345975878055, "kl": 0.3948696553707123, "learning_rate": 9.974445795619191e-07, "loss": 0.00039587420178577304, "memory(GiB)": 165.8, "reward": 2.4881696701049805, "reward_std": 0.10039111971855164, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.48816967010498047, "rewards/GeoVisalEntityMatch2ORM/std": 0.1153990849852562, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 350, "train_speed(iter/s)": 0.028342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 495.0, "completions/mean_length": 410.8020935058594, "completions/min_length": 348.0, "epoch": 0.04205607476635514, "grad_norm": 1.1061987527531805, "kl": 0.3509533703327179, "learning_rate": 9.974253462959618e-07, "loss": 0.0003534555435180664, "memory(GiB)": 165.8, "reward": 2.3583333492279053, "reward_std": 0.10945774614810944, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6083333492279053, "rewards/GeoVisalEntityMatch2ORM/std": 0.315375953912735, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 351, "train_speed(iter/s)": 0.028356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 470.0, "completions/mean_length": 395.6875, "completions/min_length": 307.0, "epoch": 0.04217589264318236, "grad_norm": 1.0718664745021946, "kl": 0.422460600733757, "learning_rate": 9.974060411087476e-07, "loss": 0.00042074546217918396, "memory(GiB)": 165.8, "reward": 2.334862232208252, "reward_std": 0.2946125864982605, "rewards/GeoLocAccuracyV2ORM/mean": 0.9104166030883789, "rewards/GeoLocAccuracyV2ORM/std": 0.2657577693462372, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4348619878292084, "rewards/GeoVisalEntityMatch2ORM/std": 0.2497655749320984, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 352, "train_speed(iter/s)": 0.028353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.0, "completions/mean_length": 391.1770935058594, "completions/min_length": 346.0, "epoch": 0.042295710520009584, "grad_norm": 1.0583768750830242, "kl": 0.39614975452423096, "learning_rate": 9.973866640030674e-07, "loss": 0.0003966093063354492, "memory(GiB)": 165.8, "reward": 2.2621612548828125, "reward_std": 0.16794651746749878, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.3727564215660095, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5121610760688782, "rewards/GeoVisalEntityMatch2ORM/std": 0.12269946932792664, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 353, "train_speed(iter/s)": 0.028371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 393.7708435058594, "completions/min_length": 325.0, "epoch": 0.04241552839683681, "grad_norm": 1.125363224920607, "kl": 0.3504529893398285, "learning_rate": 9.97367214981723e-07, "loss": 0.0003514289855957031, "memory(GiB)": 165.8, "reward": 2.703538417816162, "reward_std": 0.12475035339593887, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7139551043510437, "rewards/GeoVisalEntityMatch2ORM/std": 0.14403539896011353, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 354, "train_speed(iter/s)": 0.02839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 453.0, "completions/mean_length": 375.85418701171875, "completions/min_length": 322.0, "epoch": 0.04253534627366403, "grad_norm": 1.1255476373217759, "kl": 0.6017458140850067, "learning_rate": 9.973476940475268e-07, "loss": 0.0005805616965517402, "memory(GiB)": 165.8, "reward": 2.4059152603149414, "reward_std": 0.29409047961235046, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.3060787618160248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5725818872451782, "rewards/GeoVisalEntityMatch2ORM/std": 0.2194034904241562, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 355, "train_speed(iter/s)": 0.028389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/mean_length": 379.84375, "completions/min_length": 332.0, "epoch": 0.042655164150491254, "grad_norm": 1.1900331151073067, "kl": 0.34870900213718414, "learning_rate": 9.973281012033008e-07, "loss": 0.0003497600555419922, "memory(GiB)": 165.8, "reward": 2.693286895751953, "reward_std": 0.10529862344264984, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6932870745658875, "rewards/GeoVisalEntityMatch2ORM/std": 0.16131548583507538, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 356, "train_speed(iter/s)": 0.028407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 425.2708435058594, "completions/min_length": 349.0, "epoch": 0.04277498202731848, "grad_norm": 1.0536423085321236, "kl": 0.3497021496295929, "learning_rate": 9.97308436451878e-07, "loss": 0.00034740567207336426, "memory(GiB)": 165.8, "reward": 2.4535715579986572, "reward_std": 0.26055002212524414, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.3392457962036133, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5973214507102966, "rewards/GeoVisalEntityMatch2ORM/std": 0.18724292516708374, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 357, "train_speed(iter/s)": 0.028423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 395.88543701171875, "completions/min_length": 339.0, "epoch": 0.0428947999041457, "grad_norm": 1.0551674254853907, "kl": 0.37586459517478943, "learning_rate": 9.972886997961016e-07, "loss": 0.0003770105540752411, "memory(GiB)": 165.8, "reward": 2.496354341506958, "reward_std": 0.17492187023162842, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.24566414952278137, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5796875357627869, "rewards/GeoVisalEntityMatch2ORM/std": 0.11833897978067398, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 358, "train_speed(iter/s)": 0.028441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.20833333333333334, "completions/max_length": 471.0, "completions/mean_length": 384.03125, "completions/min_length": 325.0, "epoch": 0.04301461778097292, "grad_norm": 1.1466012067489513, "kl": 0.9441899061203003, "learning_rate": 9.972688912388257e-07, "loss": 0.0009451459045521915, "memory(GiB)": 165.8, "reward": 2.3945271968841553, "reward_std": 0.27430787682533264, "rewards/GeoLocAccuracyV2ORM/mean": 0.7833333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4120977520942688, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8195271492004395, "rewards/GeoVisalEntityMatch2ORM/std": 0.12912382185459137, "rewards/MathFormat/mean": 0.7916666865348816, "rewards/MathFormat/std": 0.40824827551841736, "step": 359, "train_speed(iter/s)": 0.02843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 371.96875, "completions/min_length": 320.0, "epoch": 0.043134435657800146, "grad_norm": 1.1246612799666365, "kl": 0.35498733818531036, "learning_rate": 9.972490107829141e-07, "loss": 0.00035618743277154863, "memory(GiB)": 165.8, "reward": 2.664980173110962, "reward_std": 0.1476590633392334, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6649801731109619, "rewards/GeoVisalEntityMatch2ORM/std": 0.19298100471496582, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 360, "train_speed(iter/s)": 0.028449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 378.79168701171875, "completions/min_length": 309.0, "epoch": 0.04325425353462737, "grad_norm": 1.2406578992037676, "kl": 0.37878577411174774, "learning_rate": 9.972290584312411e-07, "loss": 0.00037943324423395097, "memory(GiB)": 165.8, "reward": 2.225545883178711, "reward_std": 0.2578924596309662, "rewards/GeoLocAccuracyV2ORM/mean": 0.7416666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.37603655457496643, "rewards/GeoVisalEntityMatch2ORM/mean": 0.483879029750824, "rewards/GeoVisalEntityMatch2ORM/std": 0.168942391872406, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 361, "train_speed(iter/s)": 0.028473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 484.0, "completions/mean_length": 406.6875, "completions/min_length": 339.0, "epoch": 0.043374071411454586, "grad_norm": 1.1265871177685731, "kl": 0.36571764945983887, "learning_rate": 9.972090341866915e-07, "loss": 0.00036847591400146484, "memory(GiB)": 165.8, "reward": 2.572453737258911, "reward_std": 0.11379662156105042, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5891203880310059, "rewards/GeoVisalEntityMatch2ORM/std": 0.1861063838005066, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 362, "train_speed(iter/s)": 0.028487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 404.03125, "completions/min_length": 337.0, "epoch": 0.04349388928828181, "grad_norm": 1.1144704931092022, "kl": 0.38657891750335693, "learning_rate": 9.97188938052161e-07, "loss": 0.0003864603932015598, "memory(GiB)": 165.8, "reward": 2.2982280254364014, "reward_std": 0.08962465822696686, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.548227846622467, "rewards/GeoVisalEntityMatch2ORM/std": 0.19969525933265686, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 363, "train_speed(iter/s)": 0.028502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/mean_length": 395.01043701171875, "completions/min_length": 308.0, "epoch": 0.04361370716510903, "grad_norm": 1.0956511756485907, "kl": 0.3525787889957428, "learning_rate": 9.971687700305548e-07, "loss": 0.0003542651829775423, "memory(GiB)": 165.8, "reward": 2.120076894760132, "reward_std": 0.26256099343299866, "rewards/GeoLocAccuracyV2ORM/mean": 0.6479167342185974, "rewards/GeoLocAccuracyV2ORM/std": 0.4417618215084076, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4721602201461792, "rewards/GeoVisalEntityMatch2ORM/std": 0.16925781965255737, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 364, "train_speed(iter/s)": 0.028518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 398.10418701171875, "completions/min_length": 327.0, "epoch": 0.043733525041936255, "grad_norm": 1.028707629248373, "kl": 0.4033854901790619, "learning_rate": 9.971485301247891e-07, "loss": 0.00040019553853198886, "memory(GiB)": 165.8, "reward": 2.508556604385376, "reward_std": 0.24855633080005646, "rewards/GeoLocAccuracyV2ORM/mean": 0.8312500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.33128538727760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6877232789993286, "rewards/GeoVisalEntityMatch2ORM/std": 0.16890276968479156, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 365, "train_speed(iter/s)": 0.028534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 416.8645935058594, "completions/min_length": 340.0, "epoch": 0.04385334291876348, "grad_norm": 1.0262257166516204, "kl": 0.39110860228538513, "learning_rate": 9.971282183377902e-07, "loss": 0.0003883813915308565, "memory(GiB)": 165.8, "reward": 2.3673009872436523, "reward_std": 0.1419522911310196, "rewards/GeoLocAccuracyV2ORM/mean": 0.7395833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4411657154560089, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6381343603134155, "rewards/GeoVisalEntityMatch2ORM/std": 0.21445399522781372, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 366, "train_speed(iter/s)": 0.028549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 477.0, "completions/mean_length": 404.9375, "completions/min_length": 316.0, "epoch": 0.0439731607955907, "grad_norm": 1.239401765946442, "kl": 0.3512454032897949, "learning_rate": 9.971078346724952e-07, "loss": 0.0003518660960253328, "memory(GiB)": 165.8, "reward": 2.599640369415283, "reward_std": 0.13469335436820984, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5996403694152832, "rewards/GeoVisalEntityMatch2ORM/std": 0.16472022235393524, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 367, "train_speed(iter/s)": 0.028546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 396.60418701171875, "completions/min_length": 316.0, "epoch": 0.044092978672417925, "grad_norm": 1.1312963170953547, "kl": 0.3660299777984619, "learning_rate": 9.97087379131851e-07, "loss": 0.0003667175769805908, "memory(GiB)": 165.8, "reward": 2.5461807250976562, "reward_std": 0.16256839036941528, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003034889698029, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5649305582046509, "rewards/GeoVisalEntityMatch2ORM/std": 0.15686406195163727, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 368, "train_speed(iter/s)": 0.028561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 546.0, "completions/mean_length": 417.3958435058594, "completions/min_length": 340.0, "epoch": 0.04421279654924515, "grad_norm": 1.0905584442451448, "kl": 0.36188776791095734, "learning_rate": 9.970668517188157e-07, "loss": 0.00036228945828042924, "memory(GiB)": 165.8, "reward": 2.413628578186035, "reward_std": 0.2337474375963211, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.33245497941970825, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5386285185813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.1570986956357956, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 369, "train_speed(iter/s)": 0.028577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 506.0, "completions/mean_length": 416.2708435058594, "completions/min_length": 336.0, "epoch": 0.04433261442607237, "grad_norm": 1.0492970072584682, "kl": 0.5644348859786987, "learning_rate": 9.970462524363567e-07, "loss": 0.0005612274399027228, "memory(GiB)": 165.8, "reward": 2.2679717540740967, "reward_std": 0.45027416944503784, "rewards/GeoLocAccuracyV2ORM/mean": 0.699999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.4495611786842346, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8179716467857361, "rewards/GeoVisalEntityMatch2ORM/std": 0.13068760931491852, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 370, "train_speed(iter/s)": 0.028566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 532.0, "completions/mean_length": 447.10418701171875, "completions/min_length": 375.0, "epoch": 0.044452432302899594, "grad_norm": 1.0899499242191195, "kl": 0.4141418933868408, "learning_rate": 9.970255812874526e-07, "loss": 0.0004100663063582033, "memory(GiB)": 165.8, "reward": 2.516695499420166, "reward_std": 0.1754828691482544, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5375289916992188, "rewards/GeoVisalEntityMatch2ORM/std": 0.15889635682106018, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 371, "train_speed(iter/s)": 0.02858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 531.0, "completions/mean_length": 424.0625, "completions/min_length": 323.0, "epoch": 0.04457225017972682, "grad_norm": 1.1238732777380698, "kl": 0.391116201877594, "learning_rate": 9.970048382750923e-07, "loss": 0.0003922929463442415, "memory(GiB)": 165.8, "reward": 2.5710153579711914, "reward_std": 0.10834214091300964, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5710152387619019, "rewards/GeoVisalEntityMatch2ORM/std": 0.20608676970005035, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 372, "train_speed(iter/s)": 0.028596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 552.0, "completions/mean_length": 447.88543701171875, "completions/min_length": 376.0, "epoch": 0.04469206805655404, "grad_norm": 1.0318154700422675, "kl": 0.38076119124889374, "learning_rate": 9.96984023402275e-07, "loss": 0.00038153183413669467, "memory(GiB)": 165.8, "reward": 2.5812501907348633, "reward_std": 0.3383418023586273, "rewards/GeoLocAccuracyV2ORM/mean": 0.8958333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.2783094346523285, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7062500715255737, "rewards/GeoVisalEntityMatch2ORM/std": 0.21908903121948242, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 373, "train_speed(iter/s)": 0.028618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4895833333333333, "completions/max_length": 524.0, "completions/mean_length": 425.63543701171875, "completions/min_length": 391.0, "epoch": 0.044811885933381264, "grad_norm": 1.0827622775003696, "kl": 0.5960274636745453, "learning_rate": 9.9696313667201e-07, "loss": 0.0005969889461994171, "memory(GiB)": 165.8, "reward": 1.6595823764801025, "reward_std": 0.13974331319332123, "rewards/GeoLocAccuracyV2ORM/mean": 0.5, "rewards/GeoLocAccuracyV2ORM/std": 0.5026246905326843, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6491656303405762, "rewards/GeoVisalEntityMatch2ORM/std": 0.15589238703250885, "rewards/MathFormat/mean": 0.5104166865348816, "rewards/MathFormat/std": 0.5025156140327454, "step": 374, "train_speed(iter/s)": 0.028611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 542.0, "completions/mean_length": 450.0625, "completions/min_length": 367.0, "epoch": 0.04493170381020848, "grad_norm": 1.0544711527632564, "kl": 0.4073089361190796, "learning_rate": 9.969421780873175e-07, "loss": 0.00040608644485473633, "memory(GiB)": 165.8, "reward": 2.2477867603302, "reward_std": 0.21620294451713562, "rewards/GeoLocAccuracyV2ORM/mean": 0.7291666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.44672298431396484, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5394533276557922, "rewards/GeoVisalEntityMatch2ORM/std": 0.17593644559383392, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 375, "train_speed(iter/s)": 0.028625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/mean_length": 417.3020935058594, "completions/min_length": 344.0, "epoch": 0.0450515216870357, "grad_norm": 1.060115310854574, "kl": 0.36798250675201416, "learning_rate": 9.969211476512276e-07, "loss": 0.00036983442259952426, "memory(GiB)": 165.8, "reward": 2.6317338943481445, "reward_std": 0.10775745660066605, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6317340135574341, "rewards/GeoVisalEntityMatch2ORM/std": 0.14615198969841003, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 376, "train_speed(iter/s)": 0.02864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 562.0, "completions/mean_length": 435.76043701171875, "completions/min_length": 336.0, "epoch": 0.045171339563862926, "grad_norm": 1.0606951734934813, "kl": 0.3601263016462326, "learning_rate": 9.969000453667814e-07, "loss": 0.0003609446284826845, "memory(GiB)": 165.8, "reward": 2.5511574745178223, "reward_std": 0.12099568545818329, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5511574745178223, "rewards/GeoVisalEntityMatch2ORM/std": 0.17794887721538544, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 377, "train_speed(iter/s)": 0.028663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 529.0, "completions/mean_length": 457.9375, "completions/min_length": 385.0, "epoch": 0.04529115744069015, "grad_norm": 1.0076164419363869, "kl": 0.42895929515361786, "learning_rate": 9.968788712370295e-07, "loss": 0.00042525058961473405, "memory(GiB)": 165.8, "reward": 2.248297691345215, "reward_std": 0.37031519412994385, "rewards/GeoLocAccuracyV2ORM/mean": 0.8729166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.31135669350624084, "rewards/GeoVisalEntityMatch2ORM/mean": 0.42746442556381226, "rewards/GeoVisalEntityMatch2ORM/std": 0.20855750143527985, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336146235466003, "step": 378, "train_speed(iter/s)": 0.028676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 535.0, "completions/mean_length": 458.6770935058594, "completions/min_length": 382.0, "epoch": 0.04541097531751737, "grad_norm": 1.1342151024622726, "kl": 0.4571702182292938, "learning_rate": 9.968576252650337e-07, "loss": 0.00045256566954776645, "memory(GiB)": 165.8, "reward": 2.4049322605133057, "reward_std": 0.345681369304657, "rewards/GeoLocAccuracyV2ORM/mean": 0.9104167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.28115519881248474, "rewards/GeoVisalEntityMatch2ORM/mean": 0.567432165145874, "rewards/GeoVisalEntityMatch2ORM/std": 0.1525687277317047, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 379, "train_speed(iter/s)": 0.028691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 437.1770935058594, "completions/min_length": 373.0, "epoch": 0.045530793194344596, "grad_norm": 1.0547972226804923, "kl": 0.37151943147182465, "learning_rate": 9.968363074538661e-07, "loss": 0.00037079057074151933, "memory(GiB)": 165.8, "reward": 2.3777778148651123, "reward_std": 0.17111186683177948, "rewards/GeoLocAccuracyV2ORM/mean": 0.8395833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.3252461850643158, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5486111640930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.13570785522460938, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 380, "train_speed(iter/s)": 0.028703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/mean_length": 427.5833435058594, "completions/min_length": 361.0, "epoch": 0.04565061107117182, "grad_norm": 1.075989306449702, "kl": 0.3718640059232712, "learning_rate": 9.968149178066087e-07, "loss": 0.0003728717565536499, "memory(GiB)": 165.8, "reward": 2.5643162727355957, "reward_std": 0.09899431467056274, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5643162727355957, "rewards/GeoVisalEntityMatch2ORM/std": 0.15425032377243042, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 381, "train_speed(iter/s)": 0.028716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 438.7395935058594, "completions/min_length": 384.0, "epoch": 0.04577042894799904, "grad_norm": 1.0535581633019448, "kl": 0.5118965059518814, "learning_rate": 9.967934563263543e-07, "loss": 0.0004965414991602302, "memory(GiB)": 165.8, "reward": 2.578848361968994, "reward_std": 0.15874728560447693, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5996817350387573, "rewards/GeoVisalEntityMatch2ORM/std": 0.12568841874599457, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 382, "train_speed(iter/s)": 0.028729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/mean_length": 413.09375, "completions/min_length": 345.0, "epoch": 0.045890246824826265, "grad_norm": 1.0703305848753253, "kl": 0.38565991818904877, "learning_rate": 9.96771923016206e-07, "loss": 0.000386583327781409, "memory(GiB)": 165.8, "reward": 2.5130209922790527, "reward_std": 0.13445334136486053, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5130208730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.22801898419857025, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 383, "train_speed(iter/s)": 0.028668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 446.0, "completions/mean_length": 381.01043701171875, "completions/min_length": 307.0, "epoch": 0.04601006470165349, "grad_norm": 1.0761323300916157, "kl": 0.5258961170911789, "learning_rate": 9.96750317879277e-07, "loss": 0.0005166580667719245, "memory(GiB)": 165.8, "reward": 2.2161459922790527, "reward_std": 0.3241211175918579, "rewards/GeoLocAccuracyV2ORM/mean": 0.7270833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.40012606978416443, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5307291746139526, "rewards/GeoVisalEntityMatch2ORM/std": 0.1718510538339615, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087526738643646, "step": 384, "train_speed(iter/s)": 0.028665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 393.3958435058594, "completions/min_length": 325.0, "epoch": 0.04612988257848071, "grad_norm": 1.188335787728277, "kl": 0.38792821764945984, "learning_rate": 9.96728640918691e-07, "loss": 0.00038982927799224854, "memory(GiB)": 165.8, "reward": 2.4496941566467285, "reward_std": 0.17002198100090027, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336149215698242, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5017774701118469, "rewards/GeoVisalEntityMatch2ORM/std": 0.13702154159545898, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 385, "train_speed(iter/s)": 0.028671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 400.38543701171875, "completions/min_length": 342.0, "epoch": 0.046249700455307935, "grad_norm": 1.1132632229142723, "kl": 0.3721648305654526, "learning_rate": 9.967068921375826e-07, "loss": 0.00037309032632037997, "memory(GiB)": 165.8, "reward": 2.319502353668213, "reward_std": 0.16691723465919495, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.17868919670581818, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3611690402030945, "rewards/GeoVisalEntityMatch2ORM/std": 0.15060953795909882, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 386, "train_speed(iter/s)": 0.028668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 522.0, "completions/mean_length": 387.04168701171875, "completions/min_length": 328.0, "epoch": 0.04636951833213515, "grad_norm": 1.1060717954776276, "kl": 0.40191128849983215, "learning_rate": 9.966850715390965e-07, "loss": 0.00039948400808498263, "memory(GiB)": 165.8, "reward": 2.3966312408447266, "reward_std": 0.2358969748020172, "rewards/GeoLocAccuracyV2ORM/mean": 0.8958333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3070802092552185, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5320478081703186, "rewards/GeoVisalEntityMatch2ORM/std": 0.2677226960659027, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 387, "train_speed(iter/s)": 0.028665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/mean_length": 368.69793701171875, "completions/min_length": 309.0, "epoch": 0.046489336208962374, "grad_norm": 1.1467946886843026, "kl": 0.3692219853401184, "learning_rate": 9.966631791263871e-07, "loss": 0.00037012994289398193, "memory(GiB)": 165.8, "reward": 2.5463955402374268, "reward_std": 0.18041032552719116, "rewards/GeoLocAccuracyV2ORM/mean": 0.9291666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.23749423027038574, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6172288656234741, "rewards/GeoVisalEntityMatch2ORM/std": 0.23236927390098572, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 388, "train_speed(iter/s)": 0.028681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 361.0625, "completions/min_length": 304.0, "epoch": 0.0466091540857896, "grad_norm": 0.9986971180727251, "kl": 0.3511642664670944, "learning_rate": 9.966412149026202e-07, "loss": 0.0003529836831148714, "memory(GiB)": 165.8, "reward": 2.709399938583374, "reward_std": 0.09392797946929932, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7260664701461792, "rewards/GeoVisalEntityMatch2ORM/std": 0.20260930061340332, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 389, "train_speed(iter/s)": 0.02868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/mean_length": 368.0833435058594, "completions/min_length": 306.0, "epoch": 0.04672897196261682, "grad_norm": 1.1302106279309694, "kl": 0.3765043765306473, "learning_rate": 9.966191788709714e-07, "loss": 0.0003769099712371826, "memory(GiB)": 165.8, "reward": 2.5889759063720703, "reward_std": 0.10600799322128296, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5889756679534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.17463363707065582, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 390, "train_speed(iter/s)": 0.028689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 358.0208435058594, "completions/min_length": 290.0, "epoch": 0.046848789839444044, "grad_norm": 1.0409323508660921, "kl": 0.366679847240448, "learning_rate": 9.96597071034627e-07, "loss": 0.0003680636582430452, "memory(GiB)": 165.8, "reward": 2.5507068634033203, "reward_std": 0.10099397599697113, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5507068634033203, "rewards/GeoVisalEntityMatch2ORM/std": 0.17441987991333008, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 391, "train_speed(iter/s)": 0.028704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 356.85418701171875, "completions/min_length": 291.0, "epoch": 0.04696860771627127, "grad_norm": 1.0952766775797966, "kl": 0.38233911991119385, "learning_rate": 9.96574891396783e-07, "loss": 0.00038355341530404985, "memory(GiB)": 165.8, "reward": 2.368773937225342, "reward_std": 0.13279560208320618, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5687736868858337, "rewards/GeoVisalEntityMatch2ORM/std": 0.18310724198818207, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 392, "train_speed(iter/s)": 0.028729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/mean_length": 363.16668701171875, "completions/min_length": 293.0, "epoch": 0.04708842559309849, "grad_norm": 1.077800858818171, "kl": 0.36881859600543976, "learning_rate": 9.96552639960647e-07, "loss": 0.0003697127103805542, "memory(GiB)": 165.8, "reward": 2.4509549140930176, "reward_std": 0.24456177651882172, "rewards/GeoLocAccuracyV2ORM/mean": 0.84375, "rewards/GeoLocAccuracyV2ORM/std": 0.3649982213973999, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6072049140930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.14665181934833527, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 393, "train_speed(iter/s)": 0.028745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 353.09375, "completions/min_length": 296.0, "epoch": 0.04720824346992571, "grad_norm": 1.19524663613053, "kl": 0.3917136639356613, "learning_rate": 9.965303167294357e-07, "loss": 0.00039230287075042725, "memory(GiB)": 165.8, "reward": 2.471874952316284, "reward_std": 0.17893840372562408, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5052083730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.2028065174818039, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 394, "train_speed(iter/s)": 0.02876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/mean_length": 368.875, "completions/min_length": 316.0, "epoch": 0.047328061346752937, "grad_norm": 1.1872254545477197, "kl": 0.3795868307352066, "learning_rate": 9.965079217063771e-07, "loss": 0.0003797300159931183, "memory(GiB)": 165.8, "reward": 2.4203126430511475, "reward_std": 0.14672040939331055, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4453125, "rewards/GeoVisalEntityMatch2ORM/std": 0.1165602058172226, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 395, "train_speed(iter/s)": 0.028776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/mean_length": 368.625, "completions/min_length": 301.0, "epoch": 0.04744787922358016, "grad_norm": 1.1466251452583005, "kl": 0.35477426648139954, "learning_rate": 9.964854548947091e-07, "loss": 0.00035587450838647783, "memory(GiB)": 165.8, "reward": 2.534635543823242, "reward_std": 0.17730754613876343, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5846354365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.15034720301628113, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 396, "train_speed(iter/s)": 0.028791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 410.0, "completions/mean_length": 347.04168701171875, "completions/min_length": 272.0, "epoch": 0.04756769710040738, "grad_norm": 1.094802697071481, "kl": 0.3741194158792496, "learning_rate": 9.964629162976798e-07, "loss": 0.00037480395985767245, "memory(GiB)": 165.8, "reward": 2.2698495388031006, "reward_std": 0.10230239480733871, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5198495388031006, "rewards/GeoVisalEntityMatch2ORM/std": 0.15432627499103546, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 397, "train_speed(iter/s)": 0.028807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/mean_length": 347.85418701171875, "completions/min_length": 283.0, "epoch": 0.047687514977234606, "grad_norm": 1.1144073489308777, "kl": 0.390165314078331, "learning_rate": 9.964403059185487e-07, "loss": 0.00039159756852313876, "memory(GiB)": 165.8, "reward": 2.596874952316284, "reward_std": 0.19140419363975525, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336146235466003, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6489583253860474, "rewards/GeoVisalEntityMatch2ORM/std": 0.2065703123807907, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 398, "train_speed(iter/s)": 0.02882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 413.0, "completions/mean_length": 352.9895935058594, "completions/min_length": 312.0, "epoch": 0.04780733285406183, "grad_norm": 1.1557270032406586, "kl": 0.3947697579860687, "learning_rate": 9.964176237605843e-07, "loss": 0.0003948882222175598, "memory(GiB)": 165.8, "reward": 2.6054482460021973, "reward_std": 0.19758820533752441, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6262813806533813, "rewards/GeoVisalEntityMatch2ORM/std": 0.21224623918533325, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 399, "train_speed(iter/s)": 0.028817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 410.0, "completions/mean_length": 351.76043701171875, "completions/min_length": 274.0, "epoch": 0.047927150730889045, "grad_norm": 1.1604207806227627, "kl": 0.39057017862796783, "learning_rate": 9.963948698270665e-07, "loss": 0.0003923600015696138, "memory(GiB)": 165.8, "reward": 2.776562452316284, "reward_std": 0.11290531605482101, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7765625715255737, "rewards/GeoVisalEntityMatch2ORM/std": 0.1964089572429657, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 400, "train_speed(iter/s)": 0.028832 }, { "epoch": 0.047927150730889045, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.003844246031746032, "eval_completions/max_length": 432.9166666666667, "eval_completions/mean_length": 371.1455940973191, "eval_completions/min_length": 313.55357142857144, "eval_kl": 238.6193354885493, "eval_loss": 0.2583176791667938, "eval_reward": 2.4989745240835917, "eval_reward_std": 0.15790458613385758, "eval_rewards/GeoLocAccuracyV2ORM/mean": 0.9022569551709152, "eval_rewards/GeoLocAccuracyV2ORM/std": 0.1380696406233169, "eval_rewards/GeoVisalEntityMatch2ORM/mean": 0.6001897289284638, "eval_rewards/GeoVisalEntityMatch2ORM/std": 0.15492817786123073, "eval_rewards/MathFormat/mean": 0.9965277793151992, "eval_rewards/MathFormat/std": 0.011925970603312765, "eval_runtime": 1721.6358, "eval_samples_per_second": 0.196, "eval_steps_per_second": 0.005, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 376.0625, "completions/min_length": 298.0, "epoch": 0.04804696860771627, "grad_norm": 1.1051350437638439, "kl": 0.40719500184059143, "learning_rate": 9.96372044121285e-07, "loss": 0.00040766102029010653, "memory(GiB)": 165.8, "reward": 2.5960443019866943, "reward_std": 0.15812742710113525, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6127108335494995, "rewards/GeoVisalEntityMatch2ORM/std": 0.15731927752494812, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 401, "train_speed(iter/s)": 0.025593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 449.0, "completions/mean_length": 377.5208435058594, "completions/min_length": 319.0, "epoch": 0.04816678648454349, "grad_norm": 1.3582454868497968, "kl": 2.1564380824565887, "learning_rate": 9.963491466465403e-07, "loss": 0.00210610032081604, "memory(GiB)": 165.8, "reward": 2.0651443004608154, "reward_std": 0.38374269008636475, "rewards/GeoLocAccuracyV2ORM/mean": 0.7354167699813843, "rewards/GeoLocAccuracyV2ORM/std": 0.4078129529953003, "rewards/GeoVisalEntityMatch2ORM/mean": 0.44431090354919434, "rewards/GeoVisalEntityMatch2ORM/std": 0.10728754103183746, "rewards/MathFormat/mean": 0.8854166865348816, "rewards/MathFormat/std": 0.3201904594898224, "step": 402, "train_speed(iter/s)": 0.025592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 367.63543701171875, "completions/min_length": 298.0, "epoch": 0.048286604361370715, "grad_norm": 1.0161468949876182, "kl": 0.38660359382629395, "learning_rate": 9.96326177406143e-07, "loss": 0.00038730850792489946, "memory(GiB)": 165.8, "reward": 2.671875, "reward_std": 0.08162147551774979, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.671875, "rewards/GeoVisalEntityMatch2ORM/std": 0.28900885581970215, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 403, "train_speed(iter/s)": 0.025611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 455.0, "completions/mean_length": 386.04168701171875, "completions/min_length": 332.0, "epoch": 0.04840642223819794, "grad_norm": 0.9611796407070862, "kl": 0.7614239454269409, "learning_rate": 9.963031364034143e-07, "loss": 0.0007366872159764171, "memory(GiB)": 165.8, "reward": 2.551562547683716, "reward_std": 0.2530391812324524, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490600049495697, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6140625476837158, "rewards/GeoVisalEntityMatch2ORM/std": 0.1692100316286087, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 404, "train_speed(iter/s)": 0.025615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 397.51043701171875, "completions/min_length": 333.0, "epoch": 0.04852624011502516, "grad_norm": 1.0812336692030655, "kl": 0.4122651070356369, "learning_rate": 9.962800236416852e-07, "loss": 0.00041357678128406405, "memory(GiB)": 165.8, "reward": 2.7118053436279297, "reward_std": 0.10758071392774582, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7118055820465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.13495902717113495, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 405, "train_speed(iter/s)": 0.025633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3229166666666667, "completions/max_length": 467.0, "completions/mean_length": 383.94793701171875, "completions/min_length": 326.0, "epoch": 0.048646057991852384, "grad_norm": 1.0938323214626213, "kl": 0.6396948993206024, "learning_rate": 9.96256839124298e-07, "loss": 0.0006365329027175903, "memory(GiB)": 165.8, "reward": 1.8194031715393066, "reward_std": 0.41925644874572754, "rewards/GeoLocAccuracyV2ORM/mean": 0.6770833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4700457453727722, "rewards/GeoVisalEntityMatch2ORM/mean": 0.46523648500442505, "rewards/GeoVisalEntityMatch2ORM/std": 0.17168205976486206, "rewards/MathFormat/mean": 0.6770833730697632, "rewards/MathFormat/std": 0.4700457453727722, "step": 406, "train_speed(iter/s)": 0.025633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/mean_length": 403.375, "completions/min_length": 322.0, "epoch": 0.04876587586867961, "grad_norm": 1.1230036782721582, "kl": 0.4108402729034424, "learning_rate": 9.962335828546047e-07, "loss": 0.0004118333454243839, "memory(GiB)": 165.8, "reward": 2.4818453788757324, "reward_std": 0.17238697409629822, "rewards/GeoLocAccuracyV2ORM/mean": 0.8416666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3204163908958435, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6401786208152771, "rewards/GeoVisalEntityMatch2ORM/std": 0.16838258504867554, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 407, "train_speed(iter/s)": 0.02565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 395.125, "completions/min_length": 300.0, "epoch": 0.04888569374550683, "grad_norm": 1.1157887424019333, "kl": 0.3990577757358551, "learning_rate": 9.96210254835968e-07, "loss": 0.0003993511199951172, "memory(GiB)": 165.8, "reward": 2.6538195610046387, "reward_std": 0.21287205815315247, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713991165161, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7621528506278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.1809571385383606, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 408, "train_speed(iter/s)": 0.025669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 409.01043701171875, "completions/min_length": 351.0, "epoch": 0.049005511622334054, "grad_norm": 0.9500310373002445, "kl": 0.3981832265853882, "learning_rate": 9.961868550717602e-07, "loss": 0.0003985464572906494, "memory(GiB)": 165.8, "reward": 2.4474704265594482, "reward_std": 0.1030469536781311, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6391369104385376, "rewards/GeoVisalEntityMatch2ORM/std": 0.2840874493122101, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 409, "train_speed(iter/s)": 0.025649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 416.7708435058594, "completions/min_length": 363.0, "epoch": 0.04912532949916128, "grad_norm": 1.0963910581700356, "kl": 0.408199742436409, "learning_rate": 9.961633835653654e-07, "loss": 0.00040935975266620517, "memory(GiB)": 165.8, "reward": 2.751748561859131, "reward_std": 0.11663012951612473, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7517485618591309, "rewards/GeoVisalEntityMatch2ORM/std": 0.1929851919412613, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 410, "train_speed(iter/s)": 0.025665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 499.0, "completions/mean_length": 423.0520935058594, "completions/min_length": 368.0, "epoch": 0.0492451473759885, "grad_norm": 1.054737610644646, "kl": 0.4692011922597885, "learning_rate": 9.961398403201767e-07, "loss": 0.0004689246416091919, "memory(GiB)": 165.8, "reward": 2.788715362548828, "reward_std": 0.20468690991401672, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8303819894790649, "rewards/GeoVisalEntityMatch2ORM/std": 0.15103916823863983, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 411, "train_speed(iter/s)": 0.025659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 490.0, "completions/mean_length": 417.47918701171875, "completions/min_length": 334.0, "epoch": 0.04936496525281572, "grad_norm": 1.194225992141315, "kl": 0.5947259962558746, "learning_rate": 9.961162253395988e-07, "loss": 0.0005945551092736423, "memory(GiB)": 165.8, "reward": 2.2009592056274414, "reward_std": 0.38860267400741577, "rewards/GeoLocAccuracyV2ORM/mean": 0.7374999523162842, "rewards/GeoLocAccuracyV2ORM/std": 0.4248219430446625, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6509590148925781, "rewards/GeoVisalEntityMatch2ORM/std": 0.19037875533103943, "rewards/MathFormat/mean": 0.8125, "rewards/MathFormat/std": 0.39236128330230713, "step": 412, "train_speed(iter/s)": 0.025643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17708333333333334, "completions/max_length": 531.0, "completions/mean_length": 419.875, "completions/min_length": 336.0, "epoch": 0.04948478312964294, "grad_norm": 1.1071892785420339, "kl": 0.7182362675666809, "learning_rate": 9.960925386270453e-07, "loss": 0.0007178162923082709, "memory(GiB)": 165.8, "reward": 2.0652408599853516, "reward_std": 0.4095265865325928, "rewards/GeoLocAccuracyV2ORM/mean": 0.6145833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.48924845457077026, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6277406215667725, "rewards/GeoVisalEntityMatch2ORM/std": 0.18852725625038147, "rewards/MathFormat/mean": 0.8229166865348816, "rewards/MathFormat/std": 0.3837431073188782, "step": 413, "train_speed(iter/s)": 0.025642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2916666666666667, "completions/max_length": 552.0, "completions/mean_length": 431.9270935058594, "completions/min_length": 366.0, "epoch": 0.04960460100647016, "grad_norm": 0.9729323472324519, "kl": 0.5385268330574036, "learning_rate": 9.960687801859414e-07, "loss": 0.0005388110876083374, "memory(GiB)": 165.8, "reward": 2.0520339012145996, "reward_std": 0.40800774097442627, "rewards/GeoLocAccuracyV2ORM/mean": 0.6291667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.4732678234577179, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7145336866378784, "rewards/GeoVisalEntityMatch2ORM/std": 0.166348397731781, "rewards/MathFormat/mean": 0.7083333730697632, "rewards/MathFormat/std": 0.45691564679145813, "step": 414, "train_speed(iter/s)": 0.025641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 523.0, "completions/mean_length": 429.6458435058594, "completions/min_length": 362.0, "epoch": 0.049724418883297386, "grad_norm": 0.88031479767068, "kl": 0.43241940438747406, "learning_rate": 9.960449500197226e-07, "loss": 0.0010291400831192732, "memory(GiB)": 165.8, "reward": 2.615596294403076, "reward_std": 0.08889337629079819, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6155960559844971, "rewards/GeoVisalEntityMatch2ORM/std": 0.17693163454532623, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 415, "train_speed(iter/s)": 0.025656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/mean_length": 411.28125, "completions/min_length": 329.0, "epoch": 0.04984423676012461, "grad_norm": 1.1318732455469687, "kl": 0.41067542135715485, "learning_rate": 9.960210481318339e-07, "loss": 0.0004123052058275789, "memory(GiB)": 165.8, "reward": 2.602083444595337, "reward_std": 0.1799018681049347, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6020833253860474, "rewards/GeoVisalEntityMatch2ORM/std": 0.3035029470920563, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 416, "train_speed(iter/s)": 0.025659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 477.0, "completions/mean_length": 407.88543701171875, "completions/min_length": 344.0, "epoch": 0.04996405463695183, "grad_norm": 1.164232419401254, "kl": 0.6793598532676697, "learning_rate": 9.959970745257314e-07, "loss": 0.0006788423052057624, "memory(GiB)": 165.8, "reward": 2.190972328186035, "reward_std": 0.24635076522827148, "rewards/GeoLocAccuracyV2ORM/mean": 0.78125, "rewards/GeoLocAccuracyV2ORM/std": 0.4155687391757965, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6284722089767456, "rewards/GeoVisalEntityMatch2ORM/std": 0.18790492415428162, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 417, "train_speed(iter/s)": 0.025658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 517.0, "completions/mean_length": 431.125, "completions/min_length": 359.0, "epoch": 0.050083872513779056, "grad_norm": 1.0569764021942198, "kl": 0.4322347044944763, "learning_rate": 9.959730292048811e-07, "loss": 0.00043295821524225175, "memory(GiB)": 165.8, "reward": 2.4975695610046387, "reward_std": 0.1911563277244568, "rewards/GeoLocAccuracyV2ORM/mean": 0.9229166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.24169538915157318, "rewards/GeoVisalEntityMatch2ORM/mean": 0.585069477558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.23870903253555298, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 418, "train_speed(iter/s)": 0.025676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/mean_length": 402.9375, "completions/min_length": 313.0, "epoch": 0.05020369039060628, "grad_norm": 1.115319433434633, "kl": 0.4032529592514038, "learning_rate": 9.959489121727602e-07, "loss": 0.0004051874275319278, "memory(GiB)": 165.8, "reward": 2.574942111968994, "reward_std": 0.08713793754577637, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5749421715736389, "rewards/GeoVisalEntityMatch2ORM/std": 0.21837382018566132, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 419, "train_speed(iter/s)": 0.02568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 406.9895935058594, "completions/min_length": 350.0, "epoch": 0.0503235082674335, "grad_norm": 1.097085004186728, "kl": 0.4103042334318161, "learning_rate": 9.959247234328555e-07, "loss": 0.0004107269342057407, "memory(GiB)": 165.8, "reward": 2.467336416244507, "reward_std": 0.25993990898132324, "rewards/GeoLocAccuracyV2ORM/mean": 0.9645833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.1716662496328354, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5131696462631226, "rewards/GeoVisalEntityMatch2ORM/std": 0.2359689474105835, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 420, "train_speed(iter/s)": 0.025696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 496.0, "completions/mean_length": 410.25, "completions/min_length": 344.0, "epoch": 0.050443326144260725, "grad_norm": 1.0625359759879287, "kl": 0.3844805955886841, "learning_rate": 9.959004629886641e-07, "loss": 0.00038547752774320543, "memory(GiB)": 165.8, "reward": 2.6732804775238037, "reward_std": 0.11263471096754074, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6732804179191589, "rewards/GeoVisalEntityMatch2ORM/std": 0.2965003550052643, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 421, "train_speed(iter/s)": 0.025712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2916666666666667, "completions/max_length": 457.0, "completions/mean_length": 391.29168701171875, "completions/min_length": 324.0, "epoch": 0.05056314402108795, "grad_norm": 1.1381850174994546, "kl": 0.5943562090396881, "learning_rate": 9.95876130843694e-07, "loss": 0.0005908882012590766, "memory(GiB)": 165.8, "reward": 2.02264404296875, "reward_std": 0.2933349609375, "rewards/GeoLocAccuracyV2ORM/mean": 0.5666667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.46851763129234314, "rewards/GeoVisalEntityMatch2ORM/mean": 0.737227201461792, "rewards/GeoVisalEntityMatch2ORM/std": 0.13563889265060425, "rewards/MathFormat/mean": 0.71875, "rewards/MathFormat/std": 0.45196935534477234, "step": 422, "train_speed(iter/s)": 0.025709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/mean_length": 401.0, "completions/min_length": 346.0, "epoch": 0.05068296189791517, "grad_norm": 1.0843607925717087, "kl": 0.43927763402462006, "learning_rate": 9.958517270014634e-07, "loss": 0.00043999403715133667, "memory(GiB)": 165.8, "reward": 2.4250993728637695, "reward_std": 0.1002928763628006, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4250992238521576, "rewards/GeoVisalEntityMatch2ORM/std": 0.15614815056324005, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 423, "train_speed(iter/s)": 0.025727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 389.4583435058594, "completions/min_length": 329.0, "epoch": 0.050802779774742395, "grad_norm": 1.1773352046332868, "kl": 0.40758438408374786, "learning_rate": 9.958272514655004e-07, "loss": 0.0004086109693162143, "memory(GiB)": 165.8, "reward": 2.479464292526245, "reward_std": 0.25450649857521057, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6461309790611267, "rewards/GeoVisalEntityMatch2ORM/std": 0.1864900141954422, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 424, "train_speed(iter/s)": 0.025742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 376.9583435058594, "completions/min_length": 297.0, "epoch": 0.05092259765156961, "grad_norm": 1.1773270536729183, "kl": 0.4409857541322708, "learning_rate": 9.958027042393443e-07, "loss": 0.00044186910963617265, "memory(GiB)": 165.8, "reward": 2.382279396057129, "reward_std": 0.1064457818865776, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5822793245315552, "rewards/GeoVisalEntityMatch2ORM/std": 0.13684889674186707, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 425, "train_speed(iter/s)": 0.02576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 395.9583435058594, "completions/min_length": 331.0, "epoch": 0.051042415528396834, "grad_norm": 1.0538366680683302, "kl": 0.41703490912914276, "learning_rate": 9.95778085326544e-07, "loss": 0.0004169046878814697, "memory(GiB)": 165.8, "reward": 2.424032688140869, "reward_std": 0.16658180952072144, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.44069942831993103, "rewards/GeoVisalEntityMatch2ORM/std": 0.22673752903938293, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 426, "train_speed(iter/s)": 0.025777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 457.0, "completions/mean_length": 374.0, "completions/min_length": 308.0, "epoch": 0.05116223340522406, "grad_norm": 1.1945471162493213, "kl": 0.4417699873447418, "learning_rate": 9.957533947306593e-07, "loss": 0.00044281285954639316, "memory(GiB)": 165.8, "reward": 2.562066078186035, "reward_std": 0.19314277172088623, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5828993320465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.12113423645496368, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 427, "train_speed(iter/s)": 0.02578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/mean_length": 387.0, "completions/min_length": 318.0, "epoch": 0.05128205128205128, "grad_norm": 1.0801182835568124, "kl": 0.40212011337280273, "learning_rate": 9.957286324552598e-07, "loss": 0.00040251266909763217, "memory(GiB)": 165.8, "reward": 2.55078125, "reward_std": 0.1983664631843567, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.24566414952278137, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6341146230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.12881435453891754, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 428, "train_speed(iter/s)": 0.025797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 459.0, "completions/mean_length": 369.8333435058594, "completions/min_length": 317.0, "epoch": 0.0514018691588785, "grad_norm": 1.165394315282265, "kl": 0.6096844375133514, "learning_rate": 9.95703798503926e-07, "loss": 0.0006045898189768195, "memory(GiB)": 165.8, "reward": 2.255654811859131, "reward_std": 0.17854687571525574, "rewards/GeoLocAccuracyV2ORM/mean": 0.7520833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4321254789829254, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5764881372451782, "rewards/GeoVisalEntityMatch2ORM/std": 0.1817077100276947, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 429, "train_speed(iter/s)": 0.025796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 375.46875, "completions/min_length": 288.0, "epoch": 0.05152168703570573, "grad_norm": 1.191984707288049, "kl": 0.428838312625885, "learning_rate": 9.956788928802489e-07, "loss": 0.00043068578816019, "memory(GiB)": 165.8, "reward": 2.42578125, "reward_std": 0.13828042149543762, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5924479365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.29445430636405945, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 430, "train_speed(iter/s)": 0.025813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 451.0, "completions/mean_length": 362.84375, "completions/min_length": 282.0, "epoch": 0.05164150491253295, "grad_norm": 1.098252078293901, "kl": 0.5119380354881287, "learning_rate": 9.956539155878289e-07, "loss": 0.0005103697767481208, "memory(GiB)": 165.8, "reward": 2.401186466217041, "reward_std": 0.3095856010913849, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.374634325504303, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7345196604728699, "rewards/GeoVisalEntityMatch2ORM/std": 0.14720524847507477, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 431, "train_speed(iter/s)": 0.025812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 368.625, "completions/min_length": 293.0, "epoch": 0.05176132278936017, "grad_norm": 1.109062849401477, "kl": 0.4203522503376007, "learning_rate": 9.95628866630278e-07, "loss": 0.0004215538501739502, "memory(GiB)": 165.8, "reward": 2.3282408714294434, "reward_std": 0.19871506094932556, "rewards/GeoLocAccuracyV2ORM/mean": 0.8999999761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.26596397161483765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.42824074625968933, "rewards/GeoVisalEntityMatch2ORM/std": 0.15160590410232544, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 432, "train_speed(iter/s)": 0.025829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 379.13543701171875, "completions/min_length": 313.0, "epoch": 0.051881140666187396, "grad_norm": 1.1056739436054184, "kl": 0.4383791387081146, "learning_rate": 9.956037460112171e-07, "loss": 0.0004397047159727663, "memory(GiB)": 165.8, "reward": 2.5790181159973145, "reward_std": 0.18253180384635925, "rewards/GeoLocAccuracyV2ORM/mean": 0.9250000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6540178656578064, "rewards/GeoVisalEntityMatch2ORM/std": 0.1530427187681198, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 433, "train_speed(iter/s)": 0.025834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 365.4583435058594, "completions/min_length": 281.0, "epoch": 0.05200095854301462, "grad_norm": 1.2244306566223124, "kl": 0.43430858850479126, "learning_rate": 9.955785537342794e-07, "loss": 0.0004353771801106632, "memory(GiB)": 165.8, "reward": 2.6468751430511475, "reward_std": 0.18028636276721954, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357589185237885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6677083969116211, "rewards/GeoVisalEntityMatch2ORM/std": 0.13179007172584534, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 434, "train_speed(iter/s)": 0.025852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 371.0208435058594, "completions/min_length": 313.0, "epoch": 0.05212077641984184, "grad_norm": 1.1159925920304732, "kl": 0.4448164701461792, "learning_rate": 9.955532898031068e-07, "loss": 0.0004461060161702335, "memory(GiB)": 165.8, "reward": 2.2573494911193848, "reward_std": 0.0891808569431305, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4573492407798767, "rewards/GeoVisalEntityMatch2ORM/std": 0.2292274832725525, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 435, "train_speed(iter/s)": 0.02587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 359.03125, "completions/min_length": 306.0, "epoch": 0.052240594296669066, "grad_norm": 1.125063982146586, "kl": 0.4147360622882843, "learning_rate": 9.95527954221352e-07, "loss": 0.0004149464366491884, "memory(GiB)": 165.8, "reward": 2.59704852104187, "reward_std": 0.10843908786773682, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5970486402511597, "rewards/GeoVisalEntityMatch2ORM/std": 0.2934195101261139, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 436, "train_speed(iter/s)": 0.025887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 362.09375, "completions/min_length": 306.0, "epoch": 0.05236041217349629, "grad_norm": 1.1730752976230934, "kl": 0.3852407932281494, "learning_rate": 9.955025469926785e-07, "loss": 0.0003861015138681978, "memory(GiB)": 165.8, "reward": 2.413132429122925, "reward_std": 0.1643715500831604, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.43813246488571167, "rewards/GeoVisalEntityMatch2ORM/std": 0.21535292267799377, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 437, "train_speed(iter/s)": 0.025904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 505.0, "completions/mean_length": 387.9895935058594, "completions/min_length": 318.0, "epoch": 0.052480230050323505, "grad_norm": 1.062141629763933, "kl": 0.4336521625518799, "learning_rate": 9.954770681207597e-07, "loss": 0.0004347264766693115, "memory(GiB)": 165.8, "reward": 2.4625000953674316, "reward_std": 0.15933643281459808, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4791666865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.19709967076778412, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 438, "train_speed(iter/s)": 0.025922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/mean_length": 386.5208435058594, "completions/min_length": 324.0, "epoch": 0.05260004792715073, "grad_norm": 1.159847567376216, "kl": 0.3999069929122925, "learning_rate": 9.954515176092795e-07, "loss": 0.00040121874189935625, "memory(GiB)": 165.8, "reward": 2.1531198024749756, "reward_std": 0.2369820475578308, "rewards/GeoLocAccuracyV2ORM/mean": 0.7083333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.38707178831100464, "rewards/GeoVisalEntityMatch2ORM/mean": 0.44478639960289, "rewards/GeoVisalEntityMatch2ORM/std": 0.12952296435832977, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 439, "train_speed(iter/s)": 0.02594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 394.97918701171875, "completions/min_length": 312.0, "epoch": 0.05271986580397795, "grad_norm": 1.1720966992071256, "kl": 0.42668475210666656, "learning_rate": 9.954258954619324e-07, "loss": 0.00042782476521097124, "memory(GiB)": 165.8, "reward": 2.3251984119415283, "reward_std": 0.25079435110092163, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5168651342391968, "rewards/GeoVisalEntityMatch2ORM/std": 0.21857628226280212, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 440, "train_speed(iter/s)": 0.025957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 362.82293701171875, "completions/min_length": 262.0, "epoch": 0.052839683680805175, "grad_norm": 1.1610366869169495, "kl": 0.4317123144865036, "learning_rate": 9.954002016824225e-07, "loss": 0.0004334238765295595, "memory(GiB)": 165.8, "reward": 2.651479959487915, "reward_std": 0.1438922882080078, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6848132014274597, "rewards/GeoVisalEntityMatch2ORM/std": 0.217197448015213, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 441, "train_speed(iter/s)": 0.025975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 486.0, "completions/mean_length": 389.46875, "completions/min_length": 335.0, "epoch": 0.0529595015576324, "grad_norm": 1.2234338046499895, "kl": 0.8562842309474945, "learning_rate": 9.953744362744654e-07, "loss": 0.0008578648557886481, "memory(GiB)": 165.8, "reward": 2.0999999046325684, "reward_std": 0.1073351725935936, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6000000238418579, "rewards/GeoVisalEntityMatch2ORM/std": 0.2454855740070343, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 442, "train_speed(iter/s)": 0.025975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 402.5, "completions/min_length": 354.0, "epoch": 0.05307931943445962, "grad_norm": 1.0871561910877299, "kl": 0.45026540756225586, "learning_rate": 9.953485992417862e-07, "loss": 0.00045204785419628024, "memory(GiB)": 165.8, "reward": 2.617140293121338, "reward_std": 0.14146381616592407, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6671401858329773, "rewards/GeoVisalEntityMatch2ORM/std": 0.2397264838218689, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 443, "train_speed(iter/s)": 0.025992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/mean_length": 401.3645935058594, "completions/min_length": 324.0, "epoch": 0.053199137311286844, "grad_norm": 1.0923687010210847, "kl": 0.4472932666540146, "learning_rate": 9.953226905881208e-07, "loss": 0.0004493569431360811, "memory(GiB)": 165.8, "reward": 2.4849538803100586, "reward_std": 0.13321909308433533, "rewards/GeoLocAccuracyV2ORM/mean": 0.8020833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3559949994087219, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6828703880310059, "rewards/GeoVisalEntityMatch2ORM/std": 0.19486865401268005, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 444, "train_speed(iter/s)": 0.026008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 507.0, "completions/mean_length": 412.8125, "completions/min_length": 302.0, "epoch": 0.05331895518811407, "grad_norm": 1.0903426146081676, "kl": 0.4218621402978897, "learning_rate": 9.952967103172146e-07, "loss": 0.0004233097133692354, "memory(GiB)": 165.8, "reward": 2.5557870864868164, "reward_std": 0.16097134351730347, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7307870388031006, "rewards/GeoVisalEntityMatch2ORM/std": 0.22255627810955048, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 445, "train_speed(iter/s)": 0.026024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 529.0, "completions/mean_length": 421.9270935058594, "completions/min_length": 344.0, "epoch": 0.05343877306494129, "grad_norm": 1.1144431401578179, "kl": 0.5751583576202393, "learning_rate": 9.952706584328248e-07, "loss": 0.000561244785785675, "memory(GiB)": 165.8, "reward": 2.143078327178955, "reward_std": 0.2700011134147644, "rewards/GeoLocAccuracyV2ORM/mean": 0.6499999761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.4579128623008728, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5034947991371155, "rewards/GeoVisalEntityMatch2ORM/std": 0.2449110597372055, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 446, "train_speed(iter/s)": 0.02604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.19791666666666666, "completions/max_length": 546.0, "completions/mean_length": 417.2395935058594, "completions/min_length": 333.0, "epoch": 0.053558590941768514, "grad_norm": 1.0963520518964942, "kl": 0.6063612699508667, "learning_rate": 9.952445349387177e-07, "loss": 0.0006063220789656043, "memory(GiB)": 165.8, "reward": 2.170254707336426, "reward_std": 0.36058011651039124, "rewards/GeoLocAccuracyV2ORM/mean": 0.7437500357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.4252088963985443, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6244212985038757, "rewards/GeoVisalEntityMatch2ORM/std": 0.15903475880622864, "rewards/MathFormat/mean": 0.8020833730697632, "rewards/MathFormat/std": 0.4005205035209656, "step": 447, "train_speed(iter/s)": 0.026038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17708333333333334, "completions/max_length": 506.0, "completions/mean_length": 429.4583435058594, "completions/min_length": 362.0, "epoch": 0.05367840881859574, "grad_norm": 1.7806767637914553, "kl": 2.6711975634098053, "learning_rate": 9.952183398386703e-07, "loss": 0.0026230416260659695, "memory(GiB)": 165.8, "reward": 2.274405002593994, "reward_std": 0.40324005484580994, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.374634325504303, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6077381372451782, "rewards/GeoVisalEntityMatch2ORM/std": 0.16924676299095154, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 448, "train_speed(iter/s)": 0.026041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 518.0, "completions/mean_length": 451.10418701171875, "completions/min_length": 383.0, "epoch": 0.05379822669542296, "grad_norm": 1.0513766641088458, "kl": 0.4256279766559601, "learning_rate": 9.951920731364707e-07, "loss": 0.00042641040636226535, "memory(GiB)": 165.8, "reward": 2.583035945892334, "reward_std": 0.2483915388584137, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783626317977905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6663690805435181, "rewards/GeoVisalEntityMatch2ORM/std": 0.15479420125484467, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 449, "train_speed(iter/s)": 0.026057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/mean_length": 430.84375, "completions/min_length": 362.0, "epoch": 0.05391804457225018, "grad_norm": 1.0480063130649164, "kl": 0.4280095398426056, "learning_rate": 9.95165734835916e-07, "loss": 0.00042847543954849243, "memory(GiB)": 165.8, "reward": 2.7729454040527344, "reward_std": 0.103912852704525, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7729456424713135, "rewards/GeoVisalEntityMatch2ORM/std": 0.15132379531860352, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 450, "train_speed(iter/s)": 0.026072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 527.0, "completions/mean_length": 428.54168701171875, "completions/min_length": 340.0, "epoch": 0.0540378624490774, "grad_norm": 0.933996352919317, "kl": 0.4799180328845978, "learning_rate": 9.95139324940815e-07, "loss": 0.00047976215137168765, "memory(GiB)": 165.8, "reward": 2.483234167098999, "reward_std": 0.2915028929710388, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783623337745667, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6499007940292358, "rewards/GeoVisalEntityMatch2ORM/std": 0.1507636308670044, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783623337745667, "step": 451, "train_speed(iter/s)": 0.026076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 540.0, "completions/mean_length": 444.4895935058594, "completions/min_length": 361.0, "epoch": 0.05415768032590462, "grad_norm": 1.110121506712314, "kl": 0.46107810735702515, "learning_rate": 9.95112843454986e-07, "loss": 0.00046210986329242587, "memory(GiB)": 165.8, "reward": 2.4949073791503906, "reward_std": 0.11055102944374084, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5032407641410828, "rewards/GeoVisalEntityMatch2ORM/std": 0.2714958190917969, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 452, "train_speed(iter/s)": 0.026097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 505.0, "completions/mean_length": 431.21875, "completions/min_length": 363.0, "epoch": 0.054277498202731846, "grad_norm": 1.0791074295730567, "kl": 0.4515370577573776, "learning_rate": 9.950862903822575e-07, "loss": 0.00045124691678211093, "memory(GiB)": 165.8, "reward": 2.4117188453674316, "reward_std": 0.3067946434020996, "rewards/GeoLocAccuracyV2ORM/mean": 0.7854167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.36504626274108887, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6575520634651184, "rewards/GeoVisalEntityMatch2ORM/std": 0.1711249053478241, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 453, "train_speed(iter/s)": 0.02611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 524.0, "completions/mean_length": 442.76043701171875, "completions/min_length": 361.0, "epoch": 0.05439731607955907, "grad_norm": 1.0989071134640658, "kl": 0.519731879234314, "learning_rate": 9.950596657264694e-07, "loss": 0.0005195687408559024, "memory(GiB)": 165.8, "reward": 2.272817611694336, "reward_std": 0.2725561261177063, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7728174924850464, "rewards/GeoVisalEntityMatch2ORM/std": 0.1802506148815155, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 454, "train_speed(iter/s)": 0.026113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 518.0, "completions/mean_length": 453.6145935058594, "completions/min_length": 390.0, "epoch": 0.05451713395638629, "grad_norm": 1.0711677486517537, "kl": 0.5030863583087921, "learning_rate": 9.950329694914706e-07, "loss": 0.0005016922950744629, "memory(GiB)": 165.8, "reward": 2.2620038986206055, "reward_std": 0.32258614897727966, "rewards/GeoLocAccuracyV2ORM/mean": 0.7583333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.39139872789382935, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5557539463043213, "rewards/GeoVisalEntityMatch2ORM/std": 0.245164155960083, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336147725582123, "step": 455, "train_speed(iter/s)": 0.026127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 506.0, "completions/mean_length": 449.34375, "completions/min_length": 373.0, "epoch": 0.054636951833213515, "grad_norm": 1.1266069116287043, "kl": 0.7007893323898315, "learning_rate": 9.950062016811216e-07, "loss": 0.0006951491232030094, "memory(GiB)": 165.8, "reward": 2.0881733894348145, "reward_std": 0.6237609386444092, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5881733894348145, "rewards/GeoVisalEntityMatch2ORM/std": 0.19217073917388916, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 456, "train_speed(iter/s)": 0.02613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 492.0, "completions/mean_length": 411.90625, "completions/min_length": 364.0, "epoch": 0.05475676971004074, "grad_norm": 1.164942077138557, "kl": 0.45331668853759766, "learning_rate": 9.949793622992922e-07, "loss": 0.0004538620705716312, "memory(GiB)": 165.8, "reward": 2.21992826461792, "reward_std": 0.061299510300159454, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7199281454086304, "rewards/GeoVisalEntityMatch2ORM/std": 0.22874441742897034, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 457, "train_speed(iter/s)": 0.026128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 533.0, "completions/mean_length": 433.63543701171875, "completions/min_length": 351.0, "epoch": 0.05487658758686796, "grad_norm": 1.147019258641516, "kl": 0.5446498095989227, "learning_rate": 9.949524513498636e-07, "loss": 0.0005417665233835578, "memory(GiB)": 165.8, "reward": 2.398073673248291, "reward_std": 0.3754774332046509, "rewards/GeoLocAccuracyV2ORM/mean": 0.8187500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.35786283016204834, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6522404551506042, "rewards/GeoVisalEntityMatch2ORM/std": 0.16134142875671387, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 458, "train_speed(iter/s)": 0.026131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 504.0, "completions/mean_length": 413.0833435058594, "completions/min_length": 352.0, "epoch": 0.054996405463695185, "grad_norm": 1.1765840246205201, "kl": 0.5261306166648865, "learning_rate": 9.94925468836726e-07, "loss": 0.0005279183387756348, "memory(GiB)": 165.8, "reward": 2.515144109725952, "reward_std": 0.25722694396972656, "rewards/GeoLocAccuracyV2ORM/mean": 0.8020833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4005205035209656, "rewards/GeoVisalEntityMatch2ORM/mean": 0.9109775424003601, "rewards/GeoVisalEntityMatch2ORM/std": 0.15104086697101593, "rewards/MathFormat/mean": 0.8020833730697632, "rewards/MathFormat/std": 0.4005205035209656, "step": 459, "train_speed(iter/s)": 0.026129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/mean_length": 395.79168701171875, "completions/min_length": 330.0, "epoch": 0.05511622334052241, "grad_norm": 1.1400303523232211, "kl": 0.44789254665374756, "learning_rate": 9.948984147637816e-07, "loss": 0.00044893971062265337, "memory(GiB)": 165.8, "reward": 2.755470037460327, "reward_std": 0.1389603316783905, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7888032793998718, "rewards/GeoVisalEntityMatch2ORM/std": 0.15800811350345612, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 460, "train_speed(iter/s)": 0.026145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 405.09375, "completions/min_length": 325.0, "epoch": 0.05523604121734963, "grad_norm": 1.1789210396503886, "kl": 0.4595528244972229, "learning_rate": 9.948712891349414e-07, "loss": 0.00046116983867250383, "memory(GiB)": 165.8, "reward": 2.3182871341705322, "reward_std": 0.1831827610731125, "rewards/GeoLocAccuracyV2ORM/mean": 0.7750000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.36157551407814026, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5432870388031006, "rewards/GeoVisalEntityMatch2ORM/std": 0.16125304996967316, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 461, "train_speed(iter/s)": 0.02616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 402.60418701171875, "completions/min_length": 322.0, "epoch": 0.055355859094176854, "grad_norm": 1.027942778601422, "kl": 0.40588945150375366, "learning_rate": 9.948440919541277e-07, "loss": 0.0004066502151545137, "memory(GiB)": 165.8, "reward": 2.697991132736206, "reward_std": 0.11857196688652039, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7063244581222534, "rewards/GeoVisalEntityMatch2ORM/std": 0.2404276430606842, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 462, "train_speed(iter/s)": 0.026175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 501.0, "completions/mean_length": 416.66668701171875, "completions/min_length": 350.0, "epoch": 0.05547567697100407, "grad_norm": 1.1108424796573788, "kl": 0.4573908895254135, "learning_rate": 9.948168232252728e-07, "loss": 0.0004583224654197693, "memory(GiB)": 165.8, "reward": 2.364872694015503, "reward_std": 0.15635764598846436, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3752893805503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.24034424126148224, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 463, "train_speed(iter/s)": 0.02619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 490.0, "completions/mean_length": 398.5, "completions/min_length": 327.0, "epoch": 0.055595494847831294, "grad_norm": 1.1341997579281649, "kl": 0.6725375652313232, "learning_rate": 9.947894829523193e-07, "loss": 0.0006670182337984443, "memory(GiB)": 165.8, "reward": 2.174454689025879, "reward_std": 0.4248669445514679, "rewards/GeoLocAccuracyV2ORM/mean": 0.6791667342185974, "rewards/GeoLocAccuracyV2ORM/std": 0.4299122393131256, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6411210894584656, "rewards/GeoVisalEntityMatch2ORM/std": 0.1920369267463684, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 464, "train_speed(iter/s)": 0.026188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 417.01043701171875, "completions/min_length": 358.0, "epoch": 0.05571531272465852, "grad_norm": 1.1330814224272863, "kl": 0.45866574347019196, "learning_rate": 9.947620711392204e-07, "loss": 0.0004598399100359529, "memory(GiB)": 165.8, "reward": 2.5330440998077393, "reward_std": 0.12966346740722656, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5413773059844971, "rewards/GeoVisalEntityMatch2ORM/std": 0.2159850150346756, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 465, "train_speed(iter/s)": 0.026202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 483.0, "completions/mean_length": 399.53125, "completions/min_length": 322.0, "epoch": 0.05583513060148574, "grad_norm": 1.1393501351852107, "kl": 0.4944656044244766, "learning_rate": 9.947345877899395e-07, "loss": 0.000493854284286499, "memory(GiB)": 165.8, "reward": 2.5283236503601074, "reward_std": 0.24672791361808777, "rewards/GeoLocAccuracyV2ORM/mean": 0.9645833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.1716662496328354, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5741567611694336, "rewards/GeoVisalEntityMatch2ORM/std": 0.1952337622642517, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 466, "train_speed(iter/s)": 0.026215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 394.09375, "completions/min_length": 309.0, "epoch": 0.05595494847831296, "grad_norm": 1.2161013102298148, "kl": 0.46897201240062714, "learning_rate": 9.947070329084503e-07, "loss": 0.00047101452946662903, "memory(GiB)": 165.8, "reward": 2.5716147422790527, "reward_std": 0.09988022595643997, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5716146230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.2137771099805832, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 467, "train_speed(iter/s)": 0.02623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/mean_length": 403.66668701171875, "completions/min_length": 332.0, "epoch": 0.056074766355140186, "grad_norm": 1.1321352029399312, "kl": 0.4891234338283539, "learning_rate": 9.946794064987368e-07, "loss": 0.0004904419183731079, "memory(GiB)": 165.8, "reward": 2.606250286102295, "reward_std": 0.11391507089138031, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6145833730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.1422117054462433, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 468, "train_speed(iter/s)": 0.026245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 410.9375, "completions/min_length": 338.0, "epoch": 0.05619458423196741, "grad_norm": 1.1354801974588618, "kl": 0.4811127632856369, "learning_rate": 9.946517085647937e-07, "loss": 0.00048249465180560946, "memory(GiB)": 165.8, "reward": 2.0978009700775146, "reward_std": 0.10955491662025452, "rewards/GeoLocAccuracyV2ORM/mean": 0.5583333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.4587549567222595, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5394675731658936, "rewards/GeoVisalEntityMatch2ORM/std": 0.257472425699234, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 469, "train_speed(iter/s)": 0.02626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 416.01043701171875, "completions/min_length": 333.0, "epoch": 0.05631440210879463, "grad_norm": 1.16984844982217, "kl": 0.48904961347579956, "learning_rate": 9.946239391106254e-07, "loss": 0.0004902047803625464, "memory(GiB)": 165.8, "reward": 2.489161968231201, "reward_std": 0.18330688774585724, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583332896232605, "rewards/GeoLocAccuracyV2ORM/std": 0.3204164206981659, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6308283805847168, "rewards/GeoVisalEntityMatch2ORM/std": 0.1886444091796875, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 470, "train_speed(iter/s)": 0.026275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 529.0, "completions/mean_length": 413.47918701171875, "completions/min_length": 347.0, "epoch": 0.056434219985621856, "grad_norm": 1.1405211279176388, "kl": 0.4688932001590729, "learning_rate": 9.94596098140247e-07, "loss": 0.00046957534505054355, "memory(GiB)": 165.8, "reward": 2.1385419368743896, "reward_std": 0.18644699454307556, "rewards/GeoLocAccuracyV2ORM/mean": 0.5395833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.45894137024879456, "rewards/GeoVisalEntityMatch2ORM/mean": 0.609375, "rewards/GeoVisalEntityMatch2ORM/std": 0.20027513802051544, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 471, "train_speed(iter/s)": 0.02629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 408.91668701171875, "completions/min_length": 316.0, "epoch": 0.05655403786244908, "grad_norm": 0.9139355038817155, "kl": 0.49691422283649445, "learning_rate": 9.945681856576843e-07, "loss": 0.000493152707349509, "memory(GiB)": 165.8, "reward": 2.5694446563720703, "reward_std": 0.1310415416955948, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5902777910232544, "rewards/GeoVisalEntityMatch2ORM/std": 0.16695880889892578, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 472, "train_speed(iter/s)": 0.026305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 477.0, "completions/mean_length": 423.1770935058594, "completions/min_length": 339.0, "epoch": 0.0566738557392763, "grad_norm": 1.1917689581823272, "kl": 0.7269685566425323, "learning_rate": 9.945402016669728e-07, "loss": 0.0007253835792653263, "memory(GiB)": 165.8, "reward": 2.1868553161621094, "reward_std": 0.47566360235214233, "rewards/GeoLocAccuracyV2ORM/mean": 0.6708333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.4504773020744324, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6410218477249146, "rewards/GeoVisalEntityMatch2ORM/std": 0.2068089097738266, "rewards/MathFormat/mean": 0.875, "rewards/MathFormat/std": 0.33245500922203064, "step": 473, "train_speed(iter/s)": 0.026314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 514.0, "completions/mean_length": 429.8645935058594, "completions/min_length": 326.0, "epoch": 0.056793673616103525, "grad_norm": 1.1853440499253454, "kl": 0.4921751320362091, "learning_rate": 9.945121461721588e-07, "loss": 0.0004943497478961945, "memory(GiB)": 165.8, "reward": 2.5962798595428467, "reward_std": 0.248063325881958, "rewards/GeoLocAccuracyV2ORM/mean": 0.7958333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.4010293781757355, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8108631372451782, "rewards/GeoVisalEntityMatch2ORM/std": 0.24998360872268677, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 474, "train_speed(iter/s)": 0.026328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17708333333333334, "completions/max_length": 512.0, "completions/mean_length": 432.5, "completions/min_length": 364.0, "epoch": 0.05691349149293075, "grad_norm": 1.0641973828947298, "kl": 0.7871008813381195, "learning_rate": 9.944840191772985e-07, "loss": 0.0007865069201216102, "memory(GiB)": 165.8, "reward": 2.321610450744629, "reward_std": 0.30444473028182983, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.3797506093978882, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6632771492004395, "rewards/GeoVisalEntityMatch2ORM/std": 0.11580801755189896, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 475, "train_speed(iter/s)": 0.026334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 531.0, "completions/mean_length": 452.07293701171875, "completions/min_length": 362.0, "epoch": 0.057033309369757965, "grad_norm": 1.0577375055231086, "kl": 0.4993923157453537, "learning_rate": 9.94455820686459e-07, "loss": 0.0005001897807233036, "memory(GiB)": 165.8, "reward": 2.421428680419922, "reward_std": 0.07369216531515121, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6714286208152771, "rewards/GeoVisalEntityMatch2ORM/std": 0.23368345201015472, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 476, "train_speed(iter/s)": 0.026353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 546.0, "completions/mean_length": 441.16668701171875, "completions/min_length": 321.0, "epoch": 0.05715312724658519, "grad_norm": 0.9359175382114345, "kl": 0.6562192142009735, "learning_rate": 9.944275507037172e-07, "loss": 0.0006391754141077399, "memory(GiB)": 165.8, "reward": 2.807725667953491, "reward_std": 0.20236043632030487, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490600049495697, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8702256679534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.15936526656150818, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 477, "train_speed(iter/s)": 0.026367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 506.0, "completions/mean_length": 452.65625, "completions/min_length": 371.0, "epoch": 0.05727294512341241, "grad_norm": 1.0983130709313795, "kl": 0.6425648629665375, "learning_rate": 9.943992092331606e-07, "loss": 0.0006372543866746128, "memory(GiB)": 165.8, "reward": 2.4416542053222656, "reward_std": 0.3748764395713806, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783626317977905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.608320951461792, "rewards/GeoVisalEntityMatch2ORM/std": 0.19546358287334442, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783626317977905, "step": 478, "train_speed(iter/s)": 0.026378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 551.0, "completions/mean_length": 470.3958435058594, "completions/min_length": 374.0, "epoch": 0.057392763000239634, "grad_norm": 1.0699473794816219, "kl": 0.6141125559806824, "learning_rate": 9.943707962788872e-07, "loss": 0.0006121000042185187, "memory(GiB)": 165.8, "reward": 2.2034971714019775, "reward_std": 0.402579128742218, "rewards/GeoLocAccuracyV2ORM/mean": 0.6770833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4700457453727722, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5993303656578064, "rewards/GeoVisalEntityMatch2ORM/std": 0.11979293078184128, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 479, "train_speed(iter/s)": 0.026391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.22916666666666666, "completions/max_length": 521.0, "completions/mean_length": 437.16668701171875, "completions/min_length": 326.0, "epoch": 0.05751258087706686, "grad_norm": 1.0831759803710805, "kl": 0.7042604684829712, "learning_rate": 9.94342311845005e-07, "loss": 0.0007043331861495972, "memory(GiB)": 165.8, "reward": 2.100818634033203, "reward_std": 0.3414536118507385, "rewards/GeoLocAccuracyV2ORM/mean": 0.5500000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.49332383275032043, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7799851298332214, "rewards/GeoVisalEntityMatch2ORM/std": 0.16802582144737244, "rewards/MathFormat/mean": 0.7708333730697632, "rewards/MathFormat/std": 0.4225029945373535, "step": 480, "train_speed(iter/s)": 0.026397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 552.0, "completions/mean_length": 435.60418701171875, "completions/min_length": 330.0, "epoch": 0.05763239875389408, "grad_norm": 1.1894982900698563, "kl": 1.1047324240207672, "learning_rate": 9.943137559356326e-07, "loss": 0.0011023854603990912, "memory(GiB)": 165.8, "reward": 1.7013890743255615, "reward_std": 0.3833661675453186, "rewards/GeoLocAccuracyV2ORM/mean": 0.5104166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.5025156140327454, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6909722685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.21422941982746124, "rewards/MathFormat/mean": 0.5, "rewards/MathFormat/std": 0.5026246905326843, "step": 481, "train_speed(iter/s)": 0.026394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 596.0, "completions/mean_length": 450.75, "completions/min_length": 352.0, "epoch": 0.057752216630721304, "grad_norm": 1.1381224353001884, "kl": 0.6668561398983002, "learning_rate": 9.942851285548985e-07, "loss": 0.0006628334522247314, "memory(GiB)": 165.8, "reward": 2.3786582946777344, "reward_std": 0.3280707597732544, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3547917604446411, "rewards/GeoVisalEntityMatch2ORM/mean": 0.670324981212616, "rewards/GeoVisalEntityMatch2ORM/std": 0.20290064811706543, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 482, "train_speed(iter/s)": 0.026396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 506.0, "completions/mean_length": 435.90625, "completions/min_length": 331.0, "epoch": 0.05787203450754853, "grad_norm": 1.0784496744053023, "kl": 0.6086403727531433, "learning_rate": 9.942564297069421e-07, "loss": 0.000603504478931427, "memory(GiB)": 165.8, "reward": 2.409266948699951, "reward_std": 0.463101863861084, "rewards/GeoLocAccuracyV2ORM/mean": 0.90625, "rewards/GeoLocAccuracyV2ORM/std": 0.2930106818675995, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5967668294906616, "rewards/GeoVisalEntityMatch2ORM/std": 0.15684927999973297, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930106818675995, "step": 483, "train_speed(iter/s)": 0.026409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 530.0, "completions/mean_length": 460.1875, "completions/min_length": 392.0, "epoch": 0.05799185238437575, "grad_norm": 1.0249156269551363, "kl": 0.5587711036205292, "learning_rate": 9.942276593959132e-07, "loss": 0.0005557692493312061, "memory(GiB)": 165.8, "reward": 2.3935186862945557, "reward_std": 0.2536790072917938, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4456018805503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.2017594873905182, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 484, "train_speed(iter/s)": 0.026423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4895833333333333, "completions/max_length": 488.0, "completions/mean_length": 391.0, "completions/min_length": 338.0, "epoch": 0.05811167026120297, "grad_norm": 1.2001439886572416, "kl": 0.6694897413253784, "learning_rate": 9.94198817625971e-07, "loss": 0.0006708664586767554, "memory(GiB)": 165.8, "reward": 1.831770896911621, "reward_std": 0.1866791546344757, "rewards/GeoLocAccuracyV2ORM/mean": 0.5104166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.5025156140327454, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8109375238418579, "rewards/GeoVisalEntityMatch2ORM/std": 0.15896831452846527, "rewards/MathFormat/mean": 0.5104166865348816, "rewards/MathFormat/std": 0.5025156140327454, "step": 485, "train_speed(iter/s)": 0.026418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/mean_length": 397.13543701171875, "completions/min_length": 330.0, "epoch": 0.058231488138030196, "grad_norm": 1.1069543519252045, "kl": 0.509984016418457, "learning_rate": 9.94169904401286e-07, "loss": 0.0005106280441395938, "memory(GiB)": 165.8, "reward": 2.2695064544677734, "reward_std": 0.17942604422569275, "rewards/GeoLocAccuracyV2ORM/mean": 0.7083333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4569156765937805, "rewards/GeoVisalEntityMatch2ORM/mean": 0.561173141002655, "rewards/GeoVisalEntityMatch2ORM/std": 0.2218511700630188, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 486, "train_speed(iter/s)": 0.026432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 400.78125, "completions/min_length": 347.0, "epoch": 0.05835130601485742, "grad_norm": 1.1488996607971111, "kl": 0.49904271960258484, "learning_rate": 9.941409197260383e-07, "loss": 0.0004994372720830142, "memory(GiB)": 165.8, "reward": 2.620573043823242, "reward_std": 0.15800470113754272, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003034889698029, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6393229365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.12592177093029022, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 487, "train_speed(iter/s)": 0.026446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 483.0, "completions/mean_length": 388.3020935058594, "completions/min_length": 317.0, "epoch": 0.05847112389168464, "grad_norm": 1.1536805603835725, "kl": 0.5281042158603668, "learning_rate": 9.941118636044193e-07, "loss": 0.0005293041467666626, "memory(GiB)": 165.8, "reward": 2.4820313453674316, "reward_std": 0.248295396566391, "rewards/GeoLocAccuracyV2ORM/mean": 0.8104166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3853854238986969, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6716146469116211, "rewards/GeoVisalEntityMatch2ORM/std": 0.1514609456062317, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 488, "train_speed(iter/s)": 0.026461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/mean_length": 400.6458435058594, "completions/min_length": 305.0, "epoch": 0.05859094176851186, "grad_norm": 1.2153525330023371, "kl": 0.49673648178577423, "learning_rate": 9.940827360406296e-07, "loss": 0.0004974156618118286, "memory(GiB)": 165.8, "reward": 2.5537450313568115, "reward_std": 0.23601502180099487, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713693141937, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6620783805847168, "rewards/GeoVisalEntityMatch2ORM/std": 0.24123597145080566, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 489, "train_speed(iter/s)": 0.026475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 374.2083435058594, "completions/min_length": 295.0, "epoch": 0.05871075964533908, "grad_norm": 1.1966938077659652, "kl": 0.5346286296844482, "learning_rate": 9.94053537038881e-07, "loss": 0.0005369372665882111, "memory(GiB)": 165.8, "reward": 2.460664749145508, "reward_std": 0.18844230473041534, "rewards/GeoLocAccuracyV2ORM/mean": 0.9395833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.21739082038402557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5210813879966736, "rewards/GeoVisalEntityMatch2ORM/std": 0.1411709189414978, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 490, "train_speed(iter/s)": 0.026484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 352.82293701171875, "completions/min_length": 289.0, "epoch": 0.058830577522166305, "grad_norm": 1.2516432433218017, "kl": 0.5454743206501007, "learning_rate": 9.940242666033954e-07, "loss": 0.0005472153425216675, "memory(GiB)": 165.8, "reward": 2.6576390266418457, "reward_std": 0.11856839060783386, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6576389074325562, "rewards/GeoVisalEntityMatch2ORM/std": 0.24559174478054047, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 491, "train_speed(iter/s)": 0.026503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 379.22918701171875, "completions/min_length": 299.0, "epoch": 0.05895039539899353, "grad_norm": 1.0448195907812508, "kl": 0.5222270786762238, "learning_rate": 9.939949247384045e-07, "loss": 0.0005227526417002082, "memory(GiB)": 165.8, "reward": 2.5288195610046387, "reward_std": 0.1710529327392578, "rewards/GeoLocAccuracyV2ORM/mean": 0.668749988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.4437430500984192, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8600694537162781, "rewards/GeoVisalEntityMatch2ORM/std": 0.12801861763000488, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 492, "train_speed(iter/s)": 0.026517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.23958333333333334, "completions/max_length": 424.0, "completions/mean_length": 369.3958435058594, "completions/min_length": 303.0, "epoch": 0.05907021327582075, "grad_norm": 1.017923592580425, "kl": 0.648071676492691, "learning_rate": 9.939655114481508e-07, "loss": 0.00064784538699314, "memory(GiB)": 165.8, "reward": 2.1887154579162598, "reward_std": 0.23809266090393066, "rewards/GeoLocAccuracyV2ORM/mean": 0.7520833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4321254789829254, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6762152910232544, "rewards/GeoVisalEntityMatch2ORM/std": 0.23004604876041412, "rewards/MathFormat/mean": 0.7604166865348816, "rewards/MathFormat/std": 0.42906978726387024, "step": 493, "train_speed(iter/s)": 0.026515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 380.5208435058594, "completions/min_length": 312.0, "epoch": 0.059190031152647975, "grad_norm": 1.1848765521795368, "kl": 0.5346008241176605, "learning_rate": 9.939360267368877e-07, "loss": 0.0005355378380045295, "memory(GiB)": 165.8, "reward": 2.437830924987793, "reward_std": 0.16450285911560059, "rewards/GeoLocAccuracyV2ORM/mean": 0.8041666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.35181236267089844, "rewards/GeoVisalEntityMatch2ORM/mean": 0.633664071559906, "rewards/GeoVisalEntityMatch2ORM/std": 0.22410395741462708, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 494, "train_speed(iter/s)": 0.026528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.0, "completions/mean_length": 398.5520935058594, "completions/min_length": 312.0, "epoch": 0.0593098490294752, "grad_norm": 1.0805033432322042, "kl": 0.4901791512966156, "learning_rate": 9.939064706088777e-07, "loss": 0.0004909411072731018, "memory(GiB)": 165.8, "reward": 2.6655008792877197, "reward_std": 0.10151512920856476, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6655009984970093, "rewards/GeoVisalEntityMatch2ORM/std": 0.14713923633098602, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 495, "train_speed(iter/s)": 0.026542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 377.4270935058594, "completions/min_length": 309.0, "epoch": 0.05942966690630242, "grad_norm": 1.1805614518574683, "kl": 0.5377985239028931, "learning_rate": 9.938768430683942e-07, "loss": 0.0005407705903053284, "memory(GiB)": 165.8, "reward": 2.3977653980255127, "reward_std": 0.26999431848526, "rewards/GeoLocAccuracyV2ORM/mean": 0.8395833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.3252461850643158, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5685986876487732, "rewards/GeoVisalEntityMatch2ORM/std": 0.12667854130268097, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 496, "train_speed(iter/s)": 0.026555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 453.0, "completions/mean_length": 363.0625, "completions/min_length": 285.0, "epoch": 0.059549484783129644, "grad_norm": 1.2311859285256428, "kl": 0.6403509974479675, "learning_rate": 9.938471441197214e-07, "loss": 0.0006400893325917423, "memory(GiB)": 165.8, "reward": 2.2688987255096436, "reward_std": 0.26735544204711914, "rewards/GeoLocAccuracyV2ORM/mean": 0.8041666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.38605061173439026, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6105655431747437, "rewards/GeoVisalEntityMatch2ORM/std": 0.16269677877426147, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 497, "train_speed(iter/s)": 0.026553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 465.0, "completions/mean_length": 394.3333435058594, "completions/min_length": 334.0, "epoch": 0.05966930265995687, "grad_norm": 1.1711647790621462, "kl": 0.7364005446434021, "learning_rate": 9.93817373767153e-07, "loss": 0.0007298787822946906, "memory(GiB)": 165.8, "reward": 2.3976852893829346, "reward_std": 0.25271904468536377, "rewards/GeoLocAccuracyV2ORM/mean": 0.8604167699813843, "rewards/GeoLocAccuracyV2ORM/std": 0.32846662402153015, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6101852059364319, "rewards/GeoVisalEntityMatch2ORM/std": 0.13867390155792236, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136448979377747, "step": 498, "train_speed(iter/s)": 0.026554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/mean_length": 385.3125, "completions/min_length": 333.0, "epoch": 0.05978912053678409, "grad_norm": 0.9137643522847725, "kl": 0.5391572117805481, "learning_rate": 9.937875320149935e-07, "loss": 0.0005399187793955207, "memory(GiB)": 165.8, "reward": 2.7688357830047607, "reward_std": 0.05682094022631645, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.768835723400116, "rewards/GeoVisalEntityMatch2ORM/std": 0.07197465747594833, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 499, "train_speed(iter/s)": 0.026569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 360.04168701171875, "completions/min_length": 289.0, "epoch": 0.059908938413611314, "grad_norm": 1.213880176791927, "kl": 0.5417233109474182, "learning_rate": 9.937576188675577e-07, "loss": 0.0005442686378955841, "memory(GiB)": 165.8, "reward": 2.6796875, "reward_std": 0.08996610343456268, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6796875, "rewards/GeoVisalEntityMatch2ORM/std": 0.1363089680671692, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 500, "train_speed(iter/s)": 0.026582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 365.7395935058594, "completions/min_length": 302.0, "epoch": 0.06002875629043854, "grad_norm": 1.098894306236822, "kl": 0.4969830960035324, "learning_rate": 9.937276343291708e-07, "loss": 0.0004978030920028687, "memory(GiB)": 165.8, "reward": 2.7074077129364014, "reward_std": 0.1302880346775055, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7074074745178223, "rewards/GeoVisalEntityMatch2ORM/std": 0.21454980969429016, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 501, "train_speed(iter/s)": 0.026596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 376.8958435058594, "completions/min_length": 304.0, "epoch": 0.06014857416726575, "grad_norm": 1.1411425123500483, "kl": 0.5485790371894836, "learning_rate": 9.936975784041677e-07, "loss": 0.0005493512144312263, "memory(GiB)": 165.8, "reward": 2.5653645992279053, "reward_std": 0.06502815335988998, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5653645992279053, "rewards/GeoVisalEntityMatch2ORM/std": 0.1848158836364746, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 502, "train_speed(iter/s)": 0.02661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 370.88543701171875, "completions/min_length": 272.0, "epoch": 0.060268392044092976, "grad_norm": 1.2309491744875372, "kl": 0.5284487903118134, "learning_rate": 9.936674510968947e-07, "loss": 0.000529783486854285, "memory(GiB)": 165.8, "reward": 2.5801243782043457, "reward_std": 0.096987284719944, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5801241993904114, "rewards/GeoVisalEntityMatch2ORM/std": 0.11838973313570023, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 503, "train_speed(iter/s)": 0.026624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 478.0, "completions/mean_length": 379.8020935058594, "completions/min_length": 314.0, "epoch": 0.0603882099209202, "grad_norm": 1.1814872707476172, "kl": 0.5158532857894897, "learning_rate": 9.936372524117073e-07, "loss": 0.0005159875145182014, "memory(GiB)": 165.8, "reward": 2.4110121726989746, "reward_std": 0.22309830784797668, "rewards/GeoLocAccuracyV2ORM/mean": 0.7291666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.44672298431396484, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6922619342803955, "rewards/GeoVisalEntityMatch2ORM/std": 0.11572659760713577, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 504, "train_speed(iter/s)": 0.026625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 443.0, "completions/mean_length": 372.82293701171875, "completions/min_length": 329.0, "epoch": 0.06050802779774742, "grad_norm": 1.2154062387762736, "kl": 0.9012809097766876, "learning_rate": 9.936069823529722e-07, "loss": 0.0008941665291786194, "memory(GiB)": 165.8, "reward": 2.5652778148651123, "reward_std": 0.352538526058197, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8152777552604675, "rewards/GeoVisalEntityMatch2ORM/std": 0.12074734270572662, "rewards/MathFormat/mean": 0.875, "rewards/MathFormat/std": 0.33245500922203064, "step": 505, "train_speed(iter/s)": 0.026622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 391.8125, "completions/min_length": 318.0, "epoch": 0.060627845674574646, "grad_norm": 1.1687069984814353, "kl": 0.5048717260360718, "learning_rate": 9.93576640925066e-07, "loss": 0.0005037685623392463, "memory(GiB)": 165.8, "reward": 2.544010639190674, "reward_std": 0.21433866024017334, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713693141937, "rewards/GeoVisalEntityMatch2ORM/mean": 0.65234375, "rewards/GeoVisalEntityMatch2ORM/std": 0.3111492991447449, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 506, "train_speed(iter/s)": 0.026635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 368.13543701171875, "completions/min_length": 292.0, "epoch": 0.06074766355140187, "grad_norm": 1.1838422211268682, "kl": 0.5755870044231415, "learning_rate": 9.935462281323754e-07, "loss": 0.0005763024091720581, "memory(GiB)": 165.8, "reward": 2.414393901824951, "reward_std": 0.20558933913707733, "rewards/GeoLocAccuracyV2ORM/mean": 0.8458333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.36067673563957214, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5685606002807617, "rewards/GeoVisalEntityMatch2ORM/std": 0.1690179407596588, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 507, "train_speed(iter/s)": 0.026648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 398.1458435058594, "completions/min_length": 317.0, "epoch": 0.06086748142822909, "grad_norm": 1.1270366173262985, "kl": 0.5484834909439087, "learning_rate": 9.935157439792981e-07, "loss": 0.0005486036534421146, "memory(GiB)": 165.8, "reward": 2.5106163024902344, "reward_std": 0.07163676619529724, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7106162309646606, "rewards/GeoVisalEntityMatch2ORM/std": 0.12485605478286743, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 508, "train_speed(iter/s)": 0.02665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 404.7083435058594, "completions/min_length": 330.0, "epoch": 0.060987299305056315, "grad_norm": 1.234001792500901, "kl": 0.9842311143875122, "learning_rate": 9.934851884702413e-07, "loss": 0.000941013335250318, "memory(GiB)": 165.8, "reward": 2.6498141288757324, "reward_std": 0.2119176983833313, "rewards/GeoLocAccuracyV2ORM/mean": 0.9625000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.18194851279258728, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6977306604385376, "rewards/GeoVisalEntityMatch2ORM/std": 0.16774223744869232, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 509, "train_speed(iter/s)": 0.026661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 402.04168701171875, "completions/min_length": 351.0, "epoch": 0.06110711718188354, "grad_norm": 1.146887002674205, "kl": 0.5317076444625854, "learning_rate": 9.93454561609623e-07, "loss": 0.000532130419742316, "memory(GiB)": 165.8, "reward": 2.4802000522613525, "reward_std": 0.18742525577545166, "rewards/GeoLocAccuracyV2ORM/mean": 0.8125, "rewards/GeoLocAccuracyV2ORM/std": 0.39236128330230713, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6677001714706421, "rewards/GeoVisalEntityMatch2ORM/std": 0.16373403370380402, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 510, "train_speed(iter/s)": 0.026675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/mean_length": 402.60418701171875, "completions/min_length": 306.0, "epoch": 0.06122693505871076, "grad_norm": 1.1588771231330721, "kl": 0.552627444267273, "learning_rate": 9.93423863401872e-07, "loss": 0.000554405152797699, "memory(GiB)": 165.8, "reward": 2.80798602104187, "reward_std": 0.10033617913722992, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8079861402511597, "rewards/GeoVisalEntityMatch2ORM/std": 0.1305927038192749, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 511, "train_speed(iter/s)": 0.026694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/mean_length": 408.5208435058594, "completions/min_length": 341.0, "epoch": 0.061346752935537985, "grad_norm": 1.140034847961708, "kl": 0.5301571786403656, "learning_rate": 9.933930938514262e-07, "loss": 0.0005315641756169498, "memory(GiB)": 165.8, "reward": 2.551649570465088, "reward_std": 0.19406256079673767, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.29199856519699097, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6766493320465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.21250410377979279, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 512, "train_speed(iter/s)": 0.026706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 519.0, "completions/mean_length": 430.25, "completions/min_length": 341.0, "epoch": 0.06146657081236521, "grad_norm": 1.0867297694166471, "kl": 0.638647735118866, "learning_rate": 9.933622529627346e-07, "loss": 0.0006374642252922058, "memory(GiB)": 165.8, "reward": 2.1841766834259033, "reward_std": 0.3729952573776245, "rewards/GeoLocAccuracyV2ORM/mean": 0.6458333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4629235863685608, "rewards/GeoVisalEntityMatch2ORM/mean": 0.705009937286377, "rewards/GeoVisalEntityMatch2ORM/std": 0.17917034029960632, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 513, "train_speed(iter/s)": 0.026717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 552.0, "completions/mean_length": 448.84375, "completions/min_length": 357.0, "epoch": 0.061586388689192424, "grad_norm": 1.1204464994808552, "kl": 0.6008109152317047, "learning_rate": 9.933313407402567e-07, "loss": 0.0005961855640634894, "memory(GiB)": 165.8, "reward": 2.352344036102295, "reward_std": 0.34553825855255127, "rewards/GeoLocAccuracyV2ORM/mean": 0.8354166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.3346574604511261, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5481771230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.21850837767124176, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 514, "train_speed(iter/s)": 0.026729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 529.0, "completions/mean_length": 429.54168701171875, "completions/min_length": 350.0, "epoch": 0.06170620656601965, "grad_norm": 1.0473423523161862, "kl": 0.5467195510864258, "learning_rate": 9.933003571884619e-07, "loss": 0.0005460754036903381, "memory(GiB)": 165.8, "reward": 2.688244342803955, "reward_std": 0.19460563361644745, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7090773582458496, "rewards/GeoVisalEntityMatch2ORM/std": 0.1584702432155609, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 515, "train_speed(iter/s)": 0.026743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 552.0, "completions/mean_length": 476.78125, "completions/min_length": 397.0, "epoch": 0.06182602444284687, "grad_norm": 1.064170685256136, "kl": 0.6508584916591644, "learning_rate": 9.932693023118298e-07, "loss": 0.0006436010589823127, "memory(GiB)": 165.8, "reward": 2.505965232849121, "reward_std": 0.4645810127258301, "rewards/GeoLocAccuracyV2ORM/mean": 0.9020833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2901829481124878, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6767985224723816, "rewards/GeoVisalEntityMatch2ORM/std": 0.17029884457588196, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 516, "train_speed(iter/s)": 0.026755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 524.0, "completions/mean_length": 466.6875, "completions/min_length": 395.0, "epoch": 0.061945842319674094, "grad_norm": 1.1801082761319306, "kl": 1.0661452412605286, "learning_rate": 9.932381761148505e-07, "loss": 0.00102967768907547, "memory(GiB)": 165.8, "reward": 2.5935020446777344, "reward_std": 0.35241419076919556, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336149215698242, "rewards/GeoVisalEntityMatch2ORM/mean": 0.697668731212616, "rewards/GeoVisalEntityMatch2ORM/std": 0.1623963713645935, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 517, "train_speed(iter/s)": 0.026768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4791666666666667, "completions/max_length": 552.0, "completions/mean_length": 470.625, "completions/min_length": 368.0, "epoch": 0.06206566019650132, "grad_norm": 1.1555822267920843, "kl": 1.1073578596115112, "learning_rate": 9.93206978602025e-07, "loss": 0.0010803652694448829, "memory(GiB)": 165.8, "reward": 1.8015623092651367, "reward_std": 0.6661809086799622, "rewards/GeoLocAccuracyV2ORM/mean": 0.5229166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.5004165172576904, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7473958730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.19542768597602844, "rewards/MathFormat/mean": 0.53125, "rewards/MathFormat/std": 0.5016420483589172, "step": 518, "train_speed(iter/s)": 0.026764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 506.0, "completions/mean_length": 451.0, "completions/min_length": 385.0, "epoch": 0.06218547807332854, "grad_norm": 1.1368399547583408, "kl": 1.185734897851944, "learning_rate": 9.931757097778636e-07, "loss": 0.0011683901539072394, "memory(GiB)": 165.8, "reward": 2.4231152534484863, "reward_std": 0.3456679582595825, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6731151342391968, "rewards/GeoVisalEntityMatch2ORM/std": 0.19960813224315643, "rewards/MathFormat/mean": 0.875, "rewards/MathFormat/std": 0.33245500922203064, "step": 519, "train_speed(iter/s)": 0.026776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3229166666666667, "completions/max_length": 506.0, "completions/mean_length": 429.78125, "completions/min_length": 336.0, "epoch": 0.06230529595015576, "grad_norm": 1.7405401228209023, "kl": 1.7174145877361298, "learning_rate": 9.93144369646887e-07, "loss": 0.0016923982184380293, "memory(GiB)": 165.8, "reward": 2.034809112548828, "reward_std": 0.5441148281097412, "rewards/GeoLocAccuracyV2ORM/mean": 0.6770833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.47004571557044983, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6702257394790649, "rewards/GeoVisalEntityMatch2ORM/std": 0.19322873651981354, "rewards/MathFormat/mean": 0.6875, "rewards/MathFormat/std": 0.4659455716609955, "step": 520, "train_speed(iter/s)": 0.026773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 552.0, "completions/mean_length": 487.6770935058594, "completions/min_length": 397.0, "epoch": 0.062425113826982986, "grad_norm": 1.0153062640495827, "kl": 0.81080362200737, "learning_rate": 9.931129582136275e-07, "loss": 0.0007958449423313141, "memory(GiB)": 165.8, "reward": 2.5553030967712402, "reward_std": 0.46732133626937866, "rewards/GeoLocAccuracyV2ORM/mean": 0.90625, "rewards/GeoLocAccuracyV2ORM/std": 0.2930107116699219, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7428030967712402, "rewards/GeoVisalEntityMatch2ORM/std": 0.17547261714935303, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930107116699219, "step": 521, "train_speed(iter/s)": 0.026784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 552.0, "completions/mean_length": 460.4375, "completions/min_length": 386.0, "epoch": 0.0625449317038102, "grad_norm": 1.0235783562248901, "kl": 0.5780656039714813, "learning_rate": 9.930814754826261e-07, "loss": 0.000573833822272718, "memory(GiB)": 165.8, "reward": 2.2574493885040283, "reward_std": 0.35173219442367554, "rewards/GeoLocAccuracyV2ORM/mean": 0.6666666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4649089574813843, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6428661942481995, "rewards/GeoVisalEntityMatch2ORM/std": 0.12446568161249161, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 522, "train_speed(iter/s)": 0.026797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 506.0, "completions/mean_length": 414.15625, "completions/min_length": 301.0, "epoch": 0.06266474958063743, "grad_norm": 1.1919742830954192, "kl": 0.726296603679657, "learning_rate": 9.93049921458435e-07, "loss": 0.000715404748916626, "memory(GiB)": 165.8, "reward": 2.5672621726989746, "reward_std": 0.3773980438709259, "rewards/GeoLocAccuracyV2ORM/mean": 0.9270833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.26136451959609985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7130951881408691, "rewards/GeoVisalEntityMatch2ORM/std": 0.24344336986541748, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 523, "train_speed(iter/s)": 0.026797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 507.0, "completions/mean_length": 430.34375, "completions/min_length": 330.0, "epoch": 0.06278456745746465, "grad_norm": 1.1215682089775676, "kl": 0.5804945826530457, "learning_rate": 9.930182961456166e-07, "loss": 0.0005774585297331214, "memory(GiB)": 165.8, "reward": 2.5164685249328613, "reward_std": 0.23427949845790863, "rewards/GeoLocAccuracyV2ORM/mean": 0.9354166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.23304806649684906, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6123015880584717, "rewards/GeoVisalEntityMatch2ORM/std": 0.1330573409795761, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 524, "train_speed(iter/s)": 0.026809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 496.0, "completions/mean_length": 413.71875, "completions/min_length": 348.0, "epoch": 0.06290438533429188, "grad_norm": 1.1074907836470356, "kl": 0.5133658647537231, "learning_rate": 9.929865995487434e-07, "loss": 0.0005142515292391181, "memory(GiB)": 165.8, "reward": 2.5321760177612305, "reward_std": 0.1538034826517105, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5655093193054199, "rewards/GeoVisalEntityMatch2ORM/std": 0.10114552080631256, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 525, "train_speed(iter/s)": 0.026822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 511.0, "completions/mean_length": 396.44793701171875, "completions/min_length": 301.0, "epoch": 0.0630242032111191, "grad_norm": 1.136080384830087, "kl": 0.5228615403175354, "learning_rate": 9.92954831672398e-07, "loss": 0.0005239049787633121, "memory(GiB)": 165.8, "reward": 2.4391493797302246, "reward_std": 0.06812814623117447, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6891493201255798, "rewards/GeoVisalEntityMatch2ORM/std": 0.14873796701431274, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 526, "train_speed(iter/s)": 0.026835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 384.5208435058594, "completions/min_length": 306.0, "epoch": 0.06314402108794633, "grad_norm": 1.1375910101188018, "kl": 0.5488036274909973, "learning_rate": 9.929229925211743e-07, "loss": 0.0005492568016052246, "memory(GiB)": 165.8, "reward": 2.7753474712371826, "reward_std": 0.09929879009723663, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7753472328186035, "rewards/GeoVisalEntityMatch2ORM/std": 0.1233837828040123, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 527, "train_speed(iter/s)": 0.026848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 429.0, "completions/mean_length": 359.3333435058594, "completions/min_length": 297.0, "epoch": 0.06326383896477354, "grad_norm": 1.1762892826589473, "kl": 0.6014562845230103, "learning_rate": 9.928910820996756e-07, "loss": 0.0006011873483657837, "memory(GiB)": 165.8, "reward": 2.234938621520996, "reward_std": 0.11803257465362549, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.49535536766052246, "rewards/GeoVisalEntityMatch2ORM/std": 0.24894875288009644, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 528, "train_speed(iter/s)": 0.026849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/mean_length": 330.15625, "completions/min_length": 282.0, "epoch": 0.06338365684160077, "grad_norm": 1.293018638309666, "kl": 0.5467047989368439, "learning_rate": 9.928591004125153e-07, "loss": 0.0005483081331476569, "memory(GiB)": 165.8, "reward": 2.3797616958618164, "reward_std": 0.2199690192937851, "rewards/GeoLocAccuracyV2ORM/mean": 0.7416666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.3760365843772888, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6380952596664429, "rewards/GeoVisalEntityMatch2ORM/std": 0.2888846695423126, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 529, "train_speed(iter/s)": 0.026846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 349.35418701171875, "completions/min_length": 265.0, "epoch": 0.06350347471842799, "grad_norm": 1.2708549163860219, "kl": 0.5338581204414368, "learning_rate": 9.92827047464318e-07, "loss": 0.0005347716505639255, "memory(GiB)": 165.8, "reward": 2.2707672119140625, "reward_std": 0.20661187171936035, "rewards/GeoLocAccuracyV2ORM/mean": 0.699999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.45883145928382874, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5707672238349915, "rewards/GeoVisalEntityMatch2ORM/std": 0.18185614049434662, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 530, "train_speed(iter/s)": 0.02686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 392.0, "completions/mean_length": 328.375, "completions/min_length": 241.0, "epoch": 0.06362329259525522, "grad_norm": 1.2164846425567695, "kl": 0.5352540910243988, "learning_rate": 9.92794923259718e-07, "loss": 0.0005368869751691818, "memory(GiB)": 165.8, "reward": 2.573908805847168, "reward_std": 0.14796333014965057, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5739087462425232, "rewards/GeoVisalEntityMatch2ORM/std": 0.26574939489364624, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 531, "train_speed(iter/s)": 0.026874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.0, "completions/mean_length": 297.8958435058594, "completions/min_length": 178.0, "epoch": 0.06374311047208243, "grad_norm": 1.2661497494363514, "kl": 0.5573208630084991, "learning_rate": 9.9276272780336e-07, "loss": 0.0005589040811173618, "memory(GiB)": 165.8, "reward": 2.792187452316284, "reward_std": 0.11896447092294693, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7921874523162842, "rewards/GeoVisalEntityMatch2ORM/std": 0.18151335418224335, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 532, "train_speed(iter/s)": 0.026876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 349.0, "completions/mean_length": 300.41668701171875, "completions/min_length": 245.0, "epoch": 0.06386292834890965, "grad_norm": 1.2605324173237573, "kl": 0.5705428421497345, "learning_rate": 9.927304610998992e-07, "loss": 0.0005716010928153992, "memory(GiB)": 165.8, "reward": 2.5049543380737305, "reward_std": 0.1345176249742508, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5216209888458252, "rewards/GeoVisalEntityMatch2ORM/std": 0.11488498747348785, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 533, "train_speed(iter/s)": 0.026891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.0, "completions/mean_length": 308.5625, "completions/min_length": 221.0, "epoch": 0.06398274622573688, "grad_norm": 1.3386787090438552, "kl": 0.5667218863964081, "learning_rate": 9.926981231540007e-07, "loss": 0.0005681117763742805, "memory(GiB)": 165.8, "reward": 2.459226131439209, "reward_std": 0.14461766183376312, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.17868918180465698, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5008928775787354, "rewards/GeoVisalEntityMatch2ORM/std": 0.24917317926883698, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 534, "train_speed(iter/s)": 0.026888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 374.0, "completions/mean_length": 302.71875, "completions/min_length": 224.0, "epoch": 0.0641025641025641, "grad_norm": 1.3315621864633025, "kl": 0.5619111955165863, "learning_rate": 9.9266571397034e-07, "loss": 0.0005637705326080322, "memory(GiB)": 165.8, "reward": 2.3665878772735596, "reward_std": 0.1575135588645935, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.541587769985199, "rewards/GeoVisalEntityMatch2ORM/std": 0.14175499975681305, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 535, "train_speed(iter/s)": 0.026901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.0, "completions/mean_length": 283.125, "completions/min_length": 208.0, "epoch": 0.06422238197939133, "grad_norm": 1.2435826006158928, "kl": 0.5159542858600616, "learning_rate": 9.926332335536038e-07, "loss": 0.0005167722702026367, "memory(GiB)": 165.8, "reward": 2.6861112117767334, "reward_std": 0.11734341830015182, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6861110925674438, "rewards/GeoVisalEntityMatch2ORM/std": 0.14614275097846985, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 536, "train_speed(iter/s)": 0.0269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.0, "completions/mean_length": 282.7083435058594, "completions/min_length": 199.0, "epoch": 0.06434219985621854, "grad_norm": 1.297487050030913, "kl": 0.5564651489257812, "learning_rate": 9.926006819084876e-07, "loss": 0.0005576225812546909, "memory(GiB)": 165.8, "reward": 2.555468797683716, "reward_std": 0.1340932846069336, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5638021230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.16806986927986145, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 537, "train_speed(iter/s)": 0.026915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.0, "completions/mean_length": 289.04168701171875, "completions/min_length": 208.0, "epoch": 0.06446201773304577, "grad_norm": 1.3488845091877661, "kl": 0.52564537525177, "learning_rate": 9.925680590396982e-07, "loss": 0.0005275259609334171, "memory(GiB)": 165.8, "reward": 2.567101001739502, "reward_std": 0.08451811224222183, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5671007037162781, "rewards/GeoVisalEntityMatch2ORM/std": 0.11850428581237793, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 538, "train_speed(iter/s)": 0.026929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.0, "completions/mean_length": 302.66668701171875, "completions/min_length": 257.0, "epoch": 0.06458183560987299, "grad_norm": 1.2079885037633757, "kl": 0.5916000604629517, "learning_rate": 9.925353649519528e-07, "loss": 0.0005926539888605475, "memory(GiB)": 165.8, "reward": 2.3169562816619873, "reward_std": 0.0938446968793869, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5669560432434082, "rewards/GeoVisalEntityMatch2ORM/std": 0.14275126159191132, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 539, "train_speed(iter/s)": 0.026943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.0, "completions/mean_length": 305.13543701171875, "completions/min_length": 251.0, "epoch": 0.06470165348670022, "grad_norm": 1.311482448470608, "kl": 0.5688731074333191, "learning_rate": 9.925025996499778e-07, "loss": 0.0005693510174751282, "memory(GiB)": 165.8, "reward": 2.5801258087158203, "reward_std": 0.13985022902488708, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003034889698029, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5988757014274597, "rewards/GeoVisalEntityMatch2ORM/std": 0.16536836326122284, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 540, "train_speed(iter/s)": 0.026958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 374.0, "completions/mean_length": 309.26043701171875, "completions/min_length": 255.0, "epoch": 0.06482147136352744, "grad_norm": 1.0674586392276395, "kl": 0.5494218468666077, "learning_rate": 9.924697631385112e-07, "loss": 0.0005504166474565864, "memory(GiB)": 165.8, "reward": 2.6483466625213623, "reward_std": 0.20497287809848785, "rewards/GeoLocAccuracyV2ORM/mean": 0.8999999761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.26596397161483765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7483466863632202, "rewards/GeoVisalEntityMatch2ORM/std": 0.19197897613048553, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 541, "train_speed(iter/s)": 0.02696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.0, "completions/mean_length": 305.1458435058594, "completions/min_length": 255.0, "epoch": 0.06494128924035467, "grad_norm": 1.3440452961221745, "kl": 0.5695317089557648, "learning_rate": 9.924368554223005e-07, "loss": 0.0005706734955310822, "memory(GiB)": 165.8, "reward": 2.672482967376709, "reward_std": 0.11124883592128754, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6724826693534851, "rewards/GeoVisalEntityMatch2ORM/std": 0.14791987836360931, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 542, "train_speed(iter/s)": 0.026975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.0, "completions/mean_length": 307.84375, "completions/min_length": 262.0, "epoch": 0.06506110711718188, "grad_norm": 1.345506037078858, "kl": 0.5634509325027466, "learning_rate": 9.92403876506104e-07, "loss": 0.0005662739276885986, "memory(GiB)": 165.8, "reward": 2.703868865966797, "reward_std": 0.1269172728061676, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7122024297714233, "rewards/GeoVisalEntityMatch2ORM/std": 0.2129855453968048, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 543, "train_speed(iter/s)": 0.026973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.0, "completions/mean_length": 306.03125, "completions/min_length": 262.0, "epoch": 0.06518092499400911, "grad_norm": 1.2484651564572664, "kl": 0.559652179479599, "learning_rate": 9.923708263946897e-07, "loss": 0.0005594193935394287, "memory(GiB)": 165.8, "reward": 2.576223611831665, "reward_std": 0.08236336708068848, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.576223611831665, "rewards/GeoVisalEntityMatch2ORM/std": 0.22430384159088135, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 544, "train_speed(iter/s)": 0.026987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 362.0, "completions/mean_length": 320.01043701171875, "completions/min_length": 263.0, "epoch": 0.06530074287083633, "grad_norm": 1.3283916335807793, "kl": 0.5829790830612183, "learning_rate": 9.923377050928364e-07, "loss": 0.0005842844839207828, "memory(GiB)": 165.8, "reward": 2.5031023025512695, "reward_std": 0.1383993774652481, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002317905426, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5364354848861694, "rewards/GeoVisalEntityMatch2ORM/std": 0.1377583146095276, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 545, "train_speed(iter/s)": 0.027002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 391.0, "completions/mean_length": 319.0833435058594, "completions/min_length": 246.0, "epoch": 0.06542056074766354, "grad_norm": 1.3284572814116158, "kl": 0.5551646947860718, "learning_rate": 9.923045126053328e-07, "loss": 0.0005553936352953315, "memory(GiB)": 165.8, "reward": 2.574338674545288, "reward_std": 0.2525475025177002, "rewards/GeoLocAccuracyV2ORM/mean": 0.9395833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.21739082038402557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6451719999313354, "rewards/GeoVisalEntityMatch2ORM/std": 0.17256315052509308, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 546, "train_speed(iter/s)": 0.027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.0, "completions/mean_length": 329.96875, "completions/min_length": 273.0, "epoch": 0.06554037862449077, "grad_norm": 1.232093426413076, "kl": 0.615178108215332, "learning_rate": 9.922712489369784e-07, "loss": 0.0006170471897348762, "memory(GiB)": 165.8, "reward": 2.7020833492279053, "reward_std": 0.12892600893974304, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7104166746139526, "rewards/GeoVisalEntityMatch2ORM/std": 0.18658597767353058, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 547, "train_speed(iter/s)": 0.027006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/mean_length": 326.0833435058594, "completions/min_length": 281.0, "epoch": 0.06566019650131799, "grad_norm": 1.240191778711348, "kl": 0.5845634639263153, "learning_rate": 9.922379140925825e-07, "loss": 0.0005852754111401737, "memory(GiB)": 165.8, "reward": 2.678992986679077, "reward_std": 0.122347392141819, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6789931058883667, "rewards/GeoVisalEntityMatch2ORM/std": 0.22413243353366852, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 548, "train_speed(iter/s)": 0.027019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 392.0, "completions/mean_length": 334.60418701171875, "completions/min_length": 290.0, "epoch": 0.06578001437814522, "grad_norm": 1.1621575191291535, "kl": 0.5725390017032623, "learning_rate": 9.92204508076965e-07, "loss": 0.0005734165897592902, "memory(GiB)": 165.8, "reward": 2.550434112548828, "reward_std": 0.2800905704498291, "rewards/GeoLocAccuracyV2ORM/mean": 0.8187500834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.34589701890945435, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7316839694976807, "rewards/GeoVisalEntityMatch2ORM/std": 0.1886572241783142, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 549, "train_speed(iter/s)": 0.027021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/mean_length": 345.10418701171875, "completions/min_length": 286.0, "epoch": 0.06589983225497244, "grad_norm": 1.2231920840876604, "kl": 0.5738468766212463, "learning_rate": 9.92171030894956e-07, "loss": 0.0005757560720667243, "memory(GiB)": 165.8, "reward": 2.707775592803955, "reward_std": 0.09029518067836761, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7077752947807312, "rewards/GeoVisalEntityMatch2ORM/std": 0.15080520510673523, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 550, "train_speed(iter/s)": 0.027033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/mean_length": 344.60418701171875, "completions/min_length": 285.0, "epoch": 0.06601965013179967, "grad_norm": 1.2339812910185932, "kl": 0.5758319795131683, "learning_rate": 9.921374825513956e-07, "loss": 0.0005777999758720398, "memory(GiB)": 165.8, "reward": 2.504852294921875, "reward_std": 0.10977214574813843, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5048522353172302, "rewards/GeoVisalEntityMatch2ORM/std": 0.19752907752990723, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 551, "train_speed(iter/s)": 0.027046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 407.0, "completions/mean_length": 346.0833435058594, "completions/min_length": 280.0, "epoch": 0.06613946800862688, "grad_norm": 1.3233533027243225, "kl": 0.5964938998222351, "learning_rate": 9.921038630511345e-07, "loss": 0.0005980457062833011, "memory(GiB)": 165.8, "reward": 2.7346560955047607, "reward_std": 0.17074739933013916, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7679893970489502, "rewards/GeoVisalEntityMatch2ORM/std": 0.20668473839759827, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 552, "train_speed(iter/s)": 0.027057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 407.0, "completions/mean_length": 344.3020935058594, "completions/min_length": 286.0, "epoch": 0.06625928588545411, "grad_norm": 1.2247168375333033, "kl": 0.5961660146713257, "learning_rate": 9.92070172399034e-07, "loss": 0.0005963742733001709, "memory(GiB)": 165.8, "reward": 2.4087963104248047, "reward_std": 0.11903506517410278, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4087963104248047, "rewards/GeoVisalEntityMatch2ORM/std": 0.15885323286056519, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 553, "train_speed(iter/s)": 0.027071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 415.0, "completions/mean_length": 348.1458435058594, "completions/min_length": 292.0, "epoch": 0.06637910376228133, "grad_norm": 1.2509710583003488, "kl": 0.4980600029230118, "learning_rate": 9.92036410599965e-07, "loss": 0.0005000283708795905, "memory(GiB)": 165.8, "reward": 2.4376487731933594, "reward_std": 0.1981126070022583, "rewards/GeoLocAccuracyV2ORM/mean": 0.8937500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.28425803780555725, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5438988208770752, "rewards/GeoVisalEntityMatch2ORM/std": 0.11641914397478104, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 554, "train_speed(iter/s)": 0.027084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 346.3958435058594, "completions/min_length": 286.0, "epoch": 0.06649892163910856, "grad_norm": 1.1949827133821005, "kl": 0.6018542647361755, "learning_rate": 9.920025776588092e-07, "loss": 0.0006027718773111701, "memory(GiB)": 165.8, "reward": 2.2672953605651855, "reward_std": 0.11968090385198593, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5172953605651855, "rewards/GeoVisalEntityMatch2ORM/std": 0.16555500030517578, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 555, "train_speed(iter/s)": 0.027098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 417.0, "completions/mean_length": 360.51043701171875, "completions/min_length": 307.0, "epoch": 0.06661873951593578, "grad_norm": 1.2243779548344298, "kl": 0.8146825134754181, "learning_rate": 9.919686735804585e-07, "loss": 0.0008076379890553653, "memory(GiB)": 165.8, "reward": 2.0892114639282227, "reward_std": 0.3086310625076294, "rewards/GeoLocAccuracyV2ORM/mean": 0.6208333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.4825189709663391, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5725446939468384, "rewards/GeoVisalEntityMatch2ORM/std": 0.16890794038772583, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070801794528961, "step": 556, "train_speed(iter/s)": 0.027097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 423.0, "completions/mean_length": 350.01043701171875, "completions/min_length": 281.0, "epoch": 0.066738557392763, "grad_norm": 1.1556100484282417, "kl": 0.5483109951019287, "learning_rate": 9.919346983698146e-07, "loss": 0.0005504290456883609, "memory(GiB)": 165.8, "reward": 2.576637029647827, "reward_std": 0.14440670609474182, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5766369104385376, "rewards/GeoVisalEntityMatch2ORM/std": 0.21341939270496368, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 557, "train_speed(iter/s)": 0.027103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.0, "completions/mean_length": 345.4895935058594, "completions/min_length": 273.0, "epoch": 0.06685837526959022, "grad_norm": 1.2462317982417161, "kl": 0.5657894313335419, "learning_rate": 9.919006520317902e-07, "loss": 0.0005675318534485996, "memory(GiB)": 165.8, "reward": 2.4662327766418457, "reward_std": 0.10995551943778992, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.666232705116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.1641327440738678, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 558, "train_speed(iter/s)": 0.027116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 379.125, "completions/min_length": 330.0, "epoch": 0.06697819314641744, "grad_norm": 1.2517751728979762, "kl": 0.5970714390277863, "learning_rate": 9.918665345713078e-07, "loss": 0.000597730278968811, "memory(GiB)": 165.8, "reward": 2.359924793243408, "reward_std": 0.16159367561340332, "rewards/GeoLocAccuracyV2ORM/mean": 0.7750000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.36157551407814026, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5849248170852661, "rewards/GeoVisalEntityMatch2ORM/std": 0.17824535071849823, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 559, "train_speed(iter/s)": 0.027129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 388.15625, "completions/min_length": 330.0, "epoch": 0.06709801102324467, "grad_norm": 1.2429210698196513, "kl": 0.556084930896759, "learning_rate": 9.918323459933004e-07, "loss": 0.0005586830666288733, "memory(GiB)": 165.8, "reward": 2.5776290893554688, "reward_std": 0.13320842385292053, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5776289701461792, "rewards/GeoVisalEntityMatch2ORM/std": 0.21943806111812592, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 560, "train_speed(iter/s)": 0.027141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 413.0, "completions/mean_length": 366.38543701171875, "completions/min_length": 328.0, "epoch": 0.06721782890007189, "grad_norm": 1.1175971761655004, "kl": 0.5685409903526306, "learning_rate": 9.917980863027114e-07, "loss": 0.0005702798953279853, "memory(GiB)": 165.8, "reward": 2.6086807250976562, "reward_std": 0.09198355674743652, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6086806058883667, "rewards/GeoVisalEntityMatch2ORM/std": 0.21504086256027222, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 561, "train_speed(iter/s)": 0.027142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 378.34375, "completions/min_length": 322.0, "epoch": 0.06733764677689912, "grad_norm": 1.1191636372197509, "kl": 0.586480438709259, "learning_rate": 9.91763755504494e-07, "loss": 0.0005878235097043216, "memory(GiB)": 165.8, "reward": 2.6312499046325684, "reward_std": 0.1468384861946106, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6395833492279053, "rewards/GeoVisalEntityMatch2ORM/std": 0.19887775182724, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 562, "train_speed(iter/s)": 0.027155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 360.3958435058594, "completions/min_length": 258.0, "epoch": 0.06745746465372633, "grad_norm": 1.027991121253289, "kl": 0.59991854429245, "learning_rate": 9.917293536036122e-07, "loss": 0.0006015002727508545, "memory(GiB)": 165.8, "reward": 2.695225715637207, "reward_std": 0.05350552126765251, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.695225715637207, "rewards/GeoVisalEntityMatch2ORM/std": 0.22616411745548248, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 563, "train_speed(iter/s)": 0.027167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 367.1770935058594, "completions/min_length": 319.0, "epoch": 0.06757728253055356, "grad_norm": 1.1525525244640429, "kl": 0.558300644159317, "learning_rate": 9.9169488060504e-07, "loss": 0.0005602141609415412, "memory(GiB)": 165.8, "reward": 2.683333396911621, "reward_std": 0.18178308010101318, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713991165161, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7916666865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.19814413785934448, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 564, "train_speed(iter/s)": 0.027179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 362.6458435058594, "completions/min_length": 314.0, "epoch": 0.06769710040738078, "grad_norm": 1.267361888210985, "kl": 0.5869503319263458, "learning_rate": 9.916603365137616e-07, "loss": 0.0005880197277292609, "memory(GiB)": 165.8, "reward": 2.419097423553467, "reward_std": 0.1350051462650299, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6795138716697693, "rewards/GeoVisalEntityMatch2ORM/std": 0.2020433396100998, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 565, "train_speed(iter/s)": 0.027192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 481.0, "completions/mean_length": 380.625, "completions/min_length": 319.0, "epoch": 0.06781691828420801, "grad_norm": 1.9332779713656785, "kl": 0.5914437770843506, "learning_rate": 9.916257213347718e-07, "loss": 0.000591624528169632, "memory(GiB)": 165.8, "reward": 2.0628719329833984, "reward_std": 0.9102820158004761, "rewards/GeoLocAccuracyV2ORM/mean": 0.65625, "rewards/GeoLocAccuracyV2ORM/std": 0.4774521291255951, "rewards/GeoVisalEntityMatch2ORM/mean": 0.719122052192688, "rewards/GeoVisalEntityMatch2ORM/std": 0.09215851128101349, "rewards/MathFormat/mean": 0.6875, "rewards/MathFormat/std": 0.4659455716609955, "step": 566, "train_speed(iter/s)": 0.027188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/mean_length": 373.4270935058594, "completions/min_length": 322.0, "epoch": 0.06793673616103522, "grad_norm": 2.6440961110250543, "kl": 0.5867885947227478, "learning_rate": 9.915910350730756e-07, "loss": 0.0005884816637262702, "memory(GiB)": 165.8, "reward": 1.9907615184783936, "reward_std": 0.9583905935287476, "rewards/GeoLocAccuracyV2ORM/mean": 0.65625, "rewards/GeoLocAccuracyV2ORM/std": 0.4774521291255951, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6782614588737488, "rewards/GeoVisalEntityMatch2ORM/std": 0.20837824046611786, "rewards/MathFormat/mean": 0.65625, "rewards/MathFormat/std": 0.4774521291255951, "step": 567, "train_speed(iter/s)": 0.027201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 379.4895935058594, "completions/min_length": 311.0, "epoch": 0.06805655403786245, "grad_norm": 1.1308072129678723, "kl": 0.5593662559986115, "learning_rate": 9.91556277733688e-07, "loss": 0.0005613590474240482, "memory(GiB)": 165.8, "reward": 2.7036209106445312, "reward_std": 0.20404434204101562, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7244543433189392, "rewards/GeoVisalEntityMatch2ORM/std": 0.19397613406181335, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 568, "train_speed(iter/s)": 0.027212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 451.0, "completions/mean_length": 377.2083435058594, "completions/min_length": 308.0, "epoch": 0.06817637191468967, "grad_norm": 1.1787250862437946, "kl": 0.6078441143035889, "learning_rate": 9.915214493216344e-07, "loss": 0.0006093333358876407, "memory(GiB)": 165.8, "reward": 2.662748098373413, "reward_std": 0.20768046379089355, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357589185237885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7044147253036499, "rewards/GeoVisalEntityMatch2ORM/std": 0.1632441282272339, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 569, "train_speed(iter/s)": 0.027213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/mean_length": 392.94793701171875, "completions/min_length": 327.0, "epoch": 0.0682961897915169, "grad_norm": 1.1575887906336486, "kl": 0.556562602519989, "learning_rate": 9.91486549841951e-07, "loss": 0.0005576014518737793, "memory(GiB)": 165.8, "reward": 2.5425596237182617, "reward_std": 0.18055105209350586, "rewards/GeoLocAccuracyV2ORM/mean": 0.9416666030883789, "rewards/GeoLocAccuracyV2ORM/std": 0.20909158885478973, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6008929014205933, "rewards/GeoVisalEntityMatch2ORM/std": 0.2434975504875183, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 570, "train_speed(iter/s)": 0.027225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.0, "completions/mean_length": 412.57293701171875, "completions/min_length": 342.0, "epoch": 0.06841600766834412, "grad_norm": 0.9927619984488038, "kl": 0.6041633486747742, "learning_rate": 9.914515792996832e-07, "loss": 0.000904406129848212, "memory(GiB)": 165.8, "reward": 2.467365026473999, "reward_std": 0.05483953654766083, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7173649668693542, "rewards/GeoVisalEntityMatch2ORM/std": 0.11352512985467911, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 571, "train_speed(iter/s)": 0.027237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 393.25, "completions/min_length": 324.0, "epoch": 0.06853582554517133, "grad_norm": 1.239381519190124, "kl": 0.5984466969966888, "learning_rate": 9.914165376998876e-07, "loss": 0.0005990018835291266, "memory(GiB)": 165.8, "reward": 2.590625286102295, "reward_std": 0.21109244227409363, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.29970744252204895, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7239583730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.2808384597301483, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 572, "train_speed(iter/s)": 0.027249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 366.75, "completions/min_length": 311.0, "epoch": 0.06865564342199856, "grad_norm": 1.2429767229705329, "kl": 0.6022721827030182, "learning_rate": 9.913814250476306e-07, "loss": 0.0006027768249623477, "memory(GiB)": 165.8, "reward": 2.597867012023926, "reward_std": 0.14109420776367188, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6145337820053101, "rewards/GeoVisalEntityMatch2ORM/std": 0.17580874264240265, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 573, "train_speed(iter/s)": 0.027261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 395.6145935058594, "completions/min_length": 332.0, "epoch": 0.06877546129882578, "grad_norm": 1.3645288214205142, "kl": 0.5997940301895142, "learning_rate": 9.913462413479894e-07, "loss": 0.0006018827552907169, "memory(GiB)": 165.8, "reward": 2.6711807250976562, "reward_std": 0.23103423416614532, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713991165161, "rewards/GeoVisalEntityMatch2ORM/mean": 0.779513955116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.16970008611679077, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 574, "train_speed(iter/s)": 0.027273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 399.6145935058594, "completions/min_length": 326.0, "epoch": 0.06889527917565301, "grad_norm": 1.2003587364611275, "kl": 0.5826109349727631, "learning_rate": 9.913109866060506e-07, "loss": 0.0005839020013809204, "memory(GiB)": 165.8, "reward": 2.54350209236145, "reward_std": 0.1764470934867859, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5935019850730896, "rewards/GeoVisalEntityMatch2ORM/std": 0.1346978396177292, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 575, "train_speed(iter/s)": 0.027284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/mean_length": 408.32293701171875, "completions/min_length": 345.0, "epoch": 0.06901509705248023, "grad_norm": 1.174751979030782, "kl": 0.5718098282814026, "learning_rate": 9.912756608269118e-07, "loss": 0.0005736997118219733, "memory(GiB)": 165.8, "reward": 2.6538195610046387, "reward_std": 0.09060824662446976, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6538194417953491, "rewards/GeoVisalEntityMatch2ORM/std": 0.23181812465190887, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 576, "train_speed(iter/s)": 0.027301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/mean_length": 411.6145935058594, "completions/min_length": 343.0, "epoch": 0.06913491492930746, "grad_norm": 1.1251561322588446, "kl": 0.5660743415355682, "learning_rate": 9.91240264015681e-07, "loss": 0.000567857176065445, "memory(GiB)": 165.8, "reward": 2.6765151023864746, "reward_std": 0.08832729607820511, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6765152215957642, "rewards/GeoVisalEntityMatch2ORM/std": 0.21090148389339447, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 577, "train_speed(iter/s)": 0.027312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 453.0, "completions/mean_length": 390.34375, "completions/min_length": 311.0, "epoch": 0.06925473280613467, "grad_norm": 1.2836691193203205, "kl": 1.7817234992980957, "learning_rate": 9.912047961774756e-07, "loss": 0.0017508318414911628, "memory(GiB)": 165.8, "reward": 2.453113555908203, "reward_std": 0.32128262519836426, "rewards/GeoLocAccuracyV2ORM/mean": 0.90625, "rewards/GeoLocAccuracyV2ORM/std": 0.2930106818675995, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6406135559082031, "rewards/GeoVisalEntityMatch2ORM/std": 0.12751369178295135, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930106818675995, "step": 578, "train_speed(iter/s)": 0.027308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 463.0, "completions/mean_length": 402.8020935058594, "completions/min_length": 336.0, "epoch": 0.0693745506829619, "grad_norm": 1.241502028075721, "kl": 0.7289577424526215, "learning_rate": 9.911692573174242e-07, "loss": 0.0007243603467941284, "memory(GiB)": 165.8, "reward": 2.519564628601074, "reward_std": 0.3297158479690552, "rewards/GeoLocAccuracyV2ORM/mean": 0.8166667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.35023802518844604, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7445642948150635, "rewards/GeoVisalEntityMatch2ORM/std": 0.1505497694015503, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 579, "train_speed(iter/s)": 0.027308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/mean_length": 420.2395935058594, "completions/min_length": 331.0, "epoch": 0.06949436855978912, "grad_norm": 1.0994594819892838, "kl": 0.5869483351707458, "learning_rate": 9.911336474406648e-07, "loss": 0.0005890156025998294, "memory(GiB)": 165.8, "reward": 2.231278896331787, "reward_std": 0.1642482876777649, "rewards/GeoLocAccuracyV2ORM/mean": 0.7583333253860474, "rewards/GeoLocAccuracyV2ORM/std": 0.3692571818828583, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4729456305503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.17050497233867645, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 580, "train_speed(iter/s)": 0.027323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 477.0, "completions/mean_length": 409.46875, "completions/min_length": 344.0, "epoch": 0.06961418643661635, "grad_norm": 1.1974486457308513, "kl": 0.8070687651634216, "learning_rate": 9.910979665523468e-07, "loss": 0.0008060485124588013, "memory(GiB)": 165.8, "reward": 1.874913215637207, "reward_std": 0.3928601145744324, "rewards/GeoLocAccuracyV2ORM/mean": 0.5187500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.49551936984062195, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5540798902511597, "rewards/GeoVisalEntityMatch2ORM/std": 0.1562676727771759, "rewards/MathFormat/mean": 0.8020833730697632, "rewards/MathFormat/std": 0.4005205035209656, "step": 581, "train_speed(iter/s)": 0.027321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 409.7083435058594, "completions/min_length": 334.0, "epoch": 0.06973400431344356, "grad_norm": 1.3056925721213732, "kl": 0.5917142927646637, "learning_rate": 9.910622146576284e-07, "loss": 0.0005935728549957275, "memory(GiB)": 165.8, "reward": 2.538541793823242, "reward_std": 0.1278592050075531, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5468750596046448, "rewards/GeoVisalEntityMatch2ORM/std": 0.1785396933555603, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 582, "train_speed(iter/s)": 0.027332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 381.71875, "completions/min_length": 302.0, "epoch": 0.0698538221902708, "grad_norm": 1.3199387558774913, "kl": 0.5865985751152039, "learning_rate": 9.910263917616792e-07, "loss": 0.000588504015468061, "memory(GiB)": 165.8, "reward": 2.413541793823242, "reward_std": 0.20842161774635315, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.40824827551841736, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6218750476837158, "rewards/GeoVisalEntityMatch2ORM/std": 0.1315322071313858, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 583, "train_speed(iter/s)": 0.027344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 385.76043701171875, "completions/min_length": 306.0, "epoch": 0.06997364006709801, "grad_norm": 1.041354836572059, "kl": 0.5887397229671478, "learning_rate": 9.909904978696789e-07, "loss": 0.0005897345836274326, "memory(GiB)": 165.8, "reward": 2.547858953475952, "reward_std": 0.10637505352497101, "rewards/GeoLocAccuracyV2ORM/mean": 0.8166667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.33800238370895386, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7311921119689941, "rewards/GeoVisalEntityMatch2ORM/std": 0.19294512271881104, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 584, "train_speed(iter/s)": 0.027356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 361.8958435058594, "completions/min_length": 302.0, "epoch": 0.07009345794392523, "grad_norm": 1.2488780811813933, "kl": 0.6162400245666504, "learning_rate": 9.909545329868173e-07, "loss": 0.0006183137884363532, "memory(GiB)": 165.8, "reward": 2.470808744430542, "reward_std": 0.1551659256219864, "rewards/GeoLocAccuracyV2ORM/mean": 0.8166667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.33800238370895386, "rewards/GeoVisalEntityMatch2ORM/mean": 0.654141902923584, "rewards/GeoVisalEntityMatch2ORM/std": 0.17901811003684998, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 585, "train_speed(iter/s)": 0.027368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.0, "completions/mean_length": 362.5520935058594, "completions/min_length": 292.0, "epoch": 0.07021327582075246, "grad_norm": 1.279290877329786, "kl": 0.614190548658371, "learning_rate": 9.90918497118294e-07, "loss": 0.0006169590051285923, "memory(GiB)": 165.8, "reward": 2.492295026779175, "reward_std": 0.21669113636016846, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5443783402442932, "rewards/GeoVisalEntityMatch2ORM/std": 0.1860385239124298, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 586, "train_speed(iter/s)": 0.027369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 396.04168701171875, "completions/min_length": 318.0, "epoch": 0.07033309369757967, "grad_norm": 1.186002313659276, "kl": 0.6512655913829803, "learning_rate": 9.908823902693198e-07, "loss": 0.0006523082847706974, "memory(GiB)": 165.8, "reward": 2.388293743133545, "reward_std": 0.23007042706012726, "rewards/GeoLocAccuracyV2ORM/mean": 0.9250000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.473710298538208, "rewards/GeoVisalEntityMatch2ORM/std": 0.20086970925331116, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 587, "train_speed(iter/s)": 0.02738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 388.1770935058594, "completions/min_length": 321.0, "epoch": 0.0704529115744069, "grad_norm": 1.3131555666435297, "kl": 0.6384720504283905, "learning_rate": 9.908462124451152e-07, "loss": 0.0006404271116480231, "memory(GiB)": 165.8, "reward": 2.412128210067749, "reward_std": 0.1505427360534668, "rewards/GeoLocAccuracyV2ORM/mean": 0.7979166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.3524289131164551, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6246280074119568, "rewards/GeoVisalEntityMatch2ORM/std": 0.1895572394132614, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 588, "train_speed(iter/s)": 0.027392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 480.0, "completions/mean_length": 384.25, "completions/min_length": 326.0, "epoch": 0.07057272945123412, "grad_norm": 1.1461446992486637, "kl": 0.5847408473491669, "learning_rate": 9.908099636509109e-07, "loss": 0.00058673822786659, "memory(GiB)": 165.8, "reward": 2.329848051071167, "reward_std": 0.09682652354240417, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5798479914665222, "rewards/GeoVisalEntityMatch2ORM/std": 0.17596344649791718, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 589, "train_speed(iter/s)": 0.027403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 383.21875, "completions/min_length": 337.0, "epoch": 0.07069254732806135, "grad_norm": 1.215906073371688, "kl": 0.6580138206481934, "learning_rate": 9.90773643891948e-07, "loss": 0.0006596322054974735, "memory(GiB)": 165.8, "reward": 2.634486198425293, "reward_std": 0.10296022891998291, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6428196430206299, "rewards/GeoVisalEntityMatch2ORM/std": 0.09338618814945221, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 590, "train_speed(iter/s)": 0.027414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/mean_length": 384.22918701171875, "completions/min_length": 314.0, "epoch": 0.07081236520488857, "grad_norm": 1.3063052734214615, "kl": 0.5871869623661041, "learning_rate": 9.907372531734779e-07, "loss": 0.0005881240358576179, "memory(GiB)": 165.8, "reward": 2.4060921669006348, "reward_std": 0.10250093787908554, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6560921669006348, "rewards/GeoVisalEntityMatch2ORM/std": 0.20102611184120178, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 591, "train_speed(iter/s)": 0.027426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 393.125, "completions/min_length": 308.0, "epoch": 0.0709321830817158, "grad_norm": 1.1951439233139267, "kl": 0.6016219854354858, "learning_rate": 9.907007915007624e-07, "loss": 0.0006030748481862247, "memory(GiB)": 165.8, "reward": 2.4048032760620117, "reward_std": 0.0809798389673233, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6548032164573669, "rewards/GeoVisalEntityMatch2ORM/std": 0.2528587281703949, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 592, "train_speed(iter/s)": 0.027437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.20833333333333334, "completions/max_length": 469.0, "completions/mean_length": 384.84375, "completions/min_length": 333.0, "epoch": 0.07105200095854301, "grad_norm": 1.2650079132425578, "kl": 0.7898977696895599, "learning_rate": 9.90664258879073e-07, "loss": 0.000788908451795578, "memory(GiB)": 165.8, "reward": 2.1071760654449463, "reward_std": 0.392572283744812, "rewards/GeoLocAccuracyV2ORM/mean": 0.6333333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.4501851201057434, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6821759939193726, "rewards/GeoVisalEntityMatch2ORM/std": 0.16714146733283997, "rewards/MathFormat/mean": 0.7916666865348816, "rewards/MathFormat/std": 0.40824830532073975, "step": 593, "train_speed(iter/s)": 0.027433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 397.53125, "completions/min_length": 333.0, "epoch": 0.07117181883537024, "grad_norm": 1.2568071169981978, "kl": 0.6180711388587952, "learning_rate": 9.906276553136922e-07, "loss": 0.0006197243928909302, "memory(GiB)": 165.8, "reward": 2.231734037399292, "reward_std": 0.2342994213104248, "rewards/GeoLocAccuracyV2ORM/mean": 0.6416666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.4587549567222595, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5900673866271973, "rewards/GeoVisalEntityMatch2ORM/std": 0.21497775614261627, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 594, "train_speed(iter/s)": 0.027443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 403.72918701171875, "completions/min_length": 331.0, "epoch": 0.07129163671219746, "grad_norm": 1.1855606907309058, "kl": 0.6273353099822998, "learning_rate": 9.905909808099123e-07, "loss": 0.000629401474725455, "memory(GiB)": 165.8, "reward": 2.5495619773864746, "reward_std": 0.11400878429412842, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5495618581771851, "rewards/GeoVisalEntityMatch2ORM/std": 0.14030960202217102, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 595, "train_speed(iter/s)": 0.027455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 458.0, "completions/mean_length": 381.8645935058594, "completions/min_length": 319.0, "epoch": 0.07141145458902469, "grad_norm": 1.2664256728802081, "kl": 0.7036871910095215, "learning_rate": 9.90554235373036e-07, "loss": 0.0007053067674860358, "memory(GiB)": 165.8, "reward": 2.243055820465088, "reward_std": 0.10268649458885193, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7430555820465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.16855652630329132, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 596, "train_speed(iter/s)": 0.02745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 416.21875, "completions/min_length": 352.0, "epoch": 0.0715312724658519, "grad_norm": 1.1220176728126399, "kl": 0.6223405003547668, "learning_rate": 9.905174190083762e-07, "loss": 0.0006239439244382083, "memory(GiB)": 165.8, "reward": 2.5154199600219727, "reward_std": 0.14930881559848785, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.29970744252204895, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6487531661987305, "rewards/GeoVisalEntityMatch2ORM/std": 0.1526385098695755, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 597, "train_speed(iter/s)": 0.027456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 453.0, "completions/mean_length": 395.22918701171875, "completions/min_length": 339.0, "epoch": 0.07165109034267912, "grad_norm": 1.13158592052561, "kl": 0.6560257375240326, "learning_rate": 9.904805317212558e-07, "loss": 0.0006546514923684299, "memory(GiB)": 165.8, "reward": 2.583667755126953, "reward_std": 0.15754824876785278, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6045011281967163, "rewards/GeoVisalEntityMatch2ORM/std": 0.16321498155593872, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 598, "train_speed(iter/s)": 0.027457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 400.7708435058594, "completions/min_length": 341.0, "epoch": 0.07177090821950635, "grad_norm": 1.2438920542825862, "kl": 0.5988329946994781, "learning_rate": 9.904435735170089e-07, "loss": 0.0006002212758176029, "memory(GiB)": 165.8, "reward": 2.7013022899627686, "reward_std": 0.11955533921718597, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7096354365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.17306290566921234, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 599, "train_speed(iter/s)": 0.027474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 479.0, "completions/mean_length": 410.82293701171875, "completions/min_length": 326.0, "epoch": 0.07189072609633357, "grad_norm": 1.0263638110191389, "kl": 0.7027359306812286, "learning_rate": 9.904065444009783e-07, "loss": 0.0007008413667790592, "memory(GiB)": 165.8, "reward": 2.301041603088379, "reward_std": 0.261058509349823, "rewards/GeoLocAccuracyV2ORM/mean": 0.7375000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.434620201587677, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6677083373069763, "rewards/GeoVisalEntityMatch2ORM/std": 0.3273548483848572, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070802092552185, "step": 600, "train_speed(iter/s)": 0.027474 }, { "epoch": 0.07189072609633357, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.007316468253968253, "eval_completions/max_length": 452.86309523809524, "eval_completions/mean_length": 391.2068565005348, "eval_completions/min_length": 333.5, "eval_kl": 0.7154721848311878, "eval_loss": 0.000719002797268331, "eval_reward": 2.53046620956489, "eval_reward_std": 0.15022772363209652, "eval_rewards/GeoLocAccuracyV2ORM/mean": 0.9001488180032798, "eval_rewards/GeoLocAccuracyV2ORM/std": 0.13235520163462275, "eval_rewards/GeoVisalEntityMatch2ORM/mean": 0.6377578209198657, "eval_rewards/GeoVisalEntityMatch2ORM/std": 0.1520918731222905, "eval_rewards/MathFormat/mean": 0.9925595252286821, "eval_rewards/MathFormat/std": 0.018230303589786803, "eval_runtime": 1744.3156, "eval_samples_per_second": 0.193, "eval_steps_per_second": 0.005, "step": 600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 402.0520935058594, "completions/min_length": 332.0, "epoch": 0.0720105439731608, "grad_norm": 1.2223922853761975, "kl": 0.6039028167724609, "learning_rate": 9.903694443785185e-07, "loss": 0.0006049474468454719, "memory(GiB)": 165.8, "reward": 2.370002508163452, "reward_std": 0.0903356671333313, "rewards/GeoLocAccuracyV2ORM/mean": 0.5500000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.4579128921031952, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8200024366378784, "rewards/GeoVisalEntityMatch2ORM/std": 0.10583844035863876, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 601, "train_speed(iter/s)": 0.025403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 495.0, "completions/mean_length": 391.09375, "completions/min_length": 323.0, "epoch": 0.07213036184998801, "grad_norm": 1.2016695678037244, "kl": 0.6342152655124664, "learning_rate": 9.903322734549934e-07, "loss": 0.0006351619958877563, "memory(GiB)": 165.8, "reward": 2.4725446701049805, "reward_std": 0.20406463742256165, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.29199856519699097, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5975446701049805, "rewards/GeoVisalEntityMatch2ORM/std": 0.18540412187576294, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 602, "train_speed(iter/s)": 0.025416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 389.9270935058594, "completions/min_length": 331.0, "epoch": 0.07225017972681524, "grad_norm": 1.1506017632017747, "kl": 0.6344503164291382, "learning_rate": 9.902950316357778e-07, "loss": 0.0006359269609674811, "memory(GiB)": 165.8, "reward": 2.307849884033203, "reward_std": 0.09480610489845276, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5578497052192688, "rewards/GeoVisalEntityMatch2ORM/std": 0.15641754865646362, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 603, "train_speed(iter/s)": 0.025432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 362.3645935058594, "completions/min_length": 309.0, "epoch": 0.07236999760364246, "grad_norm": 1.2725360571078088, "kl": 0.6547739505767822, "learning_rate": 9.902577189262562e-07, "loss": 0.0006555467844009399, "memory(GiB)": 165.8, "reward": 2.5276784896850586, "reward_std": 0.1732998788356781, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466571509838104, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5776786208152771, "rewards/GeoVisalEntityMatch2ORM/std": 0.13257943093776703, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 604, "train_speed(iter/s)": 0.025438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/mean_length": 390.1770935058594, "completions/min_length": 322.0, "epoch": 0.07248981548046969, "grad_norm": 1.1125356630851069, "kl": 0.6467363238334656, "learning_rate": 9.902203353318233e-07, "loss": 0.0006488065118901432, "memory(GiB)": 165.8, "reward": 2.5966849327087402, "reward_std": 0.17310038208961487, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6279348731040955, "rewards/GeoVisalEntityMatch2ORM/std": 0.1603434532880783, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 605, "train_speed(iter/s)": 0.025456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 370.2083435058594, "completions/min_length": 303.0, "epoch": 0.07260963335729691, "grad_norm": 1.2055075421852857, "kl": 0.6132299900054932, "learning_rate": 9.901828808578845e-07, "loss": 0.0006137291784398258, "memory(GiB)": 165.8, "reward": 2.719874382019043, "reward_std": 0.0880335345864296, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.719874382019043, "rewards/GeoVisalEntityMatch2ORM/std": 0.10000834614038467, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 606, "train_speed(iter/s)": 0.02547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 447.0, "completions/mean_length": 368.375, "completions/min_length": 296.0, "epoch": 0.07272945123412414, "grad_norm": 1.278053998555523, "kl": 0.7879885733127594, "learning_rate": 9.901453555098553e-07, "loss": 0.0007834136486053467, "memory(GiB)": 165.8, "reward": 2.304513931274414, "reward_std": 0.3847898840904236, "rewards/GeoLocAccuracyV2ORM/mean": 0.7854167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.37640380859375, "rewards/GeoVisalEntityMatch2ORM/mean": 0.592013955116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.1448998898267746, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 607, "train_speed(iter/s)": 0.025469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 365.47918701171875, "completions/min_length": 310.0, "epoch": 0.07284926911095135, "grad_norm": 1.1766167779227668, "kl": 0.6462059915065765, "learning_rate": 9.901077592931612e-07, "loss": 0.0006480937590822577, "memory(GiB)": 165.8, "reward": 2.625, "reward_std": 0.10674875974655151, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.625, "rewards/GeoVisalEntityMatch2ORM/std": 0.18761104345321655, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 608, "train_speed(iter/s)": 0.025482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 370.79168701171875, "completions/min_length": 299.0, "epoch": 0.07296908698777857, "grad_norm": 1.1783650790445999, "kl": 0.6352315843105316, "learning_rate": 9.900700922132382e-07, "loss": 0.0006379088154062629, "memory(GiB)": 165.8, "reward": 2.293402671813965, "reward_std": 0.1290205419063568, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.553819477558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.2128816395998001, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 609, "train_speed(iter/s)": 0.025495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 359.79168701171875, "completions/min_length": 297.0, "epoch": 0.0730889048646058, "grad_norm": 1.2990016844079533, "kl": 0.6582375466823578, "learning_rate": 9.900323542755325e-07, "loss": 0.0006600792403332889, "memory(GiB)": 165.8, "reward": 2.03769850730896, "reward_std": 0.11171881854534149, "rewards/GeoLocAccuracyV2ORM/mean": 0.5, "rewards/GeoLocAccuracyV2ORM/std": 0.5026246905326843, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5376984477043152, "rewards/GeoVisalEntityMatch2ORM/std": 0.1863483339548111, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 610, "train_speed(iter/s)": 0.025508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.0, "completions/mean_length": 368.0520935058594, "completions/min_length": 304.0, "epoch": 0.07320872274143302, "grad_norm": 1.1984255008064042, "kl": 0.5970586836338043, "learning_rate": 9.899945454855005e-07, "loss": 0.0005985299940221012, "memory(GiB)": 165.8, "reward": 2.5467262268066406, "reward_std": 0.12212368845939636, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.555059552192688, "rewards/GeoVisalEntityMatch2ORM/std": 0.1789509654045105, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 611, "train_speed(iter/s)": 0.025521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/mean_length": 356.7708435058594, "completions/min_length": 292.0, "epoch": 0.07332854061826025, "grad_norm": 1.2874573211956728, "kl": 0.6681076288223267, "learning_rate": 9.89956665848609e-07, "loss": 0.0006693167379125953, "memory(GiB)": 165.8, "reward": 2.3886866569519043, "reward_std": 0.09934734553098679, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5886863470077515, "rewards/GeoVisalEntityMatch2ORM/std": 0.1542174071073532, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 612, "train_speed(iter/s)": 0.025534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/mean_length": 370.22918701171875, "completions/min_length": 315.0, "epoch": 0.07344835849508746, "grad_norm": 1.1946621434339095, "kl": 0.6197927892208099, "learning_rate": 9.899187153703345e-07, "loss": 0.0006196498870849609, "memory(GiB)": 165.8, "reward": 2.643683910369873, "reward_std": 0.0844235047698021, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.643683910369873, "rewards/GeoVisalEntityMatch2ORM/std": 0.13251067698001862, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 613, "train_speed(iter/s)": 0.025547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 432.0, "completions/mean_length": 368.8958435058594, "completions/min_length": 316.0, "epoch": 0.07356817637191469, "grad_norm": 1.319698069549986, "kl": 0.7472431659698486, "learning_rate": 9.898806940561645e-07, "loss": 0.0007360776653513312, "memory(GiB)": 165.8, "reward": 2.5105323791503906, "reward_std": 0.28438812494277954, "rewards/GeoLocAccuracyV2ORM/mean": 0.9625000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.18194851279258728, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5688657760620117, "rewards/GeoVisalEntityMatch2ORM/std": 0.2160344421863556, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 614, "train_speed(iter/s)": 0.02555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.0, "completions/mean_length": 370.6770935058594, "completions/min_length": 313.0, "epoch": 0.07368799424874191, "grad_norm": 1.2486470828554743, "kl": 0.6096944808959961, "learning_rate": 9.898426019115964e-07, "loss": 0.0006102224579080939, "memory(GiB)": 165.8, "reward": 2.583895444869995, "reward_std": 0.20542682707309723, "rewards/GeoLocAccuracyV2ORM/mean": 0.8833333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.28383341431617737, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7005621790885925, "rewards/GeoVisalEntityMatch2ORM/std": 0.15589991211891174, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 615, "train_speed(iter/s)": 0.025557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 356.76043701171875, "completions/min_length": 295.0, "epoch": 0.07380781212556914, "grad_norm": 1.1913538720247203, "kl": 0.6815016269683838, "learning_rate": 9.898044389421378e-07, "loss": 0.000682209967635572, "memory(GiB)": 165.8, "reward": 2.598116636276245, "reward_std": 0.15316978096961975, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.29199856519699097, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7231166362762451, "rewards/GeoVisalEntityMatch2ORM/std": 0.1930425465106964, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 616, "train_speed(iter/s)": 0.02557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/mean_length": 355.38543701171875, "completions/min_length": 303.0, "epoch": 0.07392763000239636, "grad_norm": 1.0036976796692403, "kl": 0.6257067322731018, "learning_rate": 9.897662051533063e-07, "loss": 0.0006266050040721893, "memory(GiB)": 165.8, "reward": 2.5418403148651123, "reward_std": 0.15465578436851501, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.25615236163139343, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6335070133209229, "rewards/GeoVisalEntityMatch2ORM/std": 0.2521544098854065, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 617, "train_speed(iter/s)": 0.025573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/mean_length": 345.35418701171875, "completions/min_length": 275.0, "epoch": 0.07404744787922359, "grad_norm": 1.1971633305584772, "kl": 0.6573269367218018, "learning_rate": 9.897279005506303e-07, "loss": 0.000658991455566138, "memory(GiB)": 165.8, "reward": 2.5639824867248535, "reward_std": 0.11735208332538605, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5723156332969666, "rewards/GeoVisalEntityMatch2ORM/std": 0.2262900024652481, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 618, "train_speed(iter/s)": 0.025586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 351.97918701171875, "completions/min_length": 259.0, "epoch": 0.0741672657560508, "grad_norm": 1.1796908813832996, "kl": 0.649831086397171, "learning_rate": 9.896895251396482e-07, "loss": 0.0006523467600345612, "memory(GiB)": 165.8, "reward": 2.565662384033203, "reward_std": 0.05323842912912369, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5656622052192688, "rewards/GeoVisalEntityMatch2ORM/std": 0.18879972398281097, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 619, "train_speed(iter/s)": 0.025599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 345.57293701171875, "completions/min_length": 270.0, "epoch": 0.07428708363287803, "grad_norm": 1.216464363431302, "kl": 0.612434446811676, "learning_rate": 9.896510789259082e-07, "loss": 0.0006150181288830936, "memory(GiB)": 165.8, "reward": 2.5950522422790527, "reward_std": 0.18423840403556824, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.26224401593208313, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6783854365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.1457068771123886, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 620, "train_speed(iter/s)": 0.025612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 380.3333435058594, "completions/min_length": 313.0, "epoch": 0.07440690150970525, "grad_norm": 1.3195482358968578, "kl": 0.6310353577136993, "learning_rate": 9.896125619149696e-07, "loss": 0.000631699978839606, "memory(GiB)": 165.8, "reward": 2.547569751739502, "reward_std": 0.07951387017965317, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5475695133209229, "rewards/GeoVisalEntityMatch2ORM/std": 0.13470737636089325, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 621, "train_speed(iter/s)": 0.025625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 381.60418701171875, "completions/min_length": 307.0, "epoch": 0.07452671938653246, "grad_norm": 1.2109043201943916, "kl": 0.6241541802883148, "learning_rate": 9.895739741124011e-07, "loss": 0.0006263852119445801, "memory(GiB)": 165.8, "reward": 2.7963168621063232, "reward_std": 0.08446536958217621, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7963169813156128, "rewards/GeoVisalEntityMatch2ORM/std": 0.15309785306453705, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 622, "train_speed(iter/s)": 0.025637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 432.0, "completions/mean_length": 369.9270935058594, "completions/min_length": 311.0, "epoch": 0.0746465372633597, "grad_norm": 1.2091689761842517, "kl": 0.6326409578323364, "learning_rate": 9.895353155237822e-07, "loss": 0.0006335763027891517, "memory(GiB)": 165.8, "reward": 2.310185194015503, "reward_std": 0.20050111413002014, "rewards/GeoLocAccuracyV2ORM/mean": 0.8479167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.31887272000312805, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47268521785736084, "rewards/GeoVisalEntityMatch2ORM/std": 0.17709603905677795, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 623, "train_speed(iter/s)": 0.025637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/mean_length": 379.71875, "completions/min_length": 320.0, "epoch": 0.07476635514018691, "grad_norm": 1.14672700143323, "kl": 0.660573810338974, "learning_rate": 9.894965861547022e-07, "loss": 0.0006621654029004276, "memory(GiB)": 165.8, "reward": 2.643287181854248, "reward_std": 0.10547752678394318, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6432870626449585, "rewards/GeoVisalEntityMatch2ORM/std": 0.2137841135263443, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 624, "train_speed(iter/s)": 0.02565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 361.3645935058594, "completions/min_length": 274.0, "epoch": 0.07488617301701414, "grad_norm": 1.3392609373690105, "kl": 0.6925033330917358, "learning_rate": 9.894577860107611e-07, "loss": 0.0006947530200704932, "memory(GiB)": 165.8, "reward": 2.589409828186035, "reward_std": 0.14691200852394104, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6144097447395325, "rewards/GeoVisalEntityMatch2ORM/std": 0.2523355185985565, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 625, "train_speed(iter/s)": 0.025653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 436.0, "completions/mean_length": 362.47918701171875, "completions/min_length": 290.0, "epoch": 0.07500599089384136, "grad_norm": 1.2836119263222723, "kl": 0.6717004179954529, "learning_rate": 9.89418915097569e-07, "loss": 0.000671515881549567, "memory(GiB)": 165.8, "reward": 2.658978223800659, "reward_std": 0.18155667185783386, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6798114776611328, "rewards/GeoVisalEntityMatch2ORM/std": 0.275558203458786, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 626, "train_speed(iter/s)": 0.025653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 386.8125, "completions/min_length": 320.0, "epoch": 0.07512580877066859, "grad_norm": 1.2266737029886412, "kl": 0.6334618628025055, "learning_rate": 9.893799734207457e-07, "loss": 0.0006343710119836032, "memory(GiB)": 165.8, "reward": 2.4036459922790527, "reward_std": 0.1355312168598175, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.35293257236480713, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6119791865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.17598018050193787, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 627, "train_speed(iter/s)": 0.025666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 399.22918701171875, "completions/min_length": 338.0, "epoch": 0.0752456266474958, "grad_norm": 1.177461795891996, "kl": 0.6457783579826355, "learning_rate": 9.89340960985922e-07, "loss": 0.0006458809366449714, "memory(GiB)": 165.8, "reward": 2.379166841506958, "reward_std": 0.13463693857192993, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3791666626930237, "rewards/GeoVisalEntityMatch2ORM/std": 0.1405659019947052, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 628, "train_speed(iter/s)": 0.025678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 401.5208435058594, "completions/min_length": 317.0, "epoch": 0.07536544452432303, "grad_norm": 1.2384034433181523, "kl": 0.596103310585022, "learning_rate": 9.893018777987384e-07, "loss": 0.000597044825553894, "memory(GiB)": 165.8, "reward": 2.3981692790985107, "reward_std": 0.1483120620250702, "rewards/GeoLocAccuracyV2ORM/mean": 0.8250000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5731692314147949, "rewards/GeoVisalEntityMatch2ORM/std": 0.17710967361927032, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 629, "train_speed(iter/s)": 0.02569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 474.0, "completions/mean_length": 398.7083435058594, "completions/min_length": 344.0, "epoch": 0.07548526240115025, "grad_norm": 0.9772328643182441, "kl": 0.6185868084430695, "learning_rate": 9.892627238648462e-07, "loss": 0.0006189396372064948, "memory(GiB)": 165.8, "reward": 2.6152448654174805, "reward_std": 0.22760243713855743, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490600049495697, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6777447462081909, "rewards/GeoVisalEntityMatch2ORM/std": 0.1514788120985031, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 630, "train_speed(iter/s)": 0.025693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 383.8020935058594, "completions/min_length": 296.0, "epoch": 0.07560508027797748, "grad_norm": 1.2228364753024363, "kl": 0.6131123602390289, "learning_rate": 9.892234991899063e-07, "loss": 0.0006158699980005622, "memory(GiB)": 165.8, "reward": 2.608217716217041, "reward_std": 0.13505063951015472, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.17868919670581818, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6498843431472778, "rewards/GeoVisalEntityMatch2ORM/std": 0.1475730687379837, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 631, "train_speed(iter/s)": 0.025706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 416.9270935058594, "completions/min_length": 367.0, "epoch": 0.0757248981548047, "grad_norm": 0.9379520455645173, "kl": 0.5738601088523865, "learning_rate": 9.891842037795898e-07, "loss": 0.0005749104311689734, "memory(GiB)": 165.8, "reward": 2.6107640266418457, "reward_std": 0.1139998733997345, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6274305582046509, "rewards/GeoVisalEntityMatch2ORM/std": 0.3021557927131653, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 632, "train_speed(iter/s)": 0.025717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.23958333333333334, "completions/max_length": 494.0, "completions/mean_length": 425.82293701171875, "completions/min_length": 355.0, "epoch": 0.07584471603163193, "grad_norm": 1.14716820849606, "kl": 0.6726376712322235, "learning_rate": 9.891448376395786e-07, "loss": 0.0006732146139256656, "memory(GiB)": 165.8, "reward": 1.7601239681243896, "reward_std": 0.2785433232784271, "rewards/GeoLocAccuracyV2ORM/mean": 0.4520833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.43358463048934937, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5476239919662476, "rewards/GeoVisalEntityMatch2ORM/std": 0.19501431286334991, "rewards/MathFormat/mean": 0.7604166865348816, "rewards/MathFormat/std": 0.42906975746154785, "step": 633, "train_speed(iter/s)": 0.025717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/mean_length": 409.22918701171875, "completions/min_length": 333.0, "epoch": 0.07596453390845914, "grad_norm": 1.0092353878888418, "kl": 0.6107733249664307, "learning_rate": 9.891054007755646e-07, "loss": 0.001208494184538722, "memory(GiB)": 165.8, "reward": 2.4286458492279053, "reward_std": 0.07274413108825684, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6286458373069763, "rewards/GeoVisalEntityMatch2ORM/std": 0.2632034122943878, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 634, "train_speed(iter/s)": 0.025729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 411.875, "completions/min_length": 353.0, "epoch": 0.07608435178528636, "grad_norm": 1.203878185066801, "kl": 0.6166938245296478, "learning_rate": 9.890658931932498e-07, "loss": 0.0006172260036692023, "memory(GiB)": 165.8, "reward": 2.4783201217651367, "reward_std": 0.1321689337491989, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6699867248535156, "rewards/GeoVisalEntityMatch2ORM/std": 0.18563801050186157, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 635, "train_speed(iter/s)": 0.025741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 413.2083435058594, "completions/min_length": 347.0, "epoch": 0.07620416966211359, "grad_norm": 1.2273510493936233, "kl": 0.5901170074939728, "learning_rate": 9.890263148983463e-07, "loss": 0.0005903914570808411, "memory(GiB)": 165.8, "reward": 2.222482681274414, "reward_std": 0.15538738667964935, "rewards/GeoLocAccuracyV2ORM/mean": 0.5666667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.46851763129234314, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6558159589767456, "rewards/GeoVisalEntityMatch2ORM/std": 0.19366471469402313, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 636, "train_speed(iter/s)": 0.025752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 515.0, "completions/mean_length": 407.63543701171875, "completions/min_length": 324.0, "epoch": 0.0763239875389408, "grad_norm": 1.0321425643336426, "kl": 0.5622639954090118, "learning_rate": 9.88986665896577e-07, "loss": 0.0005631173844449222, "memory(GiB)": 165.8, "reward": 2.6944732666015625, "reward_std": 0.1481945961713791, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713991165161, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8028067350387573, "rewards/GeoVisalEntityMatch2ORM/std": 0.22600597143173218, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 637, "train_speed(iter/s)": 0.025764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 518.0, "completions/mean_length": 438.44793701171875, "completions/min_length": 366.0, "epoch": 0.07644380541576803, "grad_norm": 1.132773637886615, "kl": 0.5529376864433289, "learning_rate": 9.88946946193674e-07, "loss": 0.0005543282022699714, "memory(GiB)": 165.8, "reward": 2.2815706729888916, "reward_std": 0.16077592968940735, "rewards/GeoLocAccuracyV2ORM/mean": 0.6333333253860474, "rewards/GeoLocAccuracyV2ORM/std": 0.4594428241252899, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6482372283935547, "rewards/GeoVisalEntityMatch2ORM/std": 0.14964516460895538, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 638, "train_speed(iter/s)": 0.025776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 509.0, "completions/mean_length": 438.3333435058594, "completions/min_length": 369.0, "epoch": 0.07656362329259525, "grad_norm": 1.1342318390760542, "kl": 0.6414749920368195, "learning_rate": 9.88907155795381e-07, "loss": 0.0006386464228853583, "memory(GiB)": 165.8, "reward": 2.547916889190674, "reward_std": 0.18741726875305176, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5687500238418579, "rewards/GeoVisalEntityMatch2ORM/std": 0.21578457951545715, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 639, "train_speed(iter/s)": 0.025787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 506.0, "completions/mean_length": 434.91668701171875, "completions/min_length": 331.0, "epoch": 0.07668344116942248, "grad_norm": 1.1962826856931492, "kl": 0.6727795600891113, "learning_rate": 9.888672947074505e-07, "loss": 0.0006696184864267707, "memory(GiB)": 165.8, "reward": 2.5556750297546387, "reward_std": 0.2697548568248749, "rewards/GeoLocAccuracyV2ORM/mean": 0.9208332896232605, "rewards/GeoLocAccuracyV2ORM/std": 0.24875126779079437, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6556750535964966, "rewards/GeoVisalEntityMatch2ORM/std": 0.26017922163009644, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 640, "train_speed(iter/s)": 0.025799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 529.0, "completions/mean_length": 433.66668701171875, "completions/min_length": 368.0, "epoch": 0.0768032590462497, "grad_norm": 1.118128280084955, "kl": 0.5469748973846436, "learning_rate": 9.888273629356465e-07, "loss": 0.00054892897605896, "memory(GiB)": 165.8, "reward": 2.490509510040283, "reward_std": 0.13382717967033386, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6821759939193726, "rewards/GeoVisalEntityMatch2ORM/std": 0.12417346239089966, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 641, "train_speed(iter/s)": 0.025814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 552.0, "completions/mean_length": 479.01043701171875, "completions/min_length": 396.0, "epoch": 0.07692307692307693, "grad_norm": 1.2024659384948313, "kl": 1.0666802525520325, "learning_rate": 9.887873604857421e-07, "loss": 0.0010521363001316786, "memory(GiB)": 165.8, "reward": 2.322197437286377, "reward_std": 0.45927321910858154, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3547917604446411, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6138641238212585, "rewards/GeoVisalEntityMatch2ORM/std": 0.22165559232234955, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 642, "train_speed(iter/s)": 0.025824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5104166666666666, "completions/max_length": 511.0, "completions/mean_length": 422.26043701171875, "completions/min_length": 368.0, "epoch": 0.07704289479990414, "grad_norm": 1.2084284532214842, "kl": 0.7277571558952332, "learning_rate": 9.887472873635216e-07, "loss": 0.0007284656167030334, "memory(GiB)": 165.8, "reward": 1.4892114400863647, "reward_std": 0.1781230866909027, "rewards/GeoLocAccuracyV2ORM/mean": 0.4895833432674408, "rewards/GeoLocAccuracyV2ORM/std": 0.5025156140327454, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5100446939468384, "rewards/GeoVisalEntityMatch2ORM/std": 0.19732443988323212, "rewards/MathFormat/mean": 0.4895833432674408, "rewards/MathFormat/std": 0.5025156140327454, "step": 643, "train_speed(iter/s)": 0.025824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 552.0, "completions/mean_length": 461.10418701171875, "completions/min_length": 393.0, "epoch": 0.07716271267673137, "grad_norm": 1.0715335697567, "kl": 0.7550158798694611, "learning_rate": 9.887071435747787e-07, "loss": 0.0007397359004244208, "memory(GiB)": 165.8, "reward": 2.67578125, "reward_std": 0.29066744446754456, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.73828125, "rewards/GeoVisalEntityMatch2ORM/std": 0.13158713281154633, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 644, "train_speed(iter/s)": 0.025834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 552.0, "completions/mean_length": 478.4895935058594, "completions/min_length": 402.0, "epoch": 0.07728253055355859, "grad_norm": 1.1279428838631302, "kl": 0.8119468092918396, "learning_rate": 9.886669291253178e-07, "loss": 0.0007992858882062137, "memory(GiB)": 165.8, "reward": 2.396122932434082, "reward_std": 0.5389748811721802, "rewards/GeoLocAccuracyV2ORM/mean": 0.8958333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3070802092552185, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6044560670852661, "rewards/GeoVisalEntityMatch2ORM/std": 0.11817628145217896, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070802092552185, "step": 645, "train_speed(iter/s)": 0.025844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 525.0, "completions/mean_length": 438.41668701171875, "completions/min_length": 351.0, "epoch": 0.07740234843038582, "grad_norm": 1.1332067685413605, "kl": 0.6060627102851868, "learning_rate": 9.88626644020953e-07, "loss": 0.0006037528510205448, "memory(GiB)": 165.8, "reward": 2.661525249481201, "reward_std": 0.1783464103937149, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6823585033416748, "rewards/GeoVisalEntityMatch2ORM/std": 0.21250417828559875, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 646, "train_speed(iter/s)": 0.025854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3541666666666667, "completions/max_length": 538.0, "completions/mean_length": 448.78125, "completions/min_length": 367.0, "epoch": 0.07752216630721304, "grad_norm": 1.0978375720740077, "kl": 0.9272999167442322, "learning_rate": 9.8858628826751e-07, "loss": 0.000925044238101691, "memory(GiB)": 165.8, "reward": 1.9361979961395264, "reward_std": 0.5455000400543213, "rewards/GeoLocAccuracyV2ORM/mean": 0.65625, "rewards/GeoLocAccuracyV2ORM/std": 0.4774521291255951, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6236979365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.16535735130310059, "rewards/MathFormat/mean": 0.65625, "rewards/MathFormat/std": 0.4774521291255951, "step": 647, "train_speed(iter/s)": 0.025856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 524.0, "completions/mean_length": 437.9375, "completions/min_length": 375.0, "epoch": 0.07764198418404025, "grad_norm": 1.1425834484370607, "kl": 0.5856659412384033, "learning_rate": 9.885458618708229e-07, "loss": 0.0005851791938766837, "memory(GiB)": 165.8, "reward": 2.464550495147705, "reward_std": 0.1666572242975235, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6312169432640076, "rewards/GeoVisalEntityMatch2ORM/std": 0.1733686476945877, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 648, "train_speed(iter/s)": 0.025867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 436.76043701171875, "completions/min_length": 355.0, "epoch": 0.07776180206086748, "grad_norm": 1.0205964742630642, "kl": 0.5434912741184235, "learning_rate": 9.88505364836737e-07, "loss": 0.0005448187584988773, "memory(GiB)": 165.8, "reward": 2.7306551933288574, "reward_std": 0.1345396637916565, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7514881491661072, "rewards/GeoVisalEntityMatch2ORM/std": 0.1815602034330368, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 649, "train_speed(iter/s)": 0.025877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 513.0, "completions/mean_length": 436.4583435058594, "completions/min_length": 359.0, "epoch": 0.0778816199376947, "grad_norm": 1.1995718971649694, "kl": 0.6109117865562439, "learning_rate": 9.884647971711075e-07, "loss": 0.0006123831262812018, "memory(GiB)": 165.8, "reward": 2.081423759460449, "reward_std": 0.2112690508365631, "rewards/GeoLocAccuracyV2ORM/mean": 0.5666667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.49474433064460754, "rewards/GeoVisalEntityMatch2ORM/mean": 0.514756977558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.22866937518119812, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 650, "train_speed(iter/s)": 0.025889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/mean_length": 421.625, "completions/min_length": 352.0, "epoch": 0.07800143781452193, "grad_norm": 1.1432860126684956, "kl": 0.6075773239135742, "learning_rate": 9.884241588798003e-07, "loss": 0.0006099840393289924, "memory(GiB)": 165.8, "reward": 2.7617766857147217, "reward_std": 0.11408698558807373, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.770110011100769, "rewards/GeoVisalEntityMatch2ORM/std": 0.1236504316329956, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 651, "train_speed(iter/s)": 0.025899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/mean_length": 411.5833435058594, "completions/min_length": 336.0, "epoch": 0.07812125569134915, "grad_norm": 1.0619563840504174, "kl": 0.5712612569332123, "learning_rate": 9.88383449968691e-07, "loss": 0.0005730241537094116, "memory(GiB)": 165.8, "reward": 2.675445079803467, "reward_std": 0.1644170582294464, "rewards/GeoLocAccuracyV2ORM/mean": 0.9645833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.1716662347316742, "rewards/GeoVisalEntityMatch2ORM/mean": 0.71086186170578, "rewards/GeoVisalEntityMatch2ORM/std": 0.2571263611316681, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 652, "train_speed(iter/s)": 0.02591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 377.0833435058594, "completions/min_length": 320.0, "epoch": 0.07824107356817638, "grad_norm": 1.1530309450687495, "kl": 0.5951846241950989, "learning_rate": 9.883426704436656e-07, "loss": 0.0005965555901639163, "memory(GiB)": 165.8, "reward": 2.5895090103149414, "reward_std": 0.1714429259300232, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713991165161, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6978422999382019, "rewards/GeoVisalEntityMatch2ORM/std": 0.2535507380962372, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 653, "train_speed(iter/s)": 0.02592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 395.88543701171875, "completions/min_length": 312.0, "epoch": 0.07836089144500359, "grad_norm": 1.0376480883756964, "kl": 0.5986058712005615, "learning_rate": 9.8830182031062e-07, "loss": 0.0006000375142320991, "memory(GiB)": 165.8, "reward": 2.627199172973633, "reward_std": 0.0677829459309578, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6271990537643433, "rewards/GeoVisalEntityMatch2ORM/std": 0.19886532425880432, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 654, "train_speed(iter/s)": 0.025931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 359.6875, "completions/min_length": 292.0, "epoch": 0.07848070932183082, "grad_norm": 1.27994586335381, "kl": 0.571349561214447, "learning_rate": 9.882608995754613e-07, "loss": 0.0005724877119064331, "memory(GiB)": 165.8, "reward": 2.566145896911621, "reward_std": 0.21711455285549164, "rewards/GeoLocAccuracyV2ORM/mean": 0.9291666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.2546066641807556, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6369792222976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.15410946309566498, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 655, "train_speed(iter/s)": 0.025934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/mean_length": 389.51043701171875, "completions/min_length": 311.0, "epoch": 0.07860052719865804, "grad_norm": 1.2744925915972023, "kl": 0.6315353512763977, "learning_rate": 9.882199082441057e-07, "loss": 0.0006334533682093024, "memory(GiB)": 165.8, "reward": 2.511183261871338, "reward_std": 0.08594471216201782, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5111832618713379, "rewards/GeoVisalEntityMatch2ORM/std": 0.19853799045085907, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 656, "train_speed(iter/s)": 0.025944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.0, "completions/mean_length": 386.21875, "completions/min_length": 323.0, "epoch": 0.07872034507548527, "grad_norm": 1.1614188127783873, "kl": 0.6345366537570953, "learning_rate": 9.881788463224797e-07, "loss": 0.0006362696876749396, "memory(GiB)": 165.8, "reward": 2.6233863830566406, "reward_std": 0.09798691421747208, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6233863830566406, "rewards/GeoVisalEntityMatch2ORM/std": 0.14983028173446655, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 657, "train_speed(iter/s)": 0.025955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 373.1770935058594, "completions/min_length": 295.0, "epoch": 0.07884016295231248, "grad_norm": 1.146834316101856, "kl": 0.5713452398777008, "learning_rate": 9.881377138165207e-07, "loss": 0.0005722641944885254, "memory(GiB)": 165.8, "reward": 2.701748847961426, "reward_std": 0.119493268430233, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7017485499382019, "rewards/GeoVisalEntityMatch2ORM/std": 0.14904417097568512, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 658, "train_speed(iter/s)": 0.025967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/mean_length": 361.26043701171875, "completions/min_length": 316.0, "epoch": 0.07895998082913971, "grad_norm": 1.2092983248025237, "kl": 0.6010385453701019, "learning_rate": 9.880965107321759e-07, "loss": 0.0006020069122314453, "memory(GiB)": 165.8, "reward": 2.5637154579162598, "reward_std": 0.14449051022529602, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5637153387069702, "rewards/GeoVisalEntityMatch2ORM/std": 0.19562457501888275, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 659, "train_speed(iter/s)": 0.025977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/mean_length": 357.60418701171875, "completions/min_length": 288.0, "epoch": 0.07907979870596693, "grad_norm": 0.9861052084415268, "kl": 0.582626223564148, "learning_rate": 9.880552370754027e-07, "loss": 0.0005839007790200412, "memory(GiB)": 165.8, "reward": 2.6811342239379883, "reward_std": 0.10902468860149384, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003036379814148, "rewards/GeoVisalEntityMatch2ORM/mean": 0.699884295463562, "rewards/GeoVisalEntityMatch2ORM/std": 0.19395339488983154, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 660, "train_speed(iter/s)": 0.025988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/mean_length": 372.34375, "completions/min_length": 315.0, "epoch": 0.07919961658279415, "grad_norm": 1.2134361225187433, "kl": 0.6390474736690521, "learning_rate": 9.880138928521686e-07, "loss": 0.0006419519777409732, "memory(GiB)": 165.8, "reward": 2.7519679069519043, "reward_std": 0.08846861124038696, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7519676089286804, "rewards/GeoVisalEntityMatch2ORM/std": 0.11860675364732742, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 661, "train_speed(iter/s)": 0.026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.0, "completions/mean_length": 346.57293701171875, "completions/min_length": 289.0, "epoch": 0.07931943445962138, "grad_norm": 1.1459982785736138, "kl": 0.5885278582572937, "learning_rate": 9.879724780684517e-07, "loss": 0.0005891323089599609, "memory(GiB)": 165.8, "reward": 2.5870041847229004, "reward_std": 0.11947622150182724, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6036706566810608, "rewards/GeoVisalEntityMatch2ORM/std": 0.2654910385608673, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 662, "train_speed(iter/s)": 0.026015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 359.01043701171875, "completions/min_length": 306.0, "epoch": 0.0794392523364486, "grad_norm": 1.2005299960005682, "kl": 0.6252503097057343, "learning_rate": 9.879309927302399e-07, "loss": 0.0006256401538848877, "memory(GiB)": 165.8, "reward": 2.726996898651123, "reward_std": 0.09252528846263885, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7269966006278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.15936526656150818, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 663, "train_speed(iter/s)": 0.026026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 375.35418701171875, "completions/min_length": 294.0, "epoch": 0.07955907021327582, "grad_norm": 1.1559379472227878, "kl": 0.618010401725769, "learning_rate": 9.878894368435314e-07, "loss": 0.0006193965673446655, "memory(GiB)": 165.8, "reward": 2.763310194015503, "reward_std": 0.10093165934085846, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7633101940155029, "rewards/GeoVisalEntityMatch2ORM/std": 0.17974872887134552, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 664, "train_speed(iter/s)": 0.026036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 442.0, "completions/mean_length": 386.5, "completions/min_length": 330.0, "epoch": 0.07967888809010304, "grad_norm": 1.2620785151759237, "kl": 1.2096138000488281, "learning_rate": 9.878478104143345e-07, "loss": 0.001177862286567688, "memory(GiB)": 165.8, "reward": 2.555945873260498, "reward_std": 0.30150166153907776, "rewards/GeoLocAccuracyV2ORM/mean": 0.8354167342185974, "rewards/GeoLocAccuracyV2ORM/std": 0.3346574902534485, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7517790794372559, "rewards/GeoVisalEntityMatch2ORM/std": 0.09002312272787094, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 665, "train_speed(iter/s)": 0.026038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 383.69793701171875, "completions/min_length": 335.0, "epoch": 0.07979870596693027, "grad_norm": 1.2518143958339043, "kl": 0.6555992364883423, "learning_rate": 9.878061134486684e-07, "loss": 0.0006576826563104987, "memory(GiB)": 165.8, "reward": 2.5922508239746094, "reward_std": 0.09162628650665283, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5922507047653198, "rewards/GeoVisalEntityMatch2ORM/std": 0.17608016729354858, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 666, "train_speed(iter/s)": 0.026048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 387.46875, "completions/min_length": 335.0, "epoch": 0.07991852384375749, "grad_norm": 1.115806611109728, "kl": 0.6332187354564667, "learning_rate": 9.877643459525612e-07, "loss": 0.000634215772151947, "memory(GiB)": 165.8, "reward": 2.6839656829833984, "reward_std": 0.1821550577878952, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336146235466003, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7360491156578064, "rewards/GeoVisalEntityMatch2ORM/std": 0.12537634372711182, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 667, "train_speed(iter/s)": 0.026059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 472.0, "completions/mean_length": 388.3645935058594, "completions/min_length": 326.0, "epoch": 0.08003834172058472, "grad_norm": 1.157584299973688, "kl": 1.129172831773758, "learning_rate": 9.877225079320525e-07, "loss": 0.0011248167138546705, "memory(GiB)": 165.8, "reward": 2.1206154823303223, "reward_std": 0.30984601378440857, "rewards/GeoLocAccuracyV2ORM/mean": 0.6041666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4915960431098938, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6622819304466248, "rewards/GeoVisalEntityMatch2ORM/std": 0.13694585859775543, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 668, "train_speed(iter/s)": 0.026058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/mean_length": 427.5833435058594, "completions/min_length": 358.0, "epoch": 0.08015815959741193, "grad_norm": 1.1813181621788087, "kl": 0.5695368945598602, "learning_rate": 9.876805993931913e-07, "loss": 0.000570575415622443, "memory(GiB)": 165.8, "reward": 2.644524574279785, "reward_std": 0.2432563155889511, "rewards/GeoLocAccuracyV2ORM/mean": 0.9104167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.2657577693462372, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7341079711914062, "rewards/GeoVisalEntityMatch2ORM/std": 0.12213897705078125, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 669, "train_speed(iter/s)": 0.026068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 406.6770935058594, "completions/min_length": 362.0, "epoch": 0.08027797747423916, "grad_norm": 1.2740098584421302, "kl": 0.5946835875511169, "learning_rate": 9.876386203420372e-07, "loss": 0.0005951722851023078, "memory(GiB)": 165.8, "reward": 2.526339292526245, "reward_std": 0.15472692251205444, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713693141937, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6346726417541504, "rewards/GeoVisalEntityMatch2ORM/std": 0.12682071328163147, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 670, "train_speed(iter/s)": 0.026079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/mean_length": 404.51043701171875, "completions/min_length": 343.0, "epoch": 0.08039779535106638, "grad_norm": 1.1080156841230575, "kl": 0.631158173084259, "learning_rate": 9.875965707846595e-07, "loss": 0.0006332124467007816, "memory(GiB)": 165.8, "reward": 2.733631134033203, "reward_std": 0.08868320286273956, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7336310148239136, "rewards/GeoVisalEntityMatch2ORM/std": 0.1131162941455841, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 671, "train_speed(iter/s)": 0.026089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.20833333333333334, "completions/max_length": 463.0, "completions/mean_length": 385.875, "completions/min_length": 344.0, "epoch": 0.08051761322789361, "grad_norm": 1.1689050733159574, "kl": 0.7292651832103729, "learning_rate": 9.875544507271382e-07, "loss": 0.0007293274393305182, "memory(GiB)": 165.8, "reward": 1.9565972089767456, "reward_std": 0.3011114299297333, "rewards/GeoLocAccuracyV2ORM/mean": 0.5604166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.4921444058418274, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5940972566604614, "rewards/GeoVisalEntityMatch2ORM/std": 0.19560982286930084, "rewards/MathFormat/mean": 0.8020833730697632, "rewards/MathFormat/std": 0.4005205035209656, "step": 672, "train_speed(iter/s)": 0.026087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.0, "completions/mean_length": 405.21875, "completions/min_length": 349.0, "epoch": 0.08063743110472082, "grad_norm": 1.1191187305399843, "kl": 0.6240475177764893, "learning_rate": 9.875122601755635e-07, "loss": 0.0006254290929064155, "memory(GiB)": 165.8, "reward": 2.584904193878174, "reward_std": 0.09059765934944153, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.584904134273529, "rewards/GeoVisalEntityMatch2ORM/std": 0.13775402307510376, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 673, "train_speed(iter/s)": 0.026098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 408.8333435058594, "completions/min_length": 359.0, "epoch": 0.08075724898154804, "grad_norm": 0.9567044734121763, "kl": 0.632595419883728, "learning_rate": 9.874699991360354e-07, "loss": 0.0006285732379183173, "memory(GiB)": 165.8, "reward": 2.475094795227051, "reward_std": 0.18814830482006073, "rewards/GeoLocAccuracyV2ORM/mean": 0.7833333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4120977520942688, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7021780014038086, "rewards/GeoVisalEntityMatch2ORM/std": 0.27296045422554016, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 674, "train_speed(iter/s)": 0.026107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 505.0, "completions/mean_length": 434.3958435058594, "completions/min_length": 373.0, "epoch": 0.08087706685837527, "grad_norm": 1.1272414937236883, "kl": 0.6238751113414764, "learning_rate": 9.874276676146642e-07, "loss": 0.0006236409535631537, "memory(GiB)": 165.8, "reward": 2.69136905670166, "reward_std": 0.1688661128282547, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7122024297714233, "rewards/GeoVisalEntityMatch2ORM/std": 0.2353050857782364, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 675, "train_speed(iter/s)": 0.026116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 514.0, "completions/mean_length": 436.1770935058594, "completions/min_length": 361.0, "epoch": 0.08099688473520249, "grad_norm": 1.0973510281998229, "kl": 0.6051430106163025, "learning_rate": 9.873852656175707e-07, "loss": 0.0006060203304514289, "memory(GiB)": 165.8, "reward": 2.62911319732666, "reward_std": 0.11184007674455643, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.629112958908081, "rewards/GeoVisalEntityMatch2ORM/std": 0.19771812856197357, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 676, "train_speed(iter/s)": 0.026127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 418.9895935058594, "completions/min_length": 367.0, "epoch": 0.08111670261202972, "grad_norm": 1.2242338682602492, "kl": 0.6410135328769684, "learning_rate": 9.873427931508858e-07, "loss": 0.0006423990125767887, "memory(GiB)": 165.8, "reward": 2.4715030193328857, "reward_std": 0.0907549187541008, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6715030074119568, "rewards/GeoVisalEntityMatch2ORM/std": 0.09768202155828476, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 677, "train_speed(iter/s)": 0.026137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 517.0, "completions/mean_length": 406.4583435058594, "completions/min_length": 340.0, "epoch": 0.08123652048885693, "grad_norm": 1.1661925119474785, "kl": 0.7102248668670654, "learning_rate": 9.873002502207502e-07, "loss": 0.0007115776534192264, "memory(GiB)": 165.8, "reward": 2.188368320465088, "reward_std": 0.08523590862751007, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6883680820465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.10418494045734406, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 678, "train_speed(iter/s)": 0.026137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 494.0, "completions/mean_length": 442.3020935058594, "completions/min_length": 372.0, "epoch": 0.08135633836568416, "grad_norm": 1.0537218589877524, "kl": 0.577081561088562, "learning_rate": 9.87257636833315e-07, "loss": 0.0005782073130831122, "memory(GiB)": 165.8, "reward": 2.5256283283233643, "reward_std": 0.3346273899078369, "rewards/GeoLocAccuracyV2ORM/mean": 0.90625, "rewards/GeoLocAccuracyV2ORM/std": 0.2930107116699219, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7131283283233643, "rewards/GeoVisalEntityMatch2ORM/std": 0.22332488000392914, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930107116699219, "step": 679, "train_speed(iter/s)": 0.026145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 506.0, "completions/mean_length": 447.28125, "completions/min_length": 398.0, "epoch": 0.08147615624251138, "grad_norm": 1.0664857494642834, "kl": 0.6577119827270508, "learning_rate": 9.872149529947416e-07, "loss": 0.0006539151072502136, "memory(GiB)": 165.8, "reward": 2.5375001430511475, "reward_std": 0.2988091707229614, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6000000238418579, "rewards/GeoVisalEntityMatch2ORM/std": 0.19135940074920654, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 680, "train_speed(iter/s)": 0.026155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/mean_length": 444.3020935058594, "completions/min_length": 384.0, "epoch": 0.08159597411933861, "grad_norm": 1.1481068494228674, "kl": 0.5750840306282043, "learning_rate": 9.871721987112017e-07, "loss": 0.0005767941474914551, "memory(GiB)": 165.8, "reward": 2.5389881134033203, "reward_std": 0.1609782576560974, "rewards/GeoLocAccuracyV2ORM/mean": 0.8500000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.3138890266418457, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6889881491661072, "rewards/GeoVisalEntityMatch2ORM/std": 0.2259424477815628, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 681, "train_speed(iter/s)": 0.026165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 443.5520935058594, "completions/min_length": 387.0, "epoch": 0.08171579199616583, "grad_norm": 1.1631274748552782, "kl": 0.6256416141986847, "learning_rate": 9.87129373988877e-07, "loss": 0.000624368607532233, "memory(GiB)": 165.8, "reward": 2.5536746978759766, "reward_std": 0.1782793402671814, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5745081305503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.2017640620470047, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 682, "train_speed(iter/s)": 0.026175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 458.22918701171875, "completions/min_length": 376.0, "epoch": 0.08183560987299306, "grad_norm": 1.0748923963443324, "kl": 0.5905427038669586, "learning_rate": 9.870864788339593e-07, "loss": 0.0005912681808695197, "memory(GiB)": 165.8, "reward": 2.517477035522461, "reward_std": 0.24989131093025208, "rewards/GeoLocAccuracyV2ORM/mean": 0.9708333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.16410307586193085, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5570602416992188, "rewards/GeoVisalEntityMatch2ORM/std": 0.1788754165172577, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 683, "train_speed(iter/s)": 0.026184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 533.0, "completions/mean_length": 471.1145935058594, "completions/min_length": 422.0, "epoch": 0.08195542774982027, "grad_norm": 1.052127980105904, "kl": 0.591623455286026, "learning_rate": 9.870435132526505e-07, "loss": 0.0005924677243456244, "memory(GiB)": 165.8, "reward": 2.5369791984558105, "reward_std": 0.17172452807426453, "rewards/GeoLocAccuracyV2ORM/mean": 0.8500000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.3138890266418457, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6869792342185974, "rewards/GeoVisalEntityMatch2ORM/std": 0.15563258528709412, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 684, "train_speed(iter/s)": 0.026194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 552.0, "completions/mean_length": 450.01043701171875, "completions/min_length": 364.0, "epoch": 0.08207524562664749, "grad_norm": 0.8458999443590512, "kl": 0.6582177579402924, "learning_rate": 9.870004772511632e-07, "loss": 0.0006529850652441382, "memory(GiB)": 165.8, "reward": 2.5399928092956543, "reward_std": 0.34949439764022827, "rewards/GeoLocAccuracyV2ORM/mean": 0.8604166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.32846665382385254, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7524926066398621, "rewards/GeoVisalEntityMatch2ORM/std": 0.2097170352935791, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136448979377747, "step": 685, "train_speed(iter/s)": 0.026204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 505.0, "completions/mean_length": 425.6875, "completions/min_length": 346.0, "epoch": 0.08219506350347472, "grad_norm": 1.1610345771222232, "kl": 0.5378772020339966, "learning_rate": 9.869573708357197e-07, "loss": 0.0005388682475313544, "memory(GiB)": 165.8, "reward": 2.242447853088379, "reward_std": 0.283145010471344, "rewards/GeoLocAccuracyV2ORM/mean": 0.5562500357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.46084874868392944, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6966146230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.19267497956752777, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 686, "train_speed(iter/s)": 0.026214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 417.8958435058594, "completions/min_length": 364.0, "epoch": 0.08231488138030194, "grad_norm": 1.076513715294533, "kl": 0.5823171734809875, "learning_rate": 9.869141940125529e-07, "loss": 0.0005839914083480835, "memory(GiB)": 165.8, "reward": 2.6931052207946777, "reward_std": 0.10955554991960526, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.693105161190033, "rewards/GeoVisalEntityMatch2ORM/std": 0.25510647892951965, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 687, "train_speed(iter/s)": 0.026225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17708333333333334, "completions/max_length": 549.0, "completions/mean_length": 443.4270935058594, "completions/min_length": 387.0, "epoch": 0.08243469925712917, "grad_norm": 1.1550223541302465, "kl": 0.9590117037296295, "learning_rate": 9.86870946787905e-07, "loss": 0.0009588754619471729, "memory(GiB)": 165.8, "reward": 2.2619948387145996, "reward_std": 0.37680163979530334, "rewards/GeoLocAccuracyV2ORM/mean": 0.7479166388511658, "rewards/GeoLocAccuracyV2ORM/std": 0.4187648594379425, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6911616325378418, "rewards/GeoVisalEntityMatch2ORM/std": 0.21058617532253265, "rewards/MathFormat/mean": 0.8229166865348816, "rewards/MathFormat/std": 0.3837431073188782, "step": 688, "train_speed(iter/s)": 0.026227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/mean_length": 402.3125, "completions/min_length": 345.0, "epoch": 0.08255451713395638, "grad_norm": 1.1657423826031372, "kl": 0.5771132707595825, "learning_rate": 9.868276291680295e-07, "loss": 0.0005772560834884644, "memory(GiB)": 165.8, "reward": 2.752662181854248, "reward_std": 0.11104850471019745, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7526620626449585, "rewards/GeoVisalEntityMatch2ORM/std": 0.13626641035079956, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 689, "train_speed(iter/s)": 0.026237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/mean_length": 428.40625, "completions/min_length": 373.0, "epoch": 0.08267433501078361, "grad_norm": 1.0725395764528842, "kl": 0.5888605117797852, "learning_rate": 9.867842411591894e-07, "loss": 0.0005898959934711456, "memory(GiB)": 165.8, "reward": 2.640625, "reward_std": 0.09705595672130585, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.640625, "rewards/GeoVisalEntityMatch2ORM/std": 0.22197364270687103, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 690, "train_speed(iter/s)": 0.026247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 511.0, "completions/mean_length": 444.8958435058594, "completions/min_length": 389.0, "epoch": 0.08279415288761083, "grad_norm": 1.1034363489067125, "kl": 0.5572252571582794, "learning_rate": 9.86740782767658e-07, "loss": 0.0005582161247730255, "memory(GiB)": 165.8, "reward": 2.40685772895813, "reward_std": 0.17042872309684753, "rewards/GeoLocAccuracyV2ORM/mean": 0.7250000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.4431941509246826, "rewards/GeoVisalEntityMatch2ORM/mean": 0.692274272441864, "rewards/GeoVisalEntityMatch2ORM/std": 0.1517695039510727, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 691, "train_speed(iter/s)": 0.026257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 402.0208435058594, "completions/min_length": 355.0, "epoch": 0.08291397076443806, "grad_norm": 1.1424860297123722, "kl": 0.5569857656955719, "learning_rate": 9.86697253999719e-07, "loss": 0.0005589227075688541, "memory(GiB)": 165.8, "reward": 2.6983509063720703, "reward_std": 0.08417106419801712, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6983507871627808, "rewards/GeoVisalEntityMatch2ORM/std": 0.2409733682870865, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 692, "train_speed(iter/s)": 0.026271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/mean_length": 393.9895935058594, "completions/min_length": 331.0, "epoch": 0.08303378864126527, "grad_norm": 1.1169180349355434, "kl": 0.5808595716953278, "learning_rate": 9.86653654861666e-07, "loss": 0.0005835195770487189, "memory(GiB)": 165.8, "reward": 2.7587428092956543, "reward_std": 0.10804165154695511, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7587425708770752, "rewards/GeoVisalEntityMatch2ORM/std": 0.19207574427127838, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 693, "train_speed(iter/s)": 0.026282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/mean_length": 401.16668701171875, "completions/min_length": 340.0, "epoch": 0.0831536065180925, "grad_norm": 1.1525233163946007, "kl": 0.5694242715835571, "learning_rate": 9.866099853598026e-07, "loss": 0.0005697757005691528, "memory(GiB)": 165.8, "reward": 2.5059194564819336, "reward_std": 0.18845926225185394, "rewards/GeoLocAccuracyV2ORM/mean": 0.9000000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.26596397161483765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.605919361114502, "rewards/GeoVisalEntityMatch2ORM/std": 0.1802251935005188, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 694, "train_speed(iter/s)": 0.026293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 425.66668701171875, "completions/min_length": 341.0, "epoch": 0.08327342439491972, "grad_norm": 1.0667479698883355, "kl": 0.7082793712615967, "learning_rate": 9.86566245500443e-07, "loss": 0.0006982907652854919, "memory(GiB)": 165.8, "reward": 2.6303977966308594, "reward_std": 0.1579081416130066, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.651231050491333, "rewards/GeoVisalEntityMatch2ORM/std": 0.21771928668022156, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 695, "train_speed(iter/s)": 0.026302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 388.2395935058594, "completions/min_length": 311.0, "epoch": 0.08339324227174695, "grad_norm": 1.1453297396237783, "kl": 0.526650607585907, "learning_rate": 9.865224352899118e-07, "loss": 0.0005278773605823517, "memory(GiB)": 165.8, "reward": 2.684375286102295, "reward_std": 0.18590441346168518, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.25615236163139343, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7760416865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.18768563866615295, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 696, "train_speed(iter/s)": 0.026305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/mean_length": 412.40625, "completions/min_length": 337.0, "epoch": 0.08351306014857417, "grad_norm": 0.8767561151173948, "kl": 0.6116078495979309, "learning_rate": 9.86478554734543e-07, "loss": 0.0006139948964118958, "memory(GiB)": 165.8, "reward": 2.737847328186035, "reward_std": 0.06348448991775513, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7378472685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.2423224151134491, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 697, "train_speed(iter/s)": 0.026315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 486.0, "completions/mean_length": 425.3958435058594, "completions/min_length": 362.0, "epoch": 0.08363287802540138, "grad_norm": 1.1491292084648446, "kl": 0.5737575590610504, "learning_rate": 9.864346038406813e-07, "loss": 0.0005746434326283634, "memory(GiB)": 165.8, "reward": 2.579253673553467, "reward_std": 0.23168647289276123, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666667342185974, "rewards/GeoLocAccuracyV2ORM/std": 0.29970744252204895, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7125868797302246, "rewards/GeoVisalEntityMatch2ORM/std": 0.16034498810768127, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 698, "train_speed(iter/s)": 0.026325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 428.3958435058594, "completions/min_length": 360.0, "epoch": 0.08375269590222861, "grad_norm": 1.2068356984387194, "kl": 0.6081752777099609, "learning_rate": 9.863905826146811e-07, "loss": 0.0006088092923164368, "memory(GiB)": 165.8, "reward": 2.6119649410247803, "reward_std": 0.07888109982013702, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6119648814201355, "rewards/GeoVisalEntityMatch2ORM/std": 0.1909540742635727, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 699, "train_speed(iter/s)": 0.026336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 431.21875, "completions/min_length": 366.0, "epoch": 0.08387251377905583, "grad_norm": 1.0442330253191683, "kl": 0.5688576102256775, "learning_rate": 9.86346491062908e-07, "loss": 0.0005726665258407593, "memory(GiB)": 165.8, "reward": 2.337698459625244, "reward_std": 0.06616722047328949, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5876984596252441, "rewards/GeoVisalEntityMatch2ORM/std": 0.19952164590358734, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 700, "train_speed(iter/s)": 0.026345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 435.07293701171875, "completions/min_length": 381.0, "epoch": 0.08399233165588306, "grad_norm": 1.1442867983693608, "kl": 0.5915669202804565, "learning_rate": 9.863023291917364e-07, "loss": 0.0005923522403463721, "memory(GiB)": 165.8, "reward": 2.282378673553467, "reward_std": 0.18492621183395386, "rewards/GeoLocAccuracyV2ORM/mean": 0.6916666030883789, "rewards/GeoLocAccuracyV2ORM/std": 0.4513527452945709, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5907118320465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.22837598621845245, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 701, "train_speed(iter/s)": 0.026355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 520.0, "completions/mean_length": 454.375, "completions/min_length": 395.0, "epoch": 0.08411214953271028, "grad_norm": 1.112436846237196, "kl": 0.5786411166191101, "learning_rate": 9.862580970075519e-07, "loss": 0.0005787722766399384, "memory(GiB)": 165.8, "reward": 2.5667784214019775, "reward_std": 0.23185965418815613, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.29199856519699097, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6917781829833984, "rewards/GeoVisalEntityMatch2ORM/std": 0.20589271187782288, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 702, "train_speed(iter/s)": 0.026364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 536.0, "completions/mean_length": 435.1770935058594, "completions/min_length": 381.0, "epoch": 0.0842319674095375, "grad_norm": 1.1030095690151769, "kl": 0.9667993485927582, "learning_rate": 9.862137945167496e-07, "loss": 0.0009640877833589911, "memory(GiB)": 165.8, "reward": 2.229139804840088, "reward_std": 0.33082547783851624, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.374634325504303, "rewards/GeoVisalEntityMatch2ORM/mean": 0.562472939491272, "rewards/GeoVisalEntityMatch2ORM/std": 0.29041600227355957, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 703, "train_speed(iter/s)": 0.026366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 418.3645935058594, "completions/min_length": 333.0, "epoch": 0.08435178528636472, "grad_norm": 1.1741287589864982, "kl": 0.6207932829856873, "learning_rate": 9.861694217257352e-07, "loss": 0.0006213933229446411, "memory(GiB)": 165.8, "reward": 2.598726987838745, "reward_std": 0.12337866425514221, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5987268686294556, "rewards/GeoVisalEntityMatch2ORM/std": 0.14479821920394897, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 704, "train_speed(iter/s)": 0.026375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/mean_length": 433.3125, "completions/min_length": 393.0, "epoch": 0.08447160316319195, "grad_norm": 1.1168857407153976, "kl": 0.6034364998340607, "learning_rate": 9.861249786409248e-07, "loss": 0.0006036460399627686, "memory(GiB)": 165.8, "reward": 2.417448043823242, "reward_std": 0.1846570074558258, "rewards/GeoLocAccuracyV2ORM/mean": 0.8375000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.32999202609062195, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5799479484558105, "rewards/GeoVisalEntityMatch2ORM/std": 0.12957745790481567, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 705, "train_speed(iter/s)": 0.026385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/mean_length": 453.1875, "completions/min_length": 404.0, "epoch": 0.08459142104001917, "grad_norm": 1.0349689160950928, "kl": 0.5790826082229614, "learning_rate": 9.860804652687436e-07, "loss": 0.000579891144298017, "memory(GiB)": 165.8, "reward": 2.467881917953491, "reward_std": 0.1585744321346283, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6345486044883728, "rewards/GeoVisalEntityMatch2ORM/std": 0.16962280869483948, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 706, "train_speed(iter/s)": 0.026395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 527.0, "completions/mean_length": 440.60418701171875, "completions/min_length": 383.0, "epoch": 0.0847112389168464, "grad_norm": 1.0706847402221242, "kl": 0.5905839204788208, "learning_rate": 9.860358816156282e-07, "loss": 0.000590987503528595, "memory(GiB)": 165.8, "reward": 2.569791793823242, "reward_std": 0.16546154022216797, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5906250476837158, "rewards/GeoVisalEntityMatch2ORM/std": 0.10558575391769409, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 707, "train_speed(iter/s)": 0.026404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 435.3958435058594, "completions/min_length": 362.0, "epoch": 0.08483105679367361, "grad_norm": 1.181323832912304, "kl": 0.5911259055137634, "learning_rate": 9.859912276880248e-07, "loss": 0.0005914866924285889, "memory(GiB)": 165.8, "reward": 2.4315972328186035, "reward_std": 0.25368860363960266, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.2036401331424713, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4940972328186035, "rewards/GeoVisalEntityMatch2ORM/std": 0.15148884057998657, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 708, "train_speed(iter/s)": 0.026412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 508.0, "completions/mean_length": 435.4270935058594, "completions/min_length": 349.0, "epoch": 0.08495087467050084, "grad_norm": 0.9516404798795294, "kl": 0.9245708584785461, "learning_rate": 9.859465034923893e-07, "loss": 0.0009132276172749698, "memory(GiB)": 165.8, "reward": 2.3738179206848145, "reward_std": 0.2847413420677185, "rewards/GeoLocAccuracyV2ORM/mean": 0.6583333611488342, "rewards/GeoLocAccuracyV2ORM/std": 0.4749884605407715, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7988178133964539, "rewards/GeoVisalEntityMatch2ORM/std": 0.24613893032073975, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783626317977905, "step": 709, "train_speed(iter/s)": 0.026421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 430.9895935058594, "completions/min_length": 354.0, "epoch": 0.08507069254732806, "grad_norm": 1.1455443072253588, "kl": 0.6248680055141449, "learning_rate": 9.859017090351886e-07, "loss": 0.000626787543296814, "memory(GiB)": 165.8, "reward": 2.7403275966644287, "reward_std": 0.1496821939945221, "rewards/GeoLocAccuracyV2ORM/mean": 0.9249999523162842, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8153274059295654, "rewards/GeoVisalEntityMatch2ORM/std": 0.13786984980106354, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 710, "train_speed(iter/s)": 0.02643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 521.0, "completions/mean_length": 454.51043701171875, "completions/min_length": 394.0, "epoch": 0.08519051042415528, "grad_norm": 1.061406232526656, "kl": 0.6302245855331421, "learning_rate": 9.858568443228997e-07, "loss": 0.0006326338043436408, "memory(GiB)": 165.8, "reward": 2.5208334922790527, "reward_std": 0.08304308354854584, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5208333730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.21615321934223175, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 711, "train_speed(iter/s)": 0.026441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 507.0, "completions/mean_length": 452.3645935058594, "completions/min_length": 388.0, "epoch": 0.08531032830098251, "grad_norm": 1.074378156173858, "kl": 0.6066582500934601, "learning_rate": 9.858119093620088e-07, "loss": 0.0006065803463570774, "memory(GiB)": 165.8, "reward": 2.6071181297302246, "reward_std": 0.24761757254600525, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6487847566604614, "rewards/GeoVisalEntityMatch2ORM/std": 0.22845064103603363, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 712, "train_speed(iter/s)": 0.026449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 417.78125, "completions/min_length": 337.0, "epoch": 0.08543014617780972, "grad_norm": 1.1087948400761989, "kl": 0.8057850897312164, "learning_rate": 9.857669041590132e-07, "loss": 0.0007892362773418427, "memory(GiB)": 165.8, "reward": 2.483184576034546, "reward_std": 0.15354716777801514, "rewards/GeoLocAccuracyV2ORM/mean": 0.7979167699813843, "rewards/GeoLocAccuracyV2ORM/std": 0.3524289131164551, "rewards/GeoVisalEntityMatch2ORM/mean": 0.695684552192688, "rewards/GeoVisalEntityMatch2ORM/std": 0.18238723278045654, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 713, "train_speed(iter/s)": 0.026459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 486.0, "completions/mean_length": 417.41668701171875, "completions/min_length": 368.0, "epoch": 0.08554996405463695, "grad_norm": 1.1424795638363106, "kl": 0.8111039698123932, "learning_rate": 9.857218287204203e-07, "loss": 0.0008120512356981635, "memory(GiB)": 165.8, "reward": 2.110346555709839, "reward_std": 0.10004068911075592, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6103464365005493, "rewards/GeoVisalEntityMatch2ORM/std": 0.20958742499351501, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 714, "train_speed(iter/s)": 0.026457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 433.28125, "completions/min_length": 363.0, "epoch": 0.08566978193146417, "grad_norm": 1.0548258393734813, "kl": 0.6218504011631012, "learning_rate": 9.85676683052747e-07, "loss": 0.0006220638751983643, "memory(GiB)": 165.8, "reward": 2.6206018924713135, "reward_std": 0.17005282640457153, "rewards/GeoLocAccuracyV2ORM/mean": 0.8937500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.2987033426761627, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7268518805503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.1942424476146698, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 715, "train_speed(iter/s)": 0.026467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 423.6875, "completions/min_length": 368.0, "epoch": 0.0857895998082914, "grad_norm": 1.0877103178944545, "kl": 0.628058135509491, "learning_rate": 9.856314671625213e-07, "loss": 0.0006295517086982727, "memory(GiB)": 165.8, "reward": 2.705857992172241, "reward_std": 0.15015365183353424, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7308577299118042, "rewards/GeoVisalEntityMatch2ORM/std": 0.17731121182441711, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 716, "train_speed(iter/s)": 0.026476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 502.0, "completions/mean_length": 414.84375, "completions/min_length": 371.0, "epoch": 0.08590941768511862, "grad_norm": 1.1034704569583975, "kl": 0.8874588012695312, "learning_rate": 9.855861810562801e-07, "loss": 0.0008780820062384009, "memory(GiB)": 165.8, "reward": 2.1261162757873535, "reward_std": 0.249619722366333, "rewards/GeoLocAccuracyV2ORM/mean": 0.6500000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.4579128623008728, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5177827477455139, "rewards/GeoVisalEntityMatch2ORM/std": 0.2629709243774414, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 717, "train_speed(iter/s)": 0.026477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 515.0, "completions/mean_length": 420.35418701171875, "completions/min_length": 372.0, "epoch": 0.08602923556194585, "grad_norm": 1.033529345956881, "kl": 0.5859530866146088, "learning_rate": 9.85540824740572e-07, "loss": 0.0005869840970262885, "memory(GiB)": 165.8, "reward": 2.7320315837860107, "reward_std": 0.0971682071685791, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7403645515441895, "rewards/GeoVisalEntityMatch2ORM/std": 0.11733721941709518, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 718, "train_speed(iter/s)": 0.026489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 400.5833435058594, "completions/min_length": 354.0, "epoch": 0.08614905343877306, "grad_norm": 1.1149282916427838, "kl": 0.639933854341507, "learning_rate": 9.854953982219544e-07, "loss": 0.0006397615070454776, "memory(GiB)": 165.8, "reward": 2.5303239822387695, "reward_std": 0.08822356164455414, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7803241014480591, "rewards/GeoVisalEntityMatch2ORM/std": 0.10077643394470215, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 719, "train_speed(iter/s)": 0.026503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/mean_length": 413.28125, "completions/min_length": 353.0, "epoch": 0.08626887131560029, "grad_norm": 1.1338032653454964, "kl": 0.6232122480869293, "learning_rate": 9.854499015069956e-07, "loss": 0.0006239054491743445, "memory(GiB)": 165.8, "reward": 2.617013931274414, "reward_std": 0.08121463656425476, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6170139312744141, "rewards/GeoVisalEntityMatch2ORM/std": 0.18131227791309357, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 720, "train_speed(iter/s)": 0.026513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 480.0, "completions/mean_length": 402.9895935058594, "completions/min_length": 330.0, "epoch": 0.08638868919242751, "grad_norm": 1.1783695424398708, "kl": 0.6536667346954346, "learning_rate": 9.854043346022736e-07, "loss": 0.0006546179647557437, "memory(GiB)": 165.8, "reward": 2.2200894355773926, "reward_std": 0.14440125226974487, "rewards/GeoLocAccuracyV2ORM/mean": 0.5833333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4603583514690399, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6367559432983398, "rewards/GeoVisalEntityMatch2ORM/std": 0.1538008600473404, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 721, "train_speed(iter/s)": 0.026523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 379.2083435058594, "completions/min_length": 321.0, "epoch": 0.08650850706925474, "grad_norm": 1.089219319599731, "kl": 0.6469307541847229, "learning_rate": 9.85358697514377e-07, "loss": 0.0006478317081928253, "memory(GiB)": 165.8, "reward": 2.7357146739959717, "reward_std": 0.06340762227773666, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.735714316368103, "rewards/GeoVisalEntityMatch2ORM/std": 0.1927025318145752, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 722, "train_speed(iter/s)": 0.026533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/mean_length": 384.35418701171875, "completions/min_length": 326.0, "epoch": 0.08662832494608196, "grad_norm": 0.9814031797218951, "kl": 0.5804457366466522, "learning_rate": 9.853129902499044e-07, "loss": 0.0005825385451316833, "memory(GiB)": 165.8, "reward": 2.796180486679077, "reward_std": 0.06254171580076218, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7961806058883667, "rewards/GeoVisalEntityMatch2ORM/std": 0.16871511936187744, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 723, "train_speed(iter/s)": 0.02654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/mean_length": 378.79168701171875, "completions/min_length": 282.0, "epoch": 0.08674814282290917, "grad_norm": 1.237122674122824, "kl": 0.6381432414054871, "learning_rate": 9.852672128154642e-07, "loss": 0.0006394548108801246, "memory(GiB)": 165.8, "reward": 2.24580454826355, "reward_std": 0.10477093607187271, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.49580439925193787, "rewards/GeoVisalEntityMatch2ORM/std": 0.11197688430547714, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 724, "train_speed(iter/s)": 0.02655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/mean_length": 356.34375, "completions/min_length": 273.0, "epoch": 0.0868679606997364, "grad_norm": 1.06926337126211, "kl": 0.6285974383354187, "learning_rate": 9.852213652176756e-07, "loss": 0.0012260216753929853, "memory(GiB)": 165.8, "reward": 2.601215362548828, "reward_std": 0.09036874026060104, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8012153506278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.23674502968788147, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 725, "train_speed(iter/s)": 0.02656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 376.16668701171875, "completions/min_length": 317.0, "epoch": 0.08698777857656362, "grad_norm": 1.1611219474870949, "kl": 0.6557683050632477, "learning_rate": 9.851754474631672e-07, "loss": 0.0006573101272806525, "memory(GiB)": 165.8, "reward": 2.291505813598633, "reward_std": 0.0889308825135231, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5415058135986328, "rewards/GeoVisalEntityMatch2ORM/std": 0.14077366888523102, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 726, "train_speed(iter/s)": 0.02657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 378.51043701171875, "completions/min_length": 327.0, "epoch": 0.08710759645339085, "grad_norm": 1.155477914786721, "kl": 0.6565086841583252, "learning_rate": 9.851294595585783e-07, "loss": 0.0006575460429303348, "memory(GiB)": 165.8, "reward": 2.6656994819641113, "reward_std": 0.13625112175941467, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6761161088943481, "rewards/GeoVisalEntityMatch2ORM/std": 0.17796310782432556, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 727, "train_speed(iter/s)": 0.026583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 363.0, "completions/min_length": 302.0, "epoch": 0.08722741433021806, "grad_norm": 0.8666994213888346, "kl": 0.6603200137615204, "learning_rate": 9.850834015105581e-07, "loss": 0.0006608715048059821, "memory(GiB)": 165.8, "reward": 2.659635543823242, "reward_std": 0.15701743960380554, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.25615236163139343, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7513021230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.18275557458400726, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 728, "train_speed(iter/s)": 0.026594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/mean_length": 370.19793701171875, "completions/min_length": 312.0, "epoch": 0.0873472322070453, "grad_norm": 0.8264852618973824, "kl": 0.5839694142341614, "learning_rate": 9.850372733257662e-07, "loss": 0.00118105742149055, "memory(GiB)": 165.8, "reward": 2.7348484992980957, "reward_std": 0.060284920036792755, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7348484992980957, "rewards/GeoVisalEntityMatch2ORM/std": 0.2892656922340393, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 729, "train_speed(iter/s)": 0.026607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 436.0, "completions/mean_length": 372.57293701171875, "completions/min_length": 306.0, "epoch": 0.08746705008387251, "grad_norm": 1.1875446353774228, "kl": 0.656021237373352, "learning_rate": 9.849910750108717e-07, "loss": 0.0006584823131561279, "memory(GiB)": 165.8, "reward": 2.697953939437866, "reward_std": 0.0843341201543808, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6979538798332214, "rewards/GeoVisalEntityMatch2ORM/std": 0.19241786003112793, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 730, "train_speed(iter/s)": 0.026617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/mean_length": 378.34375, "completions/min_length": 317.0, "epoch": 0.08758686796069974, "grad_norm": 1.1755430605439217, "kl": 0.6711696684360504, "learning_rate": 9.849448065725547e-07, "loss": 0.0006723826518282294, "memory(GiB)": 165.8, "reward": 2.7201974391937256, "reward_std": 0.06283237040042877, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7201975584030151, "rewards/GeoVisalEntityMatch2ORM/std": 0.14924544095993042, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 731, "train_speed(iter/s)": 0.026618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 381.8645935058594, "completions/min_length": 341.0, "epoch": 0.08770668583752696, "grad_norm": 1.3525177075905126, "kl": 0.654274195432663, "learning_rate": 9.848984680175048e-07, "loss": 0.0006556709995493293, "memory(GiB)": 165.8, "reward": 2.5687501430511475, "reward_std": 0.15030984580516815, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6020833849906921, "rewards/GeoVisalEntityMatch2ORM/std": 0.11868554353713989, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 732, "train_speed(iter/s)": 0.026628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 379.2708435058594, "completions/min_length": 314.0, "epoch": 0.08782650371435419, "grad_norm": 1.1788377099257215, "kl": 0.665905624628067, "learning_rate": 9.84852059352422e-07, "loss": 0.0006664618849754333, "memory(GiB)": 165.8, "reward": 2.665360450744629, "reward_std": 0.1680217683315277, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7153605222702026, "rewards/GeoVisalEntityMatch2ORM/std": 0.22603358328342438, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 733, "train_speed(iter/s)": 0.026637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 375.4270935058594, "completions/min_length": 297.0, "epoch": 0.0879463215911814, "grad_norm": 1.2582898274040306, "kl": 0.6385467052459717, "learning_rate": 9.848055805840163e-07, "loss": 0.0006395808304660022, "memory(GiB)": 165.8, "reward": 2.5883145332336426, "reward_std": 0.13559651374816895, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5987310409545898, "rewards/GeoVisalEntityMatch2ORM/std": 0.2137262374162674, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 734, "train_speed(iter/s)": 0.026639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 399.97918701171875, "completions/min_length": 354.0, "epoch": 0.08806613946800863, "grad_norm": 1.1989482431026879, "kl": 0.6496732831001282, "learning_rate": 9.847590317190082e-07, "loss": 0.0006503909826278687, "memory(GiB)": 165.8, "reward": 2.4651784896850586, "reward_std": 0.1318441778421402, "rewards/GeoLocAccuracyV2ORM/mean": 0.783333420753479, "rewards/GeoLocAccuracyV2ORM/std": 0.35737839341163635, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6818453073501587, "rewards/GeoVisalEntityMatch2ORM/std": 0.1456785947084427, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 735, "train_speed(iter/s)": 0.026649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/mean_length": 390.4375, "completions/min_length": 329.0, "epoch": 0.08818595734483585, "grad_norm": 1.1793815366892775, "kl": 0.6471599042415619, "learning_rate": 9.847124127641277e-07, "loss": 0.0006471052765846252, "memory(GiB)": 165.8, "reward": 2.620241641998291, "reward_std": 0.1070251315832138, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6202415227890015, "rewards/GeoVisalEntityMatch2ORM/std": 0.16592763364315033, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 736, "train_speed(iter/s)": 0.026659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 461.0, "completions/mean_length": 391.375, "completions/min_length": 295.0, "epoch": 0.08830577522166307, "grad_norm": 1.2042702191244756, "kl": 0.6940107941627502, "learning_rate": 9.846657237261155e-07, "loss": 0.0006931523676030338, "memory(GiB)": 165.8, "reward": 2.327240467071533, "reward_std": 0.3825417160987854, "rewards/GeoLocAccuracyV2ORM/mean": 0.8833333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.3120953440666199, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5272403955459595, "rewards/GeoVisalEntityMatch2ORM/std": 0.12299178540706635, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783623337745667, "step": 737, "train_speed(iter/s)": 0.026659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 400.85418701171875, "completions/min_length": 337.0, "epoch": 0.0884255930984903, "grad_norm": 1.1067959997713768, "kl": 0.6440412700176239, "learning_rate": 9.846189646117224e-07, "loss": 0.0006452575325965881, "memory(GiB)": 165.8, "reward": 2.5862860679626465, "reward_std": 0.1417086124420166, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6112858653068542, "rewards/GeoVisalEntityMatch2ORM/std": 0.13353191316127777, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 738, "train_speed(iter/s)": 0.026666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/mean_length": 377.9375, "completions/min_length": 297.0, "epoch": 0.08854541097531751, "grad_norm": 1.2092851355278829, "kl": 0.6689292192459106, "learning_rate": 9.845721354277088e-07, "loss": 0.0006712079048156738, "memory(GiB)": 165.8, "reward": 2.7238922119140625, "reward_std": 0.1268477439880371, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7322255969047546, "rewards/GeoVisalEntityMatch2ORM/std": 0.1633402705192566, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 739, "train_speed(iter/s)": 0.026667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/mean_length": 386.51043701171875, "completions/min_length": 326.0, "epoch": 0.08866522885214474, "grad_norm": 1.1756298681740822, "kl": 0.6333997249603271, "learning_rate": 9.845252361808459e-07, "loss": 0.0006353458156809211, "memory(GiB)": 165.8, "reward": 2.52056884765625, "reward_std": 0.13374219834804535, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5372354984283447, "rewards/GeoVisalEntityMatch2ORM/std": 0.17056404054164886, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 740, "train_speed(iter/s)": 0.026677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.23958333333333334, "completions/max_length": 447.0, "completions/mean_length": 391.82293701171875, "completions/min_length": 344.0, "epoch": 0.08878504672897196, "grad_norm": 1.1592573137606461, "kl": 0.673543393611908, "learning_rate": 9.844782668779143e-07, "loss": 0.0006748338928446174, "memory(GiB)": 165.8, "reward": 2.1521053314208984, "reward_std": 0.18233928084373474, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906975746154785, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6312719583511353, "rewards/GeoVisalEntityMatch2ORM/std": 0.12515689432621002, "rewards/MathFormat/mean": 0.7604166865348816, "rewards/MathFormat/std": 0.42906975746154785, "step": 741, "train_speed(iter/s)": 0.026675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/mean_length": 385.82293701171875, "completions/min_length": 328.0, "epoch": 0.08890486460579919, "grad_norm": 1.2350956043237458, "kl": 0.6456243097782135, "learning_rate": 9.844312275257056e-07, "loss": 0.0006471028318628669, "memory(GiB)": 165.8, "reward": 2.482957363128662, "reward_std": 0.1548844277858734, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6746239066123962, "rewards/GeoVisalEntityMatch2ORM/std": 0.15026521682739258, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 742, "train_speed(iter/s)": 0.026684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 377.0, "completions/mean_length": 342.22918701171875, "completions/min_length": 299.0, "epoch": 0.0890246824826264, "grad_norm": 1.3045925383682093, "kl": 0.7202032804489136, "learning_rate": 9.84384118131021e-07, "loss": 0.0007185023277997971, "memory(GiB)": 165.8, "reward": 2.424537181854248, "reward_std": 0.40159910917282104, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.3195391297340393, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6328703761100769, "rewards/GeoVisalEntityMatch2ORM/std": 0.23835283517837524, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783623337745667, "step": 743, "train_speed(iter/s)": 0.026682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/mean_length": 370.51043701171875, "completions/min_length": 323.0, "epoch": 0.08914450035945364, "grad_norm": 1.2017980554109555, "kl": 0.6401015818119049, "learning_rate": 9.84336938700672e-07, "loss": 0.0006413552910089493, "memory(GiB)": 165.8, "reward": 2.4120371341705322, "reward_std": 0.313212513923645, "rewards/GeoLocAccuracyV2ORM/mean": 0.7562500834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.39491838216781616, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6557870507240295, "rewards/GeoVisalEntityMatch2ORM/std": 0.16111396253108978, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 744, "train_speed(iter/s)": 0.026692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 410.0, "completions/mean_length": 357.19793701171875, "completions/min_length": 301.0, "epoch": 0.08926431823628085, "grad_norm": 1.2267888689360968, "kl": 0.806259274482727, "learning_rate": 9.842896892414798e-07, "loss": 0.0008053581113927066, "memory(GiB)": 165.8, "reward": 2.3800597190856934, "reward_std": 0.33262234926223755, "rewards/GeoLocAccuracyV2ORM/mean": 0.8041666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.3968074321746826, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7633929252624512, "rewards/GeoVisalEntityMatch2ORM/std": 0.13289615511894226, "rewards/MathFormat/mean": 0.8125, "rewards/MathFormat/std": 0.39236128330230713, "step": 745, "train_speed(iter/s)": 0.02669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.0, "completions/mean_length": 348.9583435058594, "completions/min_length": 274.0, "epoch": 0.08938413611310808, "grad_norm": 1.2622928692051705, "kl": 0.6943385601043701, "learning_rate": 9.842423697602761e-07, "loss": 0.0006957327714189887, "memory(GiB)": 165.8, "reward": 2.734189033508301, "reward_std": 0.08659359812736511, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7341890335083008, "rewards/GeoVisalEntityMatch2ORM/std": 0.2424735575914383, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 746, "train_speed(iter/s)": 0.026691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.0, "completions/mean_length": 322.82293701171875, "completions/min_length": 257.0, "epoch": 0.0895039539899353, "grad_norm": 1.3939231659462668, "kl": 0.6540563702583313, "learning_rate": 9.84194980263903e-07, "loss": 0.000655055046081543, "memory(GiB)": 165.8, "reward": 2.680655002593994, "reward_std": 0.19142156839370728, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7473214864730835, "rewards/GeoVisalEntityMatch2ORM/std": 0.22410163283348083, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 747, "train_speed(iter/s)": 0.026693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/mean_length": 346.96875, "completions/min_length": 286.0, "epoch": 0.08962377186676253, "grad_norm": 1.2884106995332139, "kl": 0.6100676953792572, "learning_rate": 9.84147520759212e-07, "loss": 0.0006122539634816349, "memory(GiB)": 165.8, "reward": 2.5554213523864746, "reward_std": 0.17661017179489136, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6970881223678589, "rewards/GeoVisalEntityMatch2ORM/std": 0.12601438164710999, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 748, "train_speed(iter/s)": 0.026703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 364.0, "completions/mean_length": 318.85418701171875, "completions/min_length": 270.0, "epoch": 0.08974358974358974, "grad_norm": 1.2604957248464421, "kl": 0.7105562388896942, "learning_rate": 9.840999912530654e-07, "loss": 0.0007108052959665656, "memory(GiB)": 165.8, "reward": 2.5803818702697754, "reward_std": 0.19474168121814728, "rewards/GeoLocAccuracyV2ORM/mean": 0.9249999523162842, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6553819179534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.17908546328544617, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 749, "train_speed(iter/s)": 0.026713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.0, "completions/mean_length": 328.375, "completions/min_length": 265.0, "epoch": 0.08986340762041696, "grad_norm": 1.3417625980123589, "kl": 0.648783415555954, "learning_rate": 9.840523917523353e-07, "loss": 0.0006503574550151825, "memory(GiB)": 165.8, "reward": 2.659635543823242, "reward_std": 0.22228741645812988, "rewards/GeoLocAccuracyV2ORM/mean": 0.9250000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7346353530883789, "rewards/GeoVisalEntityMatch2ORM/std": 0.13230594992637634, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 750, "train_speed(iter/s)": 0.026723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 373.0, "completions/mean_length": 326.47918701171875, "completions/min_length": 278.0, "epoch": 0.08998322549724419, "grad_norm": 1.3017109619871567, "kl": 0.6873601675033569, "learning_rate": 9.840047222639041e-07, "loss": 0.0006885926122777164, "memory(GiB)": 165.8, "reward": 2.3729166984558105, "reward_std": 0.09978873282670975, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6229166984558105, "rewards/GeoVisalEntityMatch2ORM/std": 0.11544543504714966, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 751, "train_speed(iter/s)": 0.026733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.0, "completions/mean_length": 319.96875, "completions/min_length": 280.0, "epoch": 0.0901030433740714, "grad_norm": 1.263929850571329, "kl": 0.7101839780807495, "learning_rate": 9.83956982794664e-07, "loss": 0.000712332664988935, "memory(GiB)": 165.8, "reward": 2.6110494136810303, "reward_std": 0.0916849672794342, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6110491752624512, "rewards/GeoVisalEntityMatch2ORM/std": 0.18946479260921478, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 752, "train_speed(iter/s)": 0.026743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/mean_length": 346.7083435058594, "completions/min_length": 300.0, "epoch": 0.09022286125089864, "grad_norm": 1.2777402626289307, "kl": 0.6872395277023315, "learning_rate": 9.839091733515176e-07, "loss": 0.0006885727634653449, "memory(GiB)": 165.8, "reward": 2.5874009132385254, "reward_std": 0.16516941785812378, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666667342185974, "rewards/GeoLocAccuracyV2ORM/std": 0.29970747232437134, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7207341194152832, "rewards/GeoVisalEntityMatch2ORM/std": 0.17361922562122345, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 753, "train_speed(iter/s)": 0.026753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 313.9270935058594, "completions/min_length": 244.0, "epoch": 0.09034267912772585, "grad_norm": 1.1873002084998823, "kl": 0.7446980774402618, "learning_rate": 9.838612939413773e-07, "loss": 0.0007332960958592594, "memory(GiB)": 165.8, "reward": 2.6872520446777344, "reward_std": 0.16803041100502014, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.708085298538208, "rewards/GeoVisalEntityMatch2ORM/std": 0.16888883709907532, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 754, "train_speed(iter/s)": 0.026762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 323.29168701171875, "completions/min_length": 262.0, "epoch": 0.09046249700455308, "grad_norm": 22.39910616064649, "kl": 39.40802934765816, "learning_rate": 9.838133445711663e-07, "loss": 0.027888938784599304, "memory(GiB)": 165.8, "reward": 2.7205729484558105, "reward_std": 0.1795501410961151, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7414063215255737, "rewards/GeoVisalEntityMatch2ORM/std": 0.12464836239814758, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 755, "train_speed(iter/s)": 0.026768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.0, "completions/mean_length": 332.19793701171875, "completions/min_length": 276.0, "epoch": 0.0905823148813803, "grad_norm": 1.3809769896729338, "kl": 0.7068421244621277, "learning_rate": 9.837653252478168e-07, "loss": 0.000709041953086853, "memory(GiB)": 165.8, "reward": 2.424156904220581, "reward_std": 0.2460012137889862, "rewards/GeoLocAccuracyV2ORM/mean": 0.8395833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.3252461850643158, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5949901342391968, "rewards/GeoVisalEntityMatch2ORM/std": 0.12961815297603607, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 756, "train_speed(iter/s)": 0.026769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 399.0, "completions/mean_length": 338.32293701171875, "completions/min_length": 274.0, "epoch": 0.09070213275820753, "grad_norm": 1.2691188662585735, "kl": 0.6808218359947205, "learning_rate": 9.837172359782726e-07, "loss": 0.0006826793542131782, "memory(GiB)": 165.8, "reward": 2.7008681297302246, "reward_std": 0.10250648856163025, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7008680701255798, "rewards/GeoVisalEntityMatch2ORM/std": 0.139015331864357, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 757, "train_speed(iter/s)": 0.02677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 384.0, "completions/mean_length": 329.9270935058594, "completions/min_length": 264.0, "epoch": 0.09082195063503475, "grad_norm": 1.3287397960098812, "kl": 0.7315866351127625, "learning_rate": 9.83669076769486e-07, "loss": 0.0007323374738916755, "memory(GiB)": 165.8, "reward": 2.6173529624938965, "reward_std": 0.08003650605678558, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6173528432846069, "rewards/GeoVisalEntityMatch2ORM/std": 0.16057509183883667, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 758, "train_speed(iter/s)": 0.026769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.0, "completions/mean_length": 341.4375, "completions/min_length": 300.0, "epoch": 0.09094176851186198, "grad_norm": 1.3569314577801526, "kl": 0.6936019957065582, "learning_rate": 9.836208476284206e-07, "loss": 0.000695581256877631, "memory(GiB)": 165.8, "reward": 2.4053239822387695, "reward_std": 0.2217591106891632, "rewards/GeoLocAccuracyV2ORM/mean": 0.7083333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.38707178831100464, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6969908475875854, "rewards/GeoVisalEntityMatch2ORM/std": 0.20683248341083527, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 759, "train_speed(iter/s)": 0.026783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 412.0, "completions/mean_length": 357.96875, "completions/min_length": 310.0, "epoch": 0.09106158638868919, "grad_norm": 1.2129268295242512, "kl": 0.7798097729682922, "learning_rate": 9.8357254856205e-07, "loss": 0.0007782678003422916, "memory(GiB)": 165.8, "reward": 2.550231695175171, "reward_std": 0.25648313760757446, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336149215698242, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6543982028961182, "rewards/GeoVisalEntityMatch2ORM/std": 0.08500680327415466, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 760, "train_speed(iter/s)": 0.026778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/mean_length": 358.5625, "completions/min_length": 300.0, "epoch": 0.09118140426551642, "grad_norm": 1.233359573727995, "kl": 0.6406034231185913, "learning_rate": 9.835241795773568e-07, "loss": 0.0006409188499674201, "memory(GiB)": 165.8, "reward": 2.6217398643493652, "reward_std": 0.1001470535993576, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.630073070526123, "rewards/GeoVisalEntityMatch2ORM/std": 0.08298475295305252, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 761, "train_speed(iter/s)": 0.026787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.0, "completions/mean_length": 351.16668701171875, "completions/min_length": 292.0, "epoch": 0.09130122214234364, "grad_norm": 1.214535983860408, "kl": 0.6656517386436462, "learning_rate": 9.834757406813352e-07, "loss": 0.0006664842367172241, "memory(GiB)": 165.8, "reward": 2.753720760345459, "reward_std": 0.08995635062456131, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7537202835083008, "rewards/GeoVisalEntityMatch2ORM/std": 0.1205582395195961, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 762, "train_speed(iter/s)": 0.026793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.0, "completions/mean_length": 361.76043701171875, "completions/min_length": 292.0, "epoch": 0.09142104001917085, "grad_norm": 1.2451389591378568, "kl": 0.6833775639533997, "learning_rate": 9.834272318809888e-07, "loss": 0.0006841272115707397, "memory(GiB)": 165.8, "reward": 2.474801540374756, "reward_std": 0.2157260775566101, "rewards/GeoLocAccuracyV2ORM/mean": 0.8979166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.27221477031707764, "rewards/GeoVisalEntityMatch2ORM/mean": 0.576884925365448, "rewards/GeoVisalEntityMatch2ORM/std": 0.11439631134271622, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 763, "train_speed(iter/s)": 0.026803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/mean_length": 347.0625, "completions/min_length": 285.0, "epoch": 0.09154085789599808, "grad_norm": 1.275309198305631, "kl": 0.7159310579299927, "learning_rate": 9.833786531833311e-07, "loss": 0.0007173767080530524, "memory(GiB)": 165.8, "reward": 2.4849579334259033, "reward_std": 0.23345567286014557, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783626317977905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.568291187286377, "rewards/GeoVisalEntityMatch2ORM/std": 0.19544513523578644, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 764, "train_speed(iter/s)": 0.026814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 410.0, "completions/mean_length": 345.6875, "completions/min_length": 282.0, "epoch": 0.0916606757728253, "grad_norm": 1.1054073101446353, "kl": 0.6791173219680786, "learning_rate": 9.833300045953858e-07, "loss": 0.0006787230959162116, "memory(GiB)": 165.8, "reward": 2.692460536956787, "reward_std": 0.0590699277818203, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.692460298538208, "rewards/GeoVisalEntityMatch2ORM/std": 0.27676987648010254, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 765, "train_speed(iter/s)": 0.026827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 418.0, "completions/mean_length": 348.5625, "completions/min_length": 294.0, "epoch": 0.09178049364965253, "grad_norm": 1.038127373219081, "kl": 0.6802767217159271, "learning_rate": 9.832812861241872e-07, "loss": 0.0006821602582931519, "memory(GiB)": 165.8, "reward": 2.636979103088379, "reward_std": 0.1074572280049324, "rewards/GeoLocAccuracyV2ORM/mean": 0.8166667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.33800238370895386, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8203125, "rewards/GeoVisalEntityMatch2ORM/std": 0.2009037882089615, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 766, "train_speed(iter/s)": 0.026836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.0, "completions/mean_length": 328.16668701171875, "completions/min_length": 286.0, "epoch": 0.09190031152647975, "grad_norm": 1.3480834252948697, "kl": 0.7323199212551117, "learning_rate": 9.832324977767792e-07, "loss": 0.0007324504549615085, "memory(GiB)": 165.8, "reward": 2.196093797683716, "reward_std": 0.1583912968635559, "rewards/GeoLocAccuracyV2ORM/mean": 0.6166667342185974, "rewards/GeoLocAccuracyV2ORM/std": 0.4017505645751953, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5794271230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.28434279561042786, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 767, "train_speed(iter/s)": 0.026847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.0, "completions/mean_length": 352.53125, "completions/min_length": 309.0, "epoch": 0.09202012940330698, "grad_norm": 1.229934476250607, "kl": 0.7246689796447754, "learning_rate": 9.831836395602162e-07, "loss": 0.0007256033713929355, "memory(GiB)": 165.8, "reward": 2.4305806159973145, "reward_std": 0.10101155191659927, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6305803656578064, "rewards/GeoVisalEntityMatch2ORM/std": 0.22212739288806915, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 768, "train_speed(iter/s)": 0.026856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 340.76043701171875, "completions/min_length": 292.0, "epoch": 0.0921399472801342, "grad_norm": 1.0285530281739073, "kl": 0.7311049103736877, "learning_rate": 9.83134711481562e-07, "loss": 0.000733889639377594, "memory(GiB)": 165.8, "reward": 2.711805820465088, "reward_std": 0.16534923017024994, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.732638955116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.26292234659194946, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 769, "train_speed(iter/s)": 0.026865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/mean_length": 356.79168701171875, "completions/min_length": 291.0, "epoch": 0.09225976515696142, "grad_norm": 1.3024536284623023, "kl": 0.7117897570133209, "learning_rate": 9.830857135478913e-07, "loss": 0.0007128616562113166, "memory(GiB)": 165.8, "reward": 2.7491939067840576, "reward_std": 0.10941441357135773, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7491940259933472, "rewards/GeoVisalEntityMatch2ORM/std": 0.195033997297287, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 770, "train_speed(iter/s)": 0.026875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.0, "completions/mean_length": 338.51043701171875, "completions/min_length": 277.0, "epoch": 0.09237958303378864, "grad_norm": 1.079203341284389, "kl": 0.7256700396537781, "learning_rate": 9.830366457662887e-07, "loss": 0.0007264366140589118, "memory(GiB)": 165.8, "reward": 2.486607313156128, "reward_std": 0.057858966290950775, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.486607164144516, "rewards/GeoVisalEntityMatch2ORM/std": 0.15648500621318817, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 771, "train_speed(iter/s)": 0.026884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/mean_length": 349.8125, "completions/min_length": 301.0, "epoch": 0.09249940091061587, "grad_norm": 1.361014839110552, "kl": 0.6873579025268555, "learning_rate": 9.829875081438486e-07, "loss": 0.0006906812777742743, "memory(GiB)": 165.8, "reward": 2.5630526542663574, "reward_std": 0.08914382755756378, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5630524158477783, "rewards/GeoVisalEntityMatch2ORM/std": 0.12913568317890167, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 772, "train_speed(iter/s)": 0.026894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 378.0, "completions/mean_length": 329.78125, "completions/min_length": 267.0, "epoch": 0.09261921878744309, "grad_norm": 1.328275136710732, "kl": 0.9229414463043213, "learning_rate": 9.829383006876753e-07, "loss": 0.000921060680411756, "memory(GiB)": 165.8, "reward": 2.356213092803955, "reward_std": 0.3080122470855713, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6062127947807312, "rewards/GeoVisalEntityMatch2ORM/std": 0.11610205471515656, "rewards/MathFormat/mean": 0.875, "rewards/MathFormat/std": 0.33245500922203064, "step": 773, "train_speed(iter/s)": 0.026891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 385.0, "completions/mean_length": 334.44793701171875, "completions/min_length": 274.0, "epoch": 0.0927390366642703, "grad_norm": 1.2842111018883384, "kl": 0.7336406111717224, "learning_rate": 9.828890234048841e-07, "loss": 0.0007357547874562442, "memory(GiB)": 165.8, "reward": 2.575173854827881, "reward_std": 0.0882725939154625, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5751736164093018, "rewards/GeoVisalEntityMatch2ORM/std": 0.16283302009105682, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 774, "train_speed(iter/s)": 0.026901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/mean_length": 325.76043701171875, "completions/min_length": 289.0, "epoch": 0.09285885454109753, "grad_norm": 1.240968183943518, "kl": 0.7251130640506744, "learning_rate": 9.828396763025996e-07, "loss": 0.0007262354483827949, "memory(GiB)": 165.8, "reward": 2.5703125, "reward_std": 0.10540209710597992, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5703125, "rewards/GeoVisalEntityMatch2ORM/std": 0.17382358014583588, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 775, "train_speed(iter/s)": 0.026907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 414.0, "completions/mean_length": 352.13543701171875, "completions/min_length": 289.0, "epoch": 0.09297867241792475, "grad_norm": 1.3066575211369662, "kl": 0.6700382232666016, "learning_rate": 9.827902593879568e-07, "loss": 0.0006717294454574585, "memory(GiB)": 165.8, "reward": 2.669229507446289, "reward_std": 0.11392542719841003, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6692295074462891, "rewards/GeoVisalEntityMatch2ORM/std": 0.2646660804748535, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 776, "train_speed(iter/s)": 0.02692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.0, "completions/mean_length": 348.875, "completions/min_length": 297.0, "epoch": 0.09309849029475198, "grad_norm": 1.267566766235381, "kl": 0.7504476308822632, "learning_rate": 9.827407726681006e-07, "loss": 0.000751890242099762, "memory(GiB)": 165.8, "reward": 2.5139882564544678, "reward_std": 0.15092220902442932, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6889880895614624, "rewards/GeoVisalEntityMatch2ORM/std": 0.22149524092674255, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 777, "train_speed(iter/s)": 0.02693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 426.0, "completions/mean_length": 352.0520935058594, "completions/min_length": 302.0, "epoch": 0.0932183081715792, "grad_norm": 1.2079628684740207, "kl": 0.731226235628128, "learning_rate": 9.826912161501866e-07, "loss": 0.0007327249040827155, "memory(GiB)": 165.8, "reward": 2.563727855682373, "reward_std": 0.14222969114780426, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5887277126312256, "rewards/GeoVisalEntityMatch2ORM/std": 0.12778721749782562, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 778, "train_speed(iter/s)": 0.026938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 427.0, "completions/mean_length": 365.5833435058594, "completions/min_length": 324.0, "epoch": 0.09333812604840643, "grad_norm": 1.193050528403987, "kl": 0.8025931417942047, "learning_rate": 9.826415898413795e-07, "loss": 0.0008004407281987369, "memory(GiB)": 165.8, "reward": 2.367018461227417, "reward_std": 0.28871697187423706, "rewards/GeoLocAccuracyV2ORM/mean": 0.9270833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.26136448979377747, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5128517746925354, "rewards/GeoVisalEntityMatch2ORM/std": 0.23484843969345093, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136448979377747, "step": 779, "train_speed(iter/s)": 0.026935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 374.8020935058594, "completions/min_length": 332.0, "epoch": 0.09345794392523364, "grad_norm": 1.0100753185084614, "kl": 0.69931760430336, "learning_rate": 9.825918937488549e-07, "loss": 0.000702057674061507, "memory(GiB)": 165.8, "reward": 2.6409144401550293, "reward_std": 0.0555640384554863, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6409143805503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.12040751427412033, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 780, "train_speed(iter/s)": 0.026945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.0, "completions/mean_length": 363.76043701171875, "completions/min_length": 298.0, "epoch": 0.09357776180206087, "grad_norm": 1.2122190115906, "kl": 0.7023690938949585, "learning_rate": 9.825421278797982e-07, "loss": 0.0007049044361338019, "memory(GiB)": 165.8, "reward": 2.7326266765594482, "reward_std": 0.08695004135370255, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7326264381408691, "rewards/GeoVisalEntityMatch2ORM/std": 0.1349201202392578, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 781, "train_speed(iter/s)": 0.026955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 391.78125, "completions/min_length": 324.0, "epoch": 0.09369757967888809, "grad_norm": 1.2107444052171625, "kl": 0.7004360556602478, "learning_rate": 9.824922922414047e-07, "loss": 0.0007018757751211524, "memory(GiB)": 165.8, "reward": 2.3359193801879883, "reward_std": 0.17130286991596222, "rewards/GeoLocAccuracyV2ORM/mean": 0.8229166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3370082676410675, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5234192609786987, "rewards/GeoVisalEntityMatch2ORM/std": 0.11144769936800003, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 782, "train_speed(iter/s)": 0.026961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 386.3645935058594, "completions/min_length": 318.0, "epoch": 0.09381739755571532, "grad_norm": 1.3035074454127105, "kl": 0.7199980616569519, "learning_rate": 9.824423868408802e-07, "loss": 0.0007211628253571689, "memory(GiB)": 165.8, "reward": 2.5026910305023193, "reward_std": 0.14973405003547668, "rewards/GeoLocAccuracyV2ORM/mean": 0.8416666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3204163610935211, "rewards/GeoVisalEntityMatch2ORM/mean": 0.661024272441864, "rewards/GeoVisalEntityMatch2ORM/std": 0.1660756915807724, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 783, "train_speed(iter/s)": 0.02697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 409.40625, "completions/min_length": 357.0, "epoch": 0.09393721543254253, "grad_norm": 1.0755860497789356, "kl": 0.6553468704223633, "learning_rate": 9.823924116854405e-07, "loss": 0.0006565874209627509, "memory(GiB)": 165.8, "reward": 2.6362104415893555, "reward_std": 0.13576436042785645, "rewards/GeoLocAccuracyV2ORM/mean": 0.8416666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3204163908958435, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7945436835289001, "rewards/GeoVisalEntityMatch2ORM/std": 0.2208620309829712, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 784, "train_speed(iter/s)": 0.026977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 439.0, "completions/mean_length": 383.5, "completions/min_length": 310.0, "epoch": 0.09405703330936976, "grad_norm": 1.1521066102350797, "kl": 0.8635349571704865, "learning_rate": 9.823423667823108e-07, "loss": 0.0008561685681343079, "memory(GiB)": 165.8, "reward": 2.128269672393799, "reward_std": 0.40783464908599854, "rewards/GeoLocAccuracyV2ORM/mean": 0.5708333253860474, "rewards/GeoLocAccuracyV2ORM/std": 0.48208245635032654, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6616029739379883, "rewards/GeoVisalEntityMatch2ORM/std": 0.1877731829881668, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070801794528961, "step": 785, "train_speed(iter/s)": 0.026972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 397.72918701171875, "completions/min_length": 339.0, "epoch": 0.09417685118619698, "grad_norm": 1.0503467576304415, "kl": 0.6700235605239868, "learning_rate": 9.822922521387276e-07, "loss": 0.0006718685617670417, "memory(GiB)": 165.8, "reward": 2.5337867736816406, "reward_std": 0.06518011540174484, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7837866544723511, "rewards/GeoVisalEntityMatch2ORM/std": 0.18495343625545502, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 786, "train_speed(iter/s)": 0.026982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/mean_length": 395.47918701171875, "completions/min_length": 329.0, "epoch": 0.0942966690630242, "grad_norm": 1.2552553395215074, "kl": 0.6300227344036102, "learning_rate": 9.822420677619363e-07, "loss": 0.0006321035325527191, "memory(GiB)": 165.8, "reward": 2.693948268890381, "reward_std": 0.07850207388401031, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6939484477043152, "rewards/GeoVisalEntityMatch2ORM/std": 0.24224402010440826, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 787, "train_speed(iter/s)": 0.02699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 404.79168701171875, "completions/min_length": 347.0, "epoch": 0.09441648693985143, "grad_norm": 1.1863236952516916, "kl": 0.676589846611023, "learning_rate": 9.821918136591934e-07, "loss": 0.0006787205929867923, "memory(GiB)": 165.8, "reward": 2.616319417953491, "reward_std": 0.17704977095127106, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.25615236163139343, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7079861164093018, "rewards/GeoVisalEntityMatch2ORM/std": 0.1225813627243042, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 788, "train_speed(iter/s)": 0.026999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 401.97918701171875, "completions/min_length": 339.0, "epoch": 0.09453630481667864, "grad_norm": 1.1192913601635013, "kl": 0.7055537402629852, "learning_rate": 9.821414898377644e-07, "loss": 0.0007071743602864444, "memory(GiB)": 165.8, "reward": 2.69385027885437, "reward_std": 0.12929344177246094, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7105168700218201, "rewards/GeoVisalEntityMatch2ORM/std": 0.2213418334722519, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 789, "train_speed(iter/s)": 0.027008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.0, "completions/mean_length": 409.5208435058594, "completions/min_length": 340.0, "epoch": 0.09465612269350587, "grad_norm": 1.2190810089835005, "kl": 0.7369301617145538, "learning_rate": 9.820910963049261e-07, "loss": 0.0007379154558293521, "memory(GiB)": 165.8, "reward": 2.715806722640991, "reward_std": 0.10233508795499802, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7324735522270203, "rewards/GeoVisalEntityMatch2ORM/std": 0.19941073656082153, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 790, "train_speed(iter/s)": 0.027016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 415.3125, "completions/min_length": 326.0, "epoch": 0.09477594057033309, "grad_norm": 1.1824006534870353, "kl": 0.6695607900619507, "learning_rate": 9.820406330679644e-07, "loss": 0.0006708353757858276, "memory(GiB)": 165.8, "reward": 2.3616321086883545, "reward_std": 0.18774551153182983, "rewards/GeoLocAccuracyV2ORM/mean": 0.6666666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3964757025241852, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6949653625488281, "rewards/GeoVisalEntityMatch2ORM/std": 0.2164049744606018, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 791, "train_speed(iter/s)": 0.027022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/mean_length": 425.0520935058594, "completions/min_length": 360.0, "epoch": 0.09489575844716032, "grad_norm": 1.1436577880039873, "kl": 0.7083826661109924, "learning_rate": 9.819901001341757e-07, "loss": 0.000709937303327024, "memory(GiB)": 165.8, "reward": 2.3923611640930176, "reward_std": 0.07038500159978867, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6423611044883728, "rewards/GeoVisalEntityMatch2ORM/std": 0.11594037711620331, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 792, "train_speed(iter/s)": 0.02703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 417.4583435058594, "completions/min_length": 345.0, "epoch": 0.09501557632398754, "grad_norm": 1.0533622519698043, "kl": 0.657543957233429, "learning_rate": 9.819394975108664e-07, "loss": 0.0006581818452104926, "memory(GiB)": 165.8, "reward": 2.8204989433288574, "reward_std": 0.0445362851023674, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8204984664916992, "rewards/GeoVisalEntityMatch2ORM/std": 0.12048422545194626, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 793, "train_speed(iter/s)": 0.027035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 491.0, "completions/mean_length": 415.71875, "completions/min_length": 328.0, "epoch": 0.09513539420081477, "grad_norm": 1.113939518840084, "kl": 0.860901951789856, "learning_rate": 9.818888252053529e-07, "loss": 0.0008563300361856818, "memory(GiB)": 165.8, "reward": 2.3917784690856934, "reward_std": 0.31913992762565613, "rewards/GeoLocAccuracyV2ORM/mean": 0.8979166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.30156755447387695, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5876116156578064, "rewards/GeoVisalEntityMatch2ORM/std": 0.24207466840744019, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930106818675995, "step": 794, "train_speed(iter/s)": 0.027032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 486.0, "completions/mean_length": 406.13543701171875, "completions/min_length": 342.0, "epoch": 0.09525521207764198, "grad_norm": 0.9628412045812876, "kl": 0.7636497020721436, "learning_rate": 9.818380832249618e-07, "loss": 0.000764021067880094, "memory(GiB)": 165.8, "reward": 2.2558491230010986, "reward_std": 0.06085612624883652, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7558490633964539, "rewards/GeoVisalEntityMatch2ORM/std": 0.1628253012895584, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 795, "train_speed(iter/s)": 0.02703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 420.3020935058594, "completions/min_length": 341.0, "epoch": 0.09537502995446921, "grad_norm": 1.1639864779301201, "kl": 0.6822652518749237, "learning_rate": 9.8178727157703e-07, "loss": 0.0006839434499852359, "memory(GiB)": 165.8, "reward": 2.6819446086883545, "reward_std": 0.1447412073612213, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6923611164093018, "rewards/GeoVisalEntityMatch2ORM/std": 0.2489500343799591, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 796, "train_speed(iter/s)": 0.027039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 491.0, "completions/mean_length": 433.7083435058594, "completions/min_length": 359.0, "epoch": 0.09549484783129643, "grad_norm": 1.0230371983840505, "kl": 0.7688708603382111, "learning_rate": 9.817363902689037e-07, "loss": 0.0007668336620554328, "memory(GiB)": 165.8, "reward": 2.7536749839782715, "reward_std": 0.22895780205726624, "rewards/GeoLocAccuracyV2ORM/mean": 0.9708333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.16410310566425323, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8036748766899109, "rewards/GeoVisalEntityMatch2ORM/std": 0.22755497694015503, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 797, "train_speed(iter/s)": 0.027042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 431.35418701171875, "completions/min_length": 366.0, "epoch": 0.09561466570812366, "grad_norm": 1.0559738865837784, "kl": 0.715170294046402, "learning_rate": 9.816854393079402e-07, "loss": 0.0007172748446464539, "memory(GiB)": 165.8, "reward": 2.734292507171631, "reward_std": 0.10058923810720444, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7342922687530518, "rewards/GeoVisalEntityMatch2ORM/std": 0.13314712047576904, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 798, "train_speed(iter/s)": 0.02705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 506.0, "completions/mean_length": 440.7708435058594, "completions/min_length": 365.0, "epoch": 0.09573448358495087, "grad_norm": 1.049986295587768, "kl": 0.7216155230998993, "learning_rate": 9.816344187015059e-07, "loss": 0.0007217911770567298, "memory(GiB)": 165.8, "reward": 2.6490328311920166, "reward_std": 0.29033803939819336, "rewards/GeoLocAccuracyV2ORM/mean": 0.9395833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.23596571385860443, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7615327835083008, "rewards/GeoVisalEntityMatch2ORM/std": 0.16436108946800232, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336147725582123, "step": 799, "train_speed(iter/s)": 0.027057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 427.5520935058594, "completions/min_length": 367.0, "epoch": 0.09585430146177809, "grad_norm": 1.0963298806659494, "kl": 0.7215777039527893, "learning_rate": 9.81583328456978e-07, "loss": 0.0007220854749903083, "memory(GiB)": 165.8, "reward": 2.389042377471924, "reward_std": 0.11981567740440369, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6494588851928711, "rewards/GeoVisalEntityMatch2ORM/std": 0.22079768776893616, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 800, "train_speed(iter/s)": 0.027064 }, { "epoch": 0.09585430146177809, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.031001984126984124, "eval_completions/max_length": 493.3154761904762, "eval_completions/mean_length": 428.1880059015183, "eval_completions/min_length": 367.5952380952381, "eval_kl": 0.8362563554020155, "eval_loss": 0.0008419748628512025, "eval_reward": 2.5194363516001475, "eval_reward_std": 0.16802316773239345, "eval_rewards/GeoLocAccuracyV2ORM/mean": 0.8850942548541796, "eval_rewards/GeoLocAccuracyV2ORM/std": 0.15257072785780543, "eval_rewards/GeoVisalEntityMatch2ORM/mean": 0.6643519626841659, "eval_rewards/GeoVisalEntityMatch2ORM/std": 0.14814393244506346, "eval_rewards/MathFormat/mean": 0.9699900819077378, "eval_rewards/MathFormat/std": 0.05119000721190657, "eval_runtime": 1793.8533, "eval_samples_per_second": 0.188, "eval_steps_per_second": 0.004, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 508.0, "completions/mean_length": 432.10418701171875, "completions/min_length": 367.0, "epoch": 0.09597411933860532, "grad_norm": 1.1625858022322624, "kl": 0.6850329041481018, "learning_rate": 9.815321685817436e-07, "loss": 0.0006868541240692139, "memory(GiB)": 165.8, "reward": 2.22265625, "reward_std": 0.15284261107444763, "rewards/GeoLocAccuracyV2ORM/mean": 0.5833333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4603583812713623, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6393229365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.1454007476568222, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 801, "train_speed(iter/s)": 0.025482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 506.0, "completions/mean_length": 429.13543701171875, "completions/min_length": 357.0, "epoch": 0.09609393721543254, "grad_norm": 1.1235603253233681, "kl": 0.7362806797027588, "learning_rate": 9.814809390831992e-07, "loss": 0.0007322977180592716, "memory(GiB)": 165.8, "reward": 2.5211806297302246, "reward_std": 0.2181631177663803, "rewards/GeoLocAccuracyV2ORM/mean": 0.7937500476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.3606463372707367, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7586805820465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.2042061984539032, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 802, "train_speed(iter/s)": 0.025491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 509.0, "completions/mean_length": 437.2083435058594, "completions/min_length": 348.0, "epoch": 0.09621375509225977, "grad_norm": 1.1272750605386137, "kl": 0.6952968835830688, "learning_rate": 9.814296399687525e-07, "loss": 0.0006973048439249396, "memory(GiB)": 165.8, "reward": 2.731337070465088, "reward_std": 0.10539110004901886, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7313368320465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.12621770799160004, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 803, "train_speed(iter/s)": 0.025503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 423.3020935058594, "completions/min_length": 327.0, "epoch": 0.09633357296908698, "grad_norm": 0.7823367559737072, "kl": 0.6933853626251221, "learning_rate": 9.813782712458205e-07, "loss": 0.00069427490234375, "memory(GiB)": 165.8, "reward": 2.7526042461395264, "reward_std": 0.04099676012992859, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7526041865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.2714233994483948, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 804, "train_speed(iter/s)": 0.025512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 438.875, "completions/min_length": 360.0, "epoch": 0.09645339084591421, "grad_norm": 1.0346064943406164, "kl": 0.8545630276203156, "learning_rate": 9.813268329218306e-07, "loss": 0.0008429785957559943, "memory(GiB)": 165.8, "reward": 2.5229978561401367, "reward_std": 0.2644677758216858, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.290069580078125, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6521645784378052, "rewards/GeoVisalEntityMatch2ORM/std": 0.19151894748210907, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 805, "train_speed(iter/s)": 0.025521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/mean_length": 418.16668701171875, "completions/min_length": 344.0, "epoch": 0.09657320872274143, "grad_norm": 1.113301617584409, "kl": 0.6826164722442627, "learning_rate": 9.812753250042197e-07, "loss": 0.0006845146417617798, "memory(GiB)": 165.8, "reward": 2.607060194015503, "reward_std": 0.10207189619541168, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6070601940155029, "rewards/GeoVisalEntityMatch2ORM/std": 0.188545823097229, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 806, "train_speed(iter/s)": 0.025529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/mean_length": 448.47918701171875, "completions/min_length": 374.0, "epoch": 0.09669302659956866, "grad_norm": 0.9587219770832742, "kl": 0.6932081282138824, "learning_rate": 9.812237475004354e-07, "loss": 0.000694005168043077, "memory(GiB)": 165.8, "reward": 2.7329862117767334, "reward_std": 0.0828896015882492, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7329861521720886, "rewards/GeoVisalEntityMatch2ORM/std": 0.18684795498847961, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 807, "train_speed(iter/s)": 0.025538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2708333333333333, "completions/max_length": 506.0, "completions/mean_length": 431.57293701171875, "completions/min_length": 355.0, "epoch": 0.09681284447639588, "grad_norm": 1.2030876877949037, "kl": 1.9302713871002197, "learning_rate": 9.811721004179352e-07, "loss": 0.0018975536804646254, "memory(GiB)": 165.8, "reward": 2.0771701335906982, "reward_std": 0.41616618633270264, "rewards/GeoLocAccuracyV2ORM/mean": 0.7395833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4411657154560089, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5980035066604614, "rewards/GeoVisalEntityMatch2ORM/std": 0.2018074095249176, "rewards/MathFormat/mean": 0.7395833730697632, "rewards/MathFormat/std": 0.4411657154560089, "step": 808, "train_speed(iter/s)": 0.025538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 543.0, "completions/mean_length": 455.5625, "completions/min_length": 360.0, "epoch": 0.0969326623532231, "grad_norm": 1.0713414786876176, "kl": 0.6731992065906525, "learning_rate": 9.811203837641865e-07, "loss": 0.0006744563579559326, "memory(GiB)": 165.8, "reward": 2.7221479415893555, "reward_std": 0.17747840285301208, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7471479177474976, "rewards/GeoVisalEntityMatch2ORM/std": 0.13756373524665833, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 809, "train_speed(iter/s)": 0.02555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 512.0, "completions/mean_length": 456.90625, "completions/min_length": 360.0, "epoch": 0.09705248023005032, "grad_norm": 1.093876296590763, "kl": 1.686409592628479, "learning_rate": 9.81068597546667e-07, "loss": 0.0016740411520004272, "memory(GiB)": 165.8, "reward": 2.022796630859375, "reward_std": 0.30900758504867554, "rewards/GeoLocAccuracyV2ORM/mean": 0.6041666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4915960431098938, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5644634962081909, "rewards/GeoVisalEntityMatch2ORM/std": 0.1580386608839035, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 810, "train_speed(iter/s)": 0.025559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2916666666666667, "completions/max_length": 535.0, "completions/mean_length": 456.375, "completions/min_length": 396.0, "epoch": 0.09717229810687755, "grad_norm": 0.956017574771894, "kl": 0.874984472990036, "learning_rate": 9.81016741772864e-07, "loss": 0.0008695225114934146, "memory(GiB)": 165.8, "reward": 2.1384055614471436, "reward_std": 0.3447505831718445, "rewards/GeoLocAccuracyV2ORM/mean": 0.7083333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.45691564679145813, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7217389345169067, "rewards/GeoVisalEntityMatch2ORM/std": 0.20447185635566711, "rewards/MathFormat/mean": 0.7083333730697632, "rewards/MathFormat/std": 0.45691564679145813, "step": 811, "train_speed(iter/s)": 0.025561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 536.0, "completions/mean_length": 454.5625, "completions/min_length": 381.0, "epoch": 0.09729211598370477, "grad_norm": 1.121452787147256, "kl": 1.4128061532974243, "learning_rate": 9.809648164502758e-07, "loss": 0.001394090591929853, "memory(GiB)": 165.8, "reward": 2.5131654739379883, "reward_std": 0.31257662177085876, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783623337745667, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6798321604728699, "rewards/GeoVisalEntityMatch2ORM/std": 0.1636219173669815, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783623337745667, "step": 812, "train_speed(iter/s)": 0.02557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 506.0, "completions/mean_length": 450.7708435058594, "completions/min_length": 387.0, "epoch": 0.09741193386053199, "grad_norm": 0.9186220628248792, "kl": 1.0322541892528534, "learning_rate": 9.809128215864096e-07, "loss": 0.0010003000497817993, "memory(GiB)": 165.8, "reward": 2.5684027671813965, "reward_std": 0.44513386487960815, "rewards/GeoLocAccuracyV2ORM/mean": 0.90625, "rewards/GeoLocAccuracyV2ORM/std": 0.2930106818675995, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7350694537162781, "rewards/GeoVisalEntityMatch2ORM/std": 0.23457661271095276, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 813, "train_speed(iter/s)": 0.025579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 521.0, "completions/mean_length": 448.0833435058594, "completions/min_length": 383.0, "epoch": 0.09753175173735922, "grad_norm": 1.0397276259807549, "kl": 0.714561939239502, "learning_rate": 9.808607571887832e-07, "loss": 0.0007160256500355899, "memory(GiB)": 165.8, "reward": 2.657291889190674, "reward_std": 0.31199347972869873, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783626317977905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7510416507720947, "rewards/GeoVisalEntityMatch2ORM/std": 0.21295516192913055, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 814, "train_speed(iter/s)": 0.025588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2604166666666667, "completions/max_length": 506.0, "completions/mean_length": 426.57293701171875, "completions/min_length": 349.0, "epoch": 0.09765156961418643, "grad_norm": 1.071351730329565, "kl": 0.8578711748123169, "learning_rate": 9.808086232649246e-07, "loss": 0.0008552521467208862, "memory(GiB)": 165.8, "reward": 2.0762648582458496, "reward_std": 0.5219510793685913, "rewards/GeoLocAccuracyV2ORM/mean": 0.6375000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.4726743996143341, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6887649297714233, "rewards/GeoVisalEntityMatch2ORM/std": 0.16868019104003906, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 815, "train_speed(iter/s)": 0.025591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3020833333333333, "completions/max_length": 506.0, "completions/mean_length": 420.97918701171875, "completions/min_length": 354.0, "epoch": 0.09777138749101366, "grad_norm": 1.0331396600921436, "kl": 1.3894484043121338, "learning_rate": 9.807564198223716e-07, "loss": 0.0013861905317753553, "memory(GiB)": 165.8, "reward": 2.1449899673461914, "reward_std": 0.30432257056236267, "rewards/GeoLocAccuracyV2ORM/mean": 0.6645833253860474, "rewards/GeoLocAccuracyV2ORM/std": 0.46746331453323364, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7824901342391968, "rewards/GeoVisalEntityMatch2ORM/std": 0.19986814260482788, "rewards/MathFormat/mean": 0.6979166865348816, "rewards/MathFormat/std": 0.46157151460647583, "step": 816, "train_speed(iter/s)": 0.025591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 480.0, "completions/mean_length": 409.72918701171875, "completions/min_length": 320.0, "epoch": 0.09789120536784088, "grad_norm": 1.1112404104979325, "kl": 0.8214714825153351, "learning_rate": 9.807041468686723e-07, "loss": 0.0008166333427652717, "memory(GiB)": 165.8, "reward": 2.472135543823242, "reward_std": 0.3655180335044861, "rewards/GeoLocAccuracyV2ORM/mean": 0.8854166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3201904892921448, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6908854246139526, "rewards/GeoVisalEntityMatch2ORM/std": 0.21510985493659973, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070802092552185, "step": 817, "train_speed(iter/s)": 0.025593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/mean_length": 411.5520935058594, "completions/min_length": 296.0, "epoch": 0.09801102324466811, "grad_norm": 0.9449795894953811, "kl": 0.6951142251491547, "learning_rate": 9.806518044113843e-07, "loss": 0.0006976574659347534, "memory(GiB)": 165.8, "reward": 2.6581597328186035, "reward_std": 0.08165587484836578, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6581597328186035, "rewards/GeoVisalEntityMatch2ORM/std": 0.1971803456544876, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 818, "train_speed(iter/s)": 0.025602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 391.0, "completions/min_length": 316.0, "epoch": 0.09813084112149532, "grad_norm": 1.1143144574424837, "kl": 0.7021368443965912, "learning_rate": 9.805993924580762e-07, "loss": 0.0007035434246063232, "memory(GiB)": 165.8, "reward": 2.5809895992279053, "reward_std": 0.11142076551914215, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5809895992279053, "rewards/GeoVisalEntityMatch2ORM/std": 0.20181740820407867, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 819, "train_speed(iter/s)": 0.025614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 374.88543701171875, "completions/min_length": 291.0, "epoch": 0.09825065899832255, "grad_norm": 1.1515394381669835, "kl": 0.7428059875965118, "learning_rate": 9.805469110163257e-07, "loss": 0.0007452405989170074, "memory(GiB)": 165.8, "reward": 2.6174681186676025, "reward_std": 0.09491997957229614, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.617467999458313, "rewards/GeoVisalEntityMatch2ORM/std": 0.11421800404787064, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 820, "train_speed(iter/s)": 0.025617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 366.54168701171875, "completions/min_length": 272.0, "epoch": 0.09837047687514977, "grad_norm": 1.1875550255199268, "kl": 0.7035399377346039, "learning_rate": 9.80494360093721e-07, "loss": 0.0007047305698506534, "memory(GiB)": 165.8, "reward": 2.522001266479492, "reward_std": 0.10778084397315979, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5220012664794922, "rewards/GeoVisalEntityMatch2ORM/std": 0.15154528617858887, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 821, "train_speed(iter/s)": 0.025626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/mean_length": 383.2083435058594, "completions/min_length": 323.0, "epoch": 0.098490294751977, "grad_norm": 1.090506874423112, "kl": 0.7432008385658264, "learning_rate": 9.804417396978604e-07, "loss": 0.0007457336178049445, "memory(GiB)": 165.8, "reward": 2.4906249046325684, "reward_std": 0.18040135502815247, "rewards/GeoLocAccuracyV2ORM/mean": 0.8229166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3837430775165558, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6677083969116211, "rewards/GeoVisalEntityMatch2ORM/std": 0.10605578869581223, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 822, "train_speed(iter/s)": 0.025635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 371.79168701171875, "completions/min_length": 312.0, "epoch": 0.09861011262880422, "grad_norm": 1.2222750121406332, "kl": 0.7163333296775818, "learning_rate": 9.80389049836352e-07, "loss": 0.0007175853243097663, "memory(GiB)": 165.8, "reward": 2.597743034362793, "reward_std": 0.0993753969669342, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.597743034362793, "rewards/GeoVisalEntityMatch2ORM/std": 0.21232809126377106, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 823, "train_speed(iter/s)": 0.025647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 356.2708435058594, "completions/min_length": 299.0, "epoch": 0.09872993050563145, "grad_norm": 1.1544372685285256, "kl": 0.70990189909935, "learning_rate": 9.80336290516814e-07, "loss": 0.0007113019819371402, "memory(GiB)": 165.8, "reward": 2.772001266479492, "reward_std": 0.10226041078567505, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7720012664794922, "rewards/GeoVisalEntityMatch2ORM/std": 0.18812502920627594, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 824, "train_speed(iter/s)": 0.025656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 375.13543701171875, "completions/min_length": 308.0, "epoch": 0.09884974838245866, "grad_norm": 1.1971505769586956, "kl": 0.7600668370723724, "learning_rate": 9.802834617468751e-07, "loss": 0.000761866569519043, "memory(GiB)": 165.8, "reward": 2.60845947265625, "reward_std": 0.12147989869117737, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6167929172515869, "rewards/GeoVisalEntityMatch2ORM/std": 0.12579919397830963, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 825, "train_speed(iter/s)": 0.025658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 418.0, "completions/mean_length": 355.60418701171875, "completions/min_length": 295.0, "epoch": 0.09896956625928588, "grad_norm": 1.2410467634270383, "kl": 0.721316784620285, "learning_rate": 9.802305635341732e-07, "loss": 0.000723252713214606, "memory(GiB)": 165.8, "reward": 2.6315884590148926, "reward_std": 0.10310249030590057, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.631588339805603, "rewards/GeoVisalEntityMatch2ORM/std": 0.22262974083423615, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 826, "train_speed(iter/s)": 0.02567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 402.0, "completions/mean_length": 347.0, "completions/min_length": 294.0, "epoch": 0.09908938413611311, "grad_norm": 1.3527101387504397, "kl": 0.7340156137943268, "learning_rate": 9.80177595886357e-07, "loss": 0.0007359354640357196, "memory(GiB)": 165.8, "reward": 2.5256946086883545, "reward_std": 0.14238062500953674, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7173611521720886, "rewards/GeoVisalEntityMatch2ORM/std": 0.15408504009246826, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 827, "train_speed(iter/s)": 0.025678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.0, "completions/mean_length": 349.5520935058594, "completions/min_length": 273.0, "epoch": 0.09920920201294033, "grad_norm": 1.2058573202263487, "kl": 0.7407042980194092, "learning_rate": 9.801245588110847e-07, "loss": 0.0007422516937367618, "memory(GiB)": 165.8, "reward": 2.566145896911621, "reward_std": 0.09019321948289871, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5661458373069763, "rewards/GeoVisalEntityMatch2ORM/std": 0.18128982186317444, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 828, "train_speed(iter/s)": 0.025689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 409.0, "completions/mean_length": 354.5833435058594, "completions/min_length": 274.0, "epoch": 0.09932901988976756, "grad_norm": 1.1711008472308493, "kl": 0.8042263388633728, "learning_rate": 9.80071452316025e-07, "loss": 0.0008048142190091312, "memory(GiB)": 165.8, "reward": 1.8759260177612305, "reward_std": 0.3386938273906708, "rewards/GeoLocAccuracyV2ORM/mean": 0.4166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4955946207046509, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5530092716217041, "rewards/GeoVisalEntityMatch2ORM/std": 0.2176755964756012, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930106818675995, "step": 829, "train_speed(iter/s)": 0.025689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/mean_length": 347.1145935058594, "completions/min_length": 286.0, "epoch": 0.09944883776659477, "grad_norm": 1.1814037322820445, "kl": 0.7506916522979736, "learning_rate": 9.800182764088562e-07, "loss": 0.0007527495617978275, "memory(GiB)": 165.8, "reward": 2.739806652069092, "reward_std": 0.10919896513223648, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7481398582458496, "rewards/GeoVisalEntityMatch2ORM/std": 0.15492884814739227, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 830, "train_speed(iter/s)": 0.025698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 360.6770935058594, "completions/min_length": 291.0, "epoch": 0.099568655643422, "grad_norm": 1.2712343896257894, "kl": 0.7397331297397614, "learning_rate": 9.79965031097267e-07, "loss": 0.0007411142578348517, "memory(GiB)": 165.8, "reward": 2.6091601848602295, "reward_std": 0.15275657176971436, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6258267164230347, "rewards/GeoVisalEntityMatch2ORM/std": 0.17553098499774933, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 831, "train_speed(iter/s)": 0.025708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 382.0, "completions/mean_length": 327.3958435058594, "completions/min_length": 286.0, "epoch": 0.09968847352024922, "grad_norm": 1.0636830902309398, "kl": 0.7684681117534637, "learning_rate": 9.79911716388956e-07, "loss": 0.0007697194814682007, "memory(GiB)": 165.8, "reward": 2.75390625, "reward_std": 0.10653422772884369, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.75390625, "rewards/GeoVisalEntityMatch2ORM/std": 0.2639442980289459, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 832, "train_speed(iter/s)": 0.025717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 402.0, "completions/mean_length": 350.8020935058594, "completions/min_length": 306.0, "epoch": 0.09980829139707645, "grad_norm": 1.1419664996550989, "kl": 0.8112591803073883, "learning_rate": 9.798583322916317e-07, "loss": 0.0008125851745717227, "memory(GiB)": 165.8, "reward": 2.382291793823242, "reward_std": 0.16101640462875366, "rewards/GeoLocAccuracyV2ORM/mean": 0.7166666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.4454841613769531, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6656249761581421, "rewards/GeoVisalEntityMatch2ORM/std": 0.1278252899646759, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 833, "train_speed(iter/s)": 0.025726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 355.2395935058594, "completions/min_length": 273.0, "epoch": 0.09992810927390366, "grad_norm": 1.3542092977952918, "kl": 0.7498005032539368, "learning_rate": 9.798048788130128e-07, "loss": 0.0007513314485549927, "memory(GiB)": 165.8, "reward": 2.7533481121063232, "reward_std": 0.08693189918994904, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7533482313156128, "rewards/GeoVisalEntityMatch2ORM/std": 0.1669505536556244, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 834, "train_speed(iter/s)": 0.025735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 357.875, "completions/min_length": 311.0, "epoch": 0.1000479271507309, "grad_norm": 1.130927428044508, "kl": 0.7357676923274994, "learning_rate": 9.797513559608278e-07, "loss": 0.0007375392015092075, "memory(GiB)": 165.8, "reward": 2.539583683013916, "reward_std": 0.1200723722577095, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5395833253860474, "rewards/GeoVisalEntityMatch2ORM/std": 0.17870460450649261, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 835, "train_speed(iter/s)": 0.025745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/mean_length": 371.1770935058594, "completions/min_length": 295.0, "epoch": 0.10016774502755811, "grad_norm": 1.2402745477509407, "kl": 0.7650399208068848, "learning_rate": 9.796977637428157e-07, "loss": 0.0007672850042581558, "memory(GiB)": 165.8, "reward": 2.5843875408172607, "reward_std": 0.0913354828953743, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5843874216079712, "rewards/GeoVisalEntityMatch2ORM/std": 0.1534428745508194, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 836, "train_speed(iter/s)": 0.025753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/mean_length": 383.90625, "completions/min_length": 323.0, "epoch": 0.10028756290438534, "grad_norm": 1.1375024826175484, "kl": 0.7484368681907654, "learning_rate": 9.796441021667253e-07, "loss": 0.0007503802771680057, "memory(GiB)": 165.8, "reward": 2.576242446899414, "reward_std": 0.08307468891143799, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5762423872947693, "rewards/GeoVisalEntityMatch2ORM/std": 0.13377436995506287, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 837, "train_speed(iter/s)": 0.025762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.0, "completions/mean_length": 365.97918701171875, "completions/min_length": 312.0, "epoch": 0.10040738078121256, "grad_norm": 1.2288437517952693, "kl": 0.7880021929740906, "learning_rate": 9.79590371240315e-07, "loss": 0.0007894064183346927, "memory(GiB)": 165.8, "reward": 2.061201572418213, "reward_std": 0.09900767356157303, "rewards/GeoLocAccuracyV2ORM/mean": 0.49166667461395264, "rewards/GeoLocAccuracyV2ORM/std": 0.5008764266967773, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5695346593856812, "rewards/GeoVisalEntityMatch2ORM/std": 0.21445970237255096, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 838, "train_speed(iter/s)": 0.025771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 436.0, "completions/mean_length": 347.4895935058594, "completions/min_length": 300.0, "epoch": 0.10052719865803977, "grad_norm": 1.0594727579927143, "kl": 0.7818737924098969, "learning_rate": 9.795365709713539e-07, "loss": 0.0007836645236238837, "memory(GiB)": 165.8, "reward": 2.585317611694336, "reward_std": 0.07062099128961563, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5853174924850464, "rewards/GeoVisalEntityMatch2ORM/std": 0.15011490881443024, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 839, "train_speed(iter/s)": 0.025773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/mean_length": 339.65625, "completions/min_length": 277.0, "epoch": 0.100647016534867, "grad_norm": 0.9094119472008437, "kl": 0.7857533991336823, "learning_rate": 9.794827013676205e-07, "loss": 0.000786186195909977, "memory(GiB)": 165.8, "reward": 2.819643020629883, "reward_std": 0.07657043635845184, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8196429014205933, "rewards/GeoVisalEntityMatch2ORM/std": 0.16298219561576843, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 840, "train_speed(iter/s)": 0.025781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 419.0, "completions/mean_length": 370.29168701171875, "completions/min_length": 320.0, "epoch": 0.10076683441169422, "grad_norm": 1.2294581128926247, "kl": 0.7600698471069336, "learning_rate": 9.79428762436904e-07, "loss": 0.0007608483429066837, "memory(GiB)": 165.8, "reward": 2.444436550140381, "reward_std": 0.21422284841537476, "rewards/GeoLocAccuracyV2ORM/mean": 0.731249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.4437430500984192, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7236032485961914, "rewards/GeoVisalEntityMatch2ORM/std": 0.19569151103496552, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 841, "train_speed(iter/s)": 0.025783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/mean_length": 370.69793701171875, "completions/min_length": 291.0, "epoch": 0.10088665228852145, "grad_norm": 1.2268835277573786, "kl": 0.7935673594474792, "learning_rate": 9.793747541870033e-07, "loss": 0.000796159147284925, "memory(GiB)": 165.8, "reward": 2.710069417953491, "reward_std": 0.08573868870735168, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7100695371627808, "rewards/GeoVisalEntityMatch2ORM/std": 0.19490639865398407, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 842, "train_speed(iter/s)": 0.025791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 370.22918701171875, "completions/min_length": 311.0, "epoch": 0.10100647016534867, "grad_norm": 1.28011986497095, "kl": 0.7749982178211212, "learning_rate": 9.793206766257269e-07, "loss": 0.0007760673761367798, "memory(GiB)": 165.8, "reward": 2.240231990814209, "reward_std": 0.15741325914859772, "rewards/GeoLocAccuracyV2ORM/mean": 0.6666666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3964757025241852, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5735653638839722, "rewards/GeoVisalEntityMatch2ORM/std": 0.1915847808122635, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 843, "train_speed(iter/s)": 0.0258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/mean_length": 368.1770935058594, "completions/min_length": 309.0, "epoch": 0.1011262880421759, "grad_norm": 1.003898006734613, "kl": 0.7679952085018158, "learning_rate": 9.79266529760894e-07, "loss": 0.0007700572605244815, "memory(GiB)": 165.8, "reward": 2.7788772583007812, "reward_std": 0.0625481903553009, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7788773775100708, "rewards/GeoVisalEntityMatch2ORM/std": 0.1336251050233841, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 844, "train_speed(iter/s)": 0.025808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/mean_length": 381.0520935058594, "completions/min_length": 315.0, "epoch": 0.10124610591900311, "grad_norm": 1.161138209468862, "kl": 0.8041879236698151, "learning_rate": 9.792123136003335e-07, "loss": 0.0008058436214923859, "memory(GiB)": 165.8, "reward": 2.532440662384033, "reward_std": 0.19883227348327637, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.29970744252204895, "rewards/GeoVisalEntityMatch2ORM/mean": 0.665773868560791, "rewards/GeoVisalEntityMatch2ORM/std": 0.17164123058319092, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 845, "train_speed(iter/s)": 0.025818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 375.19793701171875, "completions/min_length": 332.0, "epoch": 0.10136592379583034, "grad_norm": 1.1773281143320793, "kl": 0.7643585503101349, "learning_rate": 9.791580281518843e-07, "loss": 0.0007658278336748481, "memory(GiB)": 165.8, "reward": 2.75644850730896, "reward_std": 0.09918489307165146, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7564484477043152, "rewards/GeoVisalEntityMatch2ORM/std": 0.15832576155662537, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 846, "train_speed(iter/s)": 0.02583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 483.0, "completions/mean_length": 390.625, "completions/min_length": 310.0, "epoch": 0.10148574167265756, "grad_norm": 1.1283844489991948, "kl": 0.7639819085597992, "learning_rate": 9.791036734233953e-07, "loss": 0.0007656216621398926, "memory(GiB)": 165.8, "reward": 2.4338128566741943, "reward_std": 0.13769873976707458, "rewards/GeoLocAccuracyV2ORM/mean": 0.7520833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4321255087852478, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6817295551300049, "rewards/GeoVisalEntityMatch2ORM/std": 0.15905296802520752, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 847, "train_speed(iter/s)": 0.025839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 372.9583435058594, "completions/min_length": 295.0, "epoch": 0.10160555954948479, "grad_norm": 1.229654467447444, "kl": 0.7834371030330658, "learning_rate": 9.790492494227257e-07, "loss": 0.0007849037647247314, "memory(GiB)": 165.8, "reward": 2.5951390266418457, "reward_std": 0.1882781684398651, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.33245497941970825, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7701389193534851, "rewards/GeoVisalEntityMatch2ORM/std": 0.22572709619998932, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 848, "train_speed(iter/s)": 0.025847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 391.22918701171875, "completions/min_length": 320.0, "epoch": 0.101725377426312, "grad_norm": 1.2610396621781106, "kl": 0.7914490699768066, "learning_rate": 9.789947561577443e-07, "loss": 0.0007920364732854068, "memory(GiB)": 165.8, "reward": 2.418849229812622, "reward_std": 0.2486235350370407, "rewards/GeoLocAccuracyV2ORM/mean": 0.8479167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.3562906086444855, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5709325671195984, "rewards/GeoVisalEntityMatch2ORM/std": 0.2594204246997833, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 849, "train_speed(iter/s)": 0.025856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 413.2395935058594, "completions/min_length": 348.0, "epoch": 0.10184519530313922, "grad_norm": 1.0505975797883331, "kl": 0.7452861964702606, "learning_rate": 9.789401936363304e-07, "loss": 0.0007473801961168647, "memory(GiB)": 165.8, "reward": 2.722966194152832, "reward_std": 0.1529253125190735, "rewards/GeoLocAccuracyV2ORM/mean": 0.9541667699813843, "rewards/GeoLocAccuracyV2ORM/std": 0.19783920049667358, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7687996625900269, "rewards/GeoVisalEntityMatch2ORM/std": 0.16647088527679443, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 850, "train_speed(iter/s)": 0.025865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/mean_length": 428.2395935058594, "completions/min_length": 376.0, "epoch": 0.10196501317996645, "grad_norm": 1.0967773099946898, "kl": 0.8012511730194092, "learning_rate": 9.788855618663727e-07, "loss": 0.0008028782904148102, "memory(GiB)": 165.8, "reward": 2.5563244819641113, "reward_std": 0.11527213454246521, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5563244819641113, "rewards/GeoVisalEntityMatch2ORM/std": 0.14050860702991486, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 851, "train_speed(iter/s)": 0.025873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 522.0, "completions/mean_length": 433.1458435058594, "completions/min_length": 374.0, "epoch": 0.10208483105679367, "grad_norm": 1.1705121442963422, "kl": 0.8180438578128815, "learning_rate": 9.788308608557705e-07, "loss": 0.0008199227740988135, "memory(GiB)": 165.8, "reward": 2.6420304775238037, "reward_std": 0.09501350671052933, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6420304179191589, "rewards/GeoVisalEntityMatch2ORM/std": 0.19899322092533112, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 852, "train_speed(iter/s)": 0.025881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 551.0, "completions/mean_length": 450.25, "completions/min_length": 389.0, "epoch": 0.1022046489336209, "grad_norm": 1.0146844246057716, "kl": 0.7608224153518677, "learning_rate": 9.787760906124327e-07, "loss": 0.0007631704211235046, "memory(GiB)": 165.8, "reward": 2.6822509765625, "reward_std": 0.10463506728410721, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7072510719299316, "rewards/GeoVisalEntityMatch2ORM/std": 0.18727213144302368, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 853, "train_speed(iter/s)": 0.02589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 547.0, "completions/mean_length": 463.79168701171875, "completions/min_length": 383.0, "epoch": 0.10232446681044811, "grad_norm": 1.0523116756753472, "kl": 0.7351088523864746, "learning_rate": 9.787212511442782e-07, "loss": 0.000736268877517432, "memory(GiB)": 165.8, "reward": 2.848263740539551, "reward_std": 0.1565890908241272, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8690972328186035, "rewards/GeoVisalEntityMatch2ORM/std": 0.14551576972007751, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 854, "train_speed(iter/s)": 0.025898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 529.0, "completions/mean_length": 464.38543701171875, "completions/min_length": 391.0, "epoch": 0.10244428468727534, "grad_norm": 1.0958187401334525, "kl": 0.7272173166275024, "learning_rate": 9.786663424592364e-07, "loss": 0.0007279167766682804, "memory(GiB)": 165.8, "reward": 2.5745534896850586, "reward_std": 0.4657207727432251, "rewards/GeoLocAccuracyV2ORM/mean": 0.8645833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.34396424889564514, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8453869223594666, "rewards/GeoVisalEntityMatch2ORM/std": 0.20057912170886993, "rewards/MathFormat/mean": 0.8645833730697632, "rewards/MathFormat/std": 0.34396424889564514, "step": 855, "train_speed(iter/s)": 0.025904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 530.0, "completions/mean_length": 462.90625, "completions/min_length": 402.0, "epoch": 0.10256410256410256, "grad_norm": 1.0721734532091292, "kl": 0.7745400965213776, "learning_rate": 9.786113645652464e-07, "loss": 0.0007746468181721866, "memory(GiB)": 165.8, "reward": 2.5760416984558105, "reward_std": 0.5021592974662781, "rewards/GeoLocAccuracyV2ORM/mean": 0.8604166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.3410445749759674, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8302083015441895, "rewards/GeoVisalEntityMatch2ORM/std": 0.1433376520872116, "rewards/MathFormat/mean": 0.8854166865348816, "rewards/MathFormat/std": 0.3201904892921448, "step": 856, "train_speed(iter/s)": 0.025912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 541.0, "completions/mean_length": 474.28125, "completions/min_length": 397.0, "epoch": 0.10268392044092979, "grad_norm": 0.9665992226971285, "kl": 0.760211318731308, "learning_rate": 9.78556317470257e-07, "loss": 0.0007604969432577491, "memory(GiB)": 165.8, "reward": 2.600074529647827, "reward_std": 0.3454824686050415, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336146235466003, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7042410373687744, "rewards/GeoVisalEntityMatch2ORM/std": 0.22132734954357147, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336146235466003, "step": 857, "train_speed(iter/s)": 0.02592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 534.0, "completions/mean_length": 463.15625, "completions/min_length": 412.0, "epoch": 0.102803738317757, "grad_norm": 1.1170159119375567, "kl": 0.8316424489021301, "learning_rate": 9.785012011822278e-07, "loss": 0.0008294681902043521, "memory(GiB)": 165.8, "reward": 2.4883761405944824, "reward_std": 0.41075998544692993, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783626317977905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6550426483154297, "rewards/GeoVisalEntityMatch2ORM/std": 0.18842633068561554, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783626317977905, "step": 858, "train_speed(iter/s)": 0.025928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 521.0, "completions/mean_length": 457.63543701171875, "completions/min_length": 375.0, "epoch": 0.10292355619458424, "grad_norm": 1.0533237215130817, "kl": 0.7590121328830719, "learning_rate": 9.784460157091273e-07, "loss": 0.0007606595754623413, "memory(GiB)": 165.8, "reward": 2.6678314208984375, "reward_std": 0.1662083864212036, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6886649131774902, "rewards/GeoVisalEntityMatch2ORM/std": 0.14767217636108398, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 859, "train_speed(iter/s)": 0.025936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 565.0, "completions/mean_length": 454.71875, "completions/min_length": 379.0, "epoch": 0.10304337407141145, "grad_norm": 1.1526191367361935, "kl": 0.7351888716220856, "learning_rate": 9.783907610589347e-07, "loss": 0.0007386729121208191, "memory(GiB)": 165.8, "reward": 2.576507568359375, "reward_std": 0.06457961350679398, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5765072703361511, "rewards/GeoVisalEntityMatch2ORM/std": 0.20158950984477997, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 860, "train_speed(iter/s)": 0.025945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 506.0, "completions/mean_length": 440.9375, "completions/min_length": 367.0, "epoch": 0.10316319194823868, "grad_norm": 1.1849491879914513, "kl": 0.7981425821781158, "learning_rate": 9.783354372396397e-07, "loss": 0.0007972369785420597, "memory(GiB)": 165.8, "reward": 2.340898036956787, "reward_std": 0.19011223316192627, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906975746154785, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6117311716079712, "rewards/GeoVisalEntityMatch2ORM/std": 0.24367943406105042, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 861, "train_speed(iter/s)": 0.025953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/mean_length": 415.0625, "completions/min_length": 338.0, "epoch": 0.1032830098250659, "grad_norm": 0.9468390968575284, "kl": 0.7355070412158966, "learning_rate": 9.782800442592406e-07, "loss": 0.0007367804646492004, "memory(GiB)": 165.8, "reward": 2.7677953243255615, "reward_std": 0.08433466404676437, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7677950859069824, "rewards/GeoVisalEntityMatch2ORM/std": 0.2363019734621048, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 862, "train_speed(iter/s)": 0.025961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.20833333333333334, "completions/max_length": 469.0, "completions/mean_length": 396.0520935058594, "completions/min_length": 335.0, "epoch": 0.10340282770189312, "grad_norm": 1.1670514625439679, "kl": 0.9366286098957062, "learning_rate": 9.782245821257472e-07, "loss": 0.0009357258677482605, "memory(GiB)": 165.8, "reward": 2.254253387451172, "reward_std": 0.3960443139076233, "rewards/GeoLocAccuracyV2ORM/mean": 0.6770833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4700457453727722, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7646702527999878, "rewards/GeoVisalEntityMatch2ORM/std": 0.23003222048282623, "rewards/MathFormat/mean": 0.8125, "rewards/MathFormat/std": 0.39236128330230713, "step": 863, "train_speed(iter/s)": 0.025961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.22916666666666666, "completions/max_length": 506.0, "completions/mean_length": 412.79168701171875, "completions/min_length": 342.0, "epoch": 0.10352264557872035, "grad_norm": 1.2580191466638808, "kl": 1.905872881412506, "learning_rate": 9.78169050847178e-07, "loss": 0.0018907090416178107, "memory(GiB)": 165.8, "reward": 2.276808500289917, "reward_std": 0.31078100204467773, "rewards/GeoLocAccuracyV2ORM/mean": 0.7708333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4225029945373535, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7351416349411011, "rewards/GeoVisalEntityMatch2ORM/std": 0.1401897668838501, "rewards/MathFormat/mean": 0.7708333730697632, "rewards/MathFormat/std": 0.4225029945373535, "step": 864, "train_speed(iter/s)": 0.025962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.0, "completions/mean_length": 431.07293701171875, "completions/min_length": 371.0, "epoch": 0.10364246345554756, "grad_norm": 1.1398148166182356, "kl": 0.7372539043426514, "learning_rate": 9.781134504315625e-07, "loss": 0.000738970935344696, "memory(GiB)": 165.8, "reward": 2.820535659790039, "reward_std": 0.10342353582382202, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8205357789993286, "rewards/GeoVisalEntityMatch2ORM/std": 0.17020630836486816, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 865, "train_speed(iter/s)": 0.025971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/mean_length": 422.69793701171875, "completions/min_length": 352.0, "epoch": 0.10376228133237479, "grad_norm": 1.1485902681729774, "kl": 0.7589841485023499, "learning_rate": 9.780577808869398e-07, "loss": 0.0007595146889798343, "memory(GiB)": 165.8, "reward": 2.597966194152832, "reward_std": 0.084324911236763, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5979663133621216, "rewards/GeoVisalEntityMatch2ORM/std": 0.16055718064308167, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 866, "train_speed(iter/s)": 0.02598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 502.0, "completions/mean_length": 421.7395935058594, "completions/min_length": 368.0, "epoch": 0.10388209920920201, "grad_norm": 1.1783687071940923, "kl": 1.3886381089687347, "learning_rate": 9.78002042221359e-07, "loss": 0.0013895630836486816, "memory(GiB)": 165.8, "reward": 1.930609107017517, "reward_std": 0.1454927623271942, "rewards/GeoLocAccuracyV2ORM/mean": 0.7083333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4476057291030884, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47227567434310913, "rewards/GeoVisalEntityMatch2ORM/std": 0.2399984747171402, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 867, "train_speed(iter/s)": 0.025979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 466.0, "completions/mean_length": 399.29168701171875, "completions/min_length": 361.0, "epoch": 0.10400191708602924, "grad_norm": 1.1369172343549636, "kl": 0.784021258354187, "learning_rate": 9.779462344428788e-07, "loss": 0.0007845833897590637, "memory(GiB)": 165.8, "reward": 2.116933822631836, "reward_std": 0.09585012495517731, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6169337630271912, "rewards/GeoVisalEntityMatch2ORM/std": 0.2567245066165924, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 868, "train_speed(iter/s)": 0.025978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 407.9375, "completions/min_length": 346.0, "epoch": 0.10412173496285645, "grad_norm": 1.2444499099115875, "kl": 0.8188822567462921, "learning_rate": 9.778903575595684e-07, "loss": 0.0008208503713831306, "memory(GiB)": 165.8, "reward": 2.4369213581085205, "reward_std": 0.13281631469726562, "rewards/GeoLocAccuracyV2ORM/mean": 0.8020833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3789123594760895, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6348379850387573, "rewards/GeoVisalEntityMatch2ORM/std": 0.1379193663597107, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 869, "train_speed(iter/s)": 0.025987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 421.8333435058594, "completions/min_length": 348.0, "epoch": 0.10424155283968368, "grad_norm": 1.1465420058343645, "kl": 0.8035299777984619, "learning_rate": 9.778344115795073e-07, "loss": 0.0008056983351707458, "memory(GiB)": 165.8, "reward": 2.6680870056152344, "reward_std": 0.08703476190567017, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6680871844291687, "rewards/GeoVisalEntityMatch2ORM/std": 0.11947621405124664, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 870, "train_speed(iter/s)": 0.025995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 506.0, "completions/mean_length": 439.29168701171875, "completions/min_length": 364.0, "epoch": 0.1043613707165109, "grad_norm": 1.1700916805727917, "kl": 0.9180891811847687, "learning_rate": 9.777783965107843e-07, "loss": 0.0009089845116250217, "memory(GiB)": 165.8, "reward": 2.4649484157562256, "reward_std": 0.3954375982284546, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3332982659339905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6586983799934387, "rewards/GeoVisalEntityMatch2ORM/std": 0.15700861811637878, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 871, "train_speed(iter/s)": 0.026003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 506.0, "completions/mean_length": 433.65625, "completions/min_length": 349.0, "epoch": 0.10448118859333813, "grad_norm": 1.103274517385541, "kl": 0.9001239836215973, "learning_rate": 9.777223123614984e-07, "loss": 0.0008932898635976017, "memory(GiB)": 165.8, "reward": 2.472931385040283, "reward_std": 0.26080232858657837, "rewards/GeoLocAccuracyV2ORM/mean": 0.8895833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.29575207829475403, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6354312896728516, "rewards/GeoVisalEntityMatch2ORM/std": 0.16585290431976318, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336147725582123, "step": 872, "train_speed(iter/s)": 0.026012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 446.71875, "completions/min_length": 396.0, "epoch": 0.10460100647016535, "grad_norm": 1.162095786280302, "kl": 0.8348291516304016, "learning_rate": 9.776661591397589e-07, "loss": 0.0008321454515680671, "memory(GiB)": 165.8, "reward": 2.7012031078338623, "reward_std": 0.23755553364753723, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7428695559501648, "rewards/GeoVisalEntityMatch2ORM/std": 0.17724153399467468, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 873, "train_speed(iter/s)": 0.02602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.20833333333333334, "completions/max_length": 502.0, "completions/mean_length": 429.53125, "completions/min_length": 358.0, "epoch": 0.10472082434699258, "grad_norm": 1.1533576774677285, "kl": 1.045291006565094, "learning_rate": 9.776099368536843e-07, "loss": 0.0010443329811096191, "memory(GiB)": 165.8, "reward": 2.3418898582458496, "reward_std": 0.21139968931674957, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4191417694091797, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7793898582458496, "rewards/GeoVisalEntityMatch2ORM/std": 0.11557677388191223, "rewards/MathFormat/mean": 0.8020833730697632, "rewards/MathFormat/std": 0.4005205035209656, "step": 874, "train_speed(iter/s)": 0.026023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 437.85418701171875, "completions/min_length": 370.0, "epoch": 0.1048406422238198, "grad_norm": 1.1684825426012597, "kl": 0.7519728243350983, "learning_rate": 9.775536455114042e-07, "loss": 0.0007543737883679569, "memory(GiB)": 165.8, "reward": 2.687319755554199, "reward_std": 0.11770865321159363, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6873196959495544, "rewards/GeoVisalEntityMatch2ORM/std": 0.1682404726743698, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 875, "train_speed(iter/s)": 0.026031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 519.0, "completions/mean_length": 442.79168701171875, "completions/min_length": 373.0, "epoch": 0.10496046010064701, "grad_norm": 0.5131456326364144, "kl": 0.7923764288425446, "learning_rate": 9.77497285121057e-07, "loss": 0.0007905041566118598, "memory(GiB)": 165.8, "reward": 2.549999952316284, "reward_std": 0.21289923787117004, "rewards/GeoLocAccuracyV2ORM/mean": 0.887499988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.3149770200252533, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7250000834465027, "rewards/GeoVisalEntityMatch2ORM/std": 0.18524521589279175, "rewards/MathFormat/mean": 0.9375, "rewards/MathFormat/std": 0.2433321326971054, "step": 876, "train_speed(iter/s)": 0.026039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 422.03125, "completions/min_length": 340.0, "epoch": 0.10508027797747424, "grad_norm": 1.1786589228197186, "kl": 0.7183233797550201, "learning_rate": 9.774408556907922e-07, "loss": 0.0007183998823165894, "memory(GiB)": 165.8, "reward": 2.6950087547302246, "reward_std": 0.1883234679698944, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003036379814148, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7241753935813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.1877981275320053, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 877, "train_speed(iter/s)": 0.026047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3020833333333333, "completions/max_length": 506.0, "completions/mean_length": 425.875, "completions/min_length": 367.0, "epoch": 0.10520009585430146, "grad_norm": 1.2704759307835176, "kl": 0.9915647804737091, "learning_rate": 9.773843572287686e-07, "loss": 0.0009871001821011305, "memory(GiB)": 165.8, "reward": 1.941232681274414, "reward_std": 0.37711015343666077, "rewards/GeoLocAccuracyV2ORM/mean": 0.6395833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.4702696204185486, "rewards/GeoVisalEntityMatch2ORM/mean": 0.603732705116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.18297362327575684, "rewards/MathFormat/mean": 0.6979166865348816, "rewards/MathFormat/std": 0.46157151460647583, "step": 878, "train_speed(iter/s)": 0.026047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 509.0, "completions/mean_length": 435.8958435058594, "completions/min_length": 370.0, "epoch": 0.10531991373112869, "grad_norm": 1.1209959098250803, "kl": 0.7685617804527283, "learning_rate": 9.77327789743155e-07, "loss": 0.0007705366006121039, "memory(GiB)": 165.8, "reward": 2.404745578765869, "reward_std": 0.13671928644180298, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6047453880310059, "rewards/GeoVisalEntityMatch2ORM/std": 0.20089463889598846, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 879, "train_speed(iter/s)": 0.026055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 506.0, "completions/mean_length": 433.41668701171875, "completions/min_length": 367.0, "epoch": 0.1054397316079559, "grad_norm": 1.1831972870384921, "kl": 1.024651825428009, "learning_rate": 9.772711532421307e-07, "loss": 0.0010177170624956489, "memory(GiB)": 165.8, "reward": 1.8640873432159424, "reward_std": 0.40407323837280273, "rewards/GeoLocAccuracyV2ORM/mean": 0.47291669249534607, "rewards/GeoLocAccuracyV2ORM/std": 0.4900009036064148, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6099206805229187, "rewards/GeoVisalEntityMatch2ORM/std": 0.13498559594154358, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 880, "train_speed(iter/s)": 0.026057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 429.38543701171875, "completions/min_length": 361.0, "epoch": 0.10555954948478313, "grad_norm": 1.1379322842523012, "kl": 0.7472483813762665, "learning_rate": 9.77214447733884e-07, "loss": 0.0007498363847844303, "memory(GiB)": 165.8, "reward": 2.724785327911377, "reward_std": 0.08333798497915268, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7247850298881531, "rewards/GeoVisalEntityMatch2ORM/std": 0.1627579778432846, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 881, "train_speed(iter/s)": 0.026065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 497.0, "completions/mean_length": 422.60418701171875, "completions/min_length": 369.0, "epoch": 0.10567936736161035, "grad_norm": 1.291459429035887, "kl": 0.838200181722641, "learning_rate": 9.771576732266144e-07, "loss": 0.0008378314669243991, "memory(GiB)": 165.8, "reward": 2.3245785236358643, "reward_std": 0.38396310806274414, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.40824830532073975, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6995784640312195, "rewards/GeoVisalEntityMatch2ORM/std": 0.1547655612230301, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 882, "train_speed(iter/s)": 0.026067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 502.0, "completions/mean_length": 432.94793701171875, "completions/min_length": 358.0, "epoch": 0.10579918523843758, "grad_norm": 0.9645250629084166, "kl": 0.7914374768733978, "learning_rate": 9.771008297285306e-07, "loss": 0.0007888749241828918, "memory(GiB)": 165.8, "reward": 2.500347137451172, "reward_std": 0.23437707126140594, "rewards/GeoLocAccuracyV2ORM/mean": 0.8812500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.30377015471458435, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6711806058883667, "rewards/GeoVisalEntityMatch2ORM/std": 0.18703339993953705, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 883, "train_speed(iter/s)": 0.026073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 483.0, "completions/mean_length": 410.4375, "completions/min_length": 348.0, "epoch": 0.1059190031152648, "grad_norm": 1.2467234595792436, "kl": 0.7355993092060089, "learning_rate": 9.77043917247851e-07, "loss": 0.000736946880351752, "memory(GiB)": 165.8, "reward": 2.643817901611328, "reward_std": 0.1734895408153534, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783623337745667, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7271510362625122, "rewards/GeoVisalEntityMatch2ORM/std": 0.12703508138656616, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 884, "train_speed(iter/s)": 0.026081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 481.0, "completions/mean_length": 404.26043701171875, "completions/min_length": 352.0, "epoch": 0.10603882099209203, "grad_norm": 1.1577357211981907, "kl": 0.6600209176540375, "learning_rate": 9.76986935792805e-07, "loss": 0.0006610056152567267, "memory(GiB)": 165.8, "reward": 2.081448554992676, "reward_std": 0.08127743005752563, "rewards/GeoLocAccuracyV2ORM/mean": 0.5500000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.4579128921031952, "rewards/GeoVisalEntityMatch2ORM/mean": 0.781448483467102, "rewards/GeoVisalEntityMatch2ORM/std": 0.13278527557849884, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 885, "train_speed(iter/s)": 0.026081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/mean_length": 423.5208435058594, "completions/min_length": 350.0, "epoch": 0.10615863886891924, "grad_norm": 1.123878372751659, "kl": 0.7601026594638824, "learning_rate": 9.769298853716308e-07, "loss": 0.0007634771754965186, "memory(GiB)": 165.8, "reward": 2.708829402923584, "reward_std": 0.10061454772949219, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7254959940910339, "rewards/GeoVisalEntityMatch2ORM/std": 0.19310833513736725, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 886, "train_speed(iter/s)": 0.026089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/mean_length": 405.9375, "completions/min_length": 348.0, "epoch": 0.10627845674574647, "grad_norm": 1.1695139414948177, "kl": 0.7591830492019653, "learning_rate": 9.768727659925777e-07, "loss": 0.0007614443893544376, "memory(GiB)": 165.8, "reward": 2.508362293243408, "reward_std": 0.18191435933113098, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6500289440155029, "rewards/GeoVisalEntityMatch2ORM/std": 0.19788514077663422, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 887, "train_speed(iter/s)": 0.026096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 440.41668701171875, "completions/min_length": 340.0, "epoch": 0.10639827462257369, "grad_norm": 1.044605465712247, "kl": 0.7431666851043701, "learning_rate": 9.768155776639042e-07, "loss": 0.0007429967517964542, "memory(GiB)": 165.8, "reward": 2.765798568725586, "reward_std": 0.1582806557416916, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7866319417953491, "rewards/GeoVisalEntityMatch2ORM/std": 0.1929248422384262, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 888, "train_speed(iter/s)": 0.026104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 536.0, "completions/mean_length": 452.9375, "completions/min_length": 386.0, "epoch": 0.1065180924994009, "grad_norm": 1.0683920982531525, "kl": 0.7506747543811798, "learning_rate": 9.76758320393879e-07, "loss": 0.0007516841287724674, "memory(GiB)": 165.8, "reward": 2.2667930126190186, "reward_std": 0.24649745225906372, "rewards/GeoLocAccuracyV2ORM/mean": 0.625, "rewards/GeoLocAccuracyV2ORM/std": 0.4866642653942108, "rewards/GeoVisalEntityMatch2ORM/mean": 0.641792893409729, "rewards/GeoVisalEntityMatch2ORM/std": 0.1670253425836563, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 889, "train_speed(iter/s)": 0.026112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.17708333333333334, "completions/max_length": 455.0, "completions/mean_length": 405.04168701171875, "completions/min_length": 326.0, "epoch": 0.10663791037622813, "grad_norm": 1.0757408619674422, "kl": 0.8524913489818573, "learning_rate": 9.767009941907805e-07, "loss": 0.000852048397064209, "memory(GiB)": 165.8, "reward": 2.3451554775238037, "reward_std": 0.29445546865463257, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.374634325504303, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6784887313842773, "rewards/GeoVisalEntityMatch2ORM/std": 0.1254713237285614, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 890, "train_speed(iter/s)": 0.026113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 441.47918701171875, "completions/min_length": 383.0, "epoch": 0.10675772825305535, "grad_norm": 1.1029443926764935, "kl": 0.7071935534477234, "learning_rate": 9.766435990628977e-07, "loss": 0.0007100651855580509, "memory(GiB)": 165.8, "reward": 2.806349515914917, "reward_std": 0.09356614947319031, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8063492774963379, "rewards/GeoVisalEntityMatch2ORM/std": 0.12344783544540405, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 891, "train_speed(iter/s)": 0.026121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 510.0, "completions/mean_length": 438.22918701171875, "completions/min_length": 367.0, "epoch": 0.10687754612988258, "grad_norm": 1.1395880632716406, "kl": 0.7290302217006683, "learning_rate": 9.76586135018529e-07, "loss": 0.0007303456659428775, "memory(GiB)": 165.8, "reward": 2.5957961082458496, "reward_std": 0.15749654173851013, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6457961797714233, "rewards/GeoVisalEntityMatch2ORM/std": 0.12431767582893372, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 892, "train_speed(iter/s)": 0.02613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 446.03125, "completions/min_length": 367.0, "epoch": 0.1069973640067098, "grad_norm": 1.0749862709453435, "kl": 0.70704185962677, "learning_rate": 9.76528602065983e-07, "loss": 0.0007086396217346191, "memory(GiB)": 165.8, "reward": 2.3659722805023193, "reward_std": 0.22104808688163757, "rewards/GeoLocAccuracyV2ORM/mean": 0.7479166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.4187649190425873, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6180555820465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.1496421992778778, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 893, "train_speed(iter/s)": 0.026138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 506.0, "completions/mean_length": 453.29168701171875, "completions/min_length": 376.0, "epoch": 0.10711718188353703, "grad_norm": 1.1193162815149569, "kl": 0.7856532335281372, "learning_rate": 9.764710002135782e-07, "loss": 0.0007835626602172852, "memory(GiB)": 165.8, "reward": 2.325260639190674, "reward_std": 0.2989456057548523, "rewards/GeoLocAccuracyV2ORM/mean": 0.7687500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.37312692403793335, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5877604484558105, "rewards/GeoVisalEntityMatch2ORM/std": 0.15631425380706787, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 894, "train_speed(iter/s)": 0.026146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/mean_length": 434.35418701171875, "completions/min_length": 363.0, "epoch": 0.10723699976036424, "grad_norm": 1.1548690374578172, "kl": 0.7332872748374939, "learning_rate": 9.764133294696432e-07, "loss": 0.0007350345840677619, "memory(GiB)": 165.8, "reward": 2.5850017070770264, "reward_std": 0.12938714027404785, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5954184532165527, "rewards/GeoVisalEntityMatch2ORM/std": 0.20893897116184235, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 895, "train_speed(iter/s)": 0.026153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 545.0, "completions/mean_length": 437.6458435058594, "completions/min_length": 368.0, "epoch": 0.10735681763719147, "grad_norm": 1.0898190153732663, "kl": 0.6945061087608337, "learning_rate": 9.763555898425164e-07, "loss": 0.0006975780124776065, "memory(GiB)": 165.8, "reward": 2.537797689437866, "reward_std": 0.13415540754795074, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7044643759727478, "rewards/GeoVisalEntityMatch2ORM/std": 0.12059865891933441, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 896, "train_speed(iter/s)": 0.026162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.23958333333333334, "completions/max_length": 488.0, "completions/mean_length": 407.4583435058594, "completions/min_length": 323.0, "epoch": 0.10747663551401869, "grad_norm": 1.0885700590135219, "kl": 0.8554794192314148, "learning_rate": 9.76297781340546e-07, "loss": 0.0008559152483940125, "memory(GiB)": 165.8, "reward": 2.16656756401062, "reward_std": 0.12047301232814789, "rewards/GeoLocAccuracyV2ORM/mean": 0.7520833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4321255087852478, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6540674567222595, "rewards/GeoVisalEntityMatch2ORM/std": 0.12808221578598022, "rewards/MathFormat/mean": 0.7604166865348816, "rewards/MathFormat/std": 0.42906975746154785, "step": 897, "train_speed(iter/s)": 0.026161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.0, "completions/mean_length": 438.8645935058594, "completions/min_length": 341.0, "epoch": 0.10759645339084592, "grad_norm": 1.1242592774479254, "kl": 0.7562251687049866, "learning_rate": 9.762399039720905e-07, "loss": 0.0007584914565086365, "memory(GiB)": 165.8, "reward": 2.515401840209961, "reward_std": 0.10502971708774567, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5237351655960083, "rewards/GeoVisalEntityMatch2ORM/std": 0.2660800814628601, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 898, "train_speed(iter/s)": 0.026169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/mean_length": 411.63543701171875, "completions/min_length": 316.0, "epoch": 0.10771627126767314, "grad_norm": 1.160232150142172, "kl": 0.6936668157577515, "learning_rate": 9.761819577455183e-07, "loss": 0.0006958271260373294, "memory(GiB)": 165.8, "reward": 2.5600695610046387, "reward_std": 0.1028798520565033, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5600694417953491, "rewards/GeoVisalEntityMatch2ORM/std": 0.14322316646575928, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 899, "train_speed(iter/s)": 0.026178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 427.3333435058594, "completions/min_length": 367.0, "epoch": 0.10783608914450037, "grad_norm": 1.1781658823558205, "kl": 0.705057680606842, "learning_rate": 9.761239426692076e-07, "loss": 0.0007065435638651252, "memory(GiB)": 165.8, "reward": 2.287351131439209, "reward_std": 0.21158850193023682, "rewards/GeoLocAccuracyV2ORM/mean": 0.6583333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.45691564679145813, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6290178298950195, "rewards/GeoVisalEntityMatch2ORM/std": 0.19788242876529694, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 900, "train_speed(iter/s)": 0.026186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.0, "completions/mean_length": 427.01043701171875, "completions/min_length": 324.0, "epoch": 0.10795590702132758, "grad_norm": 1.127572796723999, "kl": 0.7430865168571472, "learning_rate": 9.760658587515466e-07, "loss": 0.0007438796455971897, "memory(GiB)": 165.8, "reward": 2.737408399581909, "reward_std": 0.09304936230182648, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7374083995819092, "rewards/GeoVisalEntityMatch2ORM/std": 0.13180795311927795, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 901, "train_speed(iter/s)": 0.026194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/mean_length": 432.5833435058594, "completions/min_length": 365.0, "epoch": 0.1080757248981548, "grad_norm": 1.1322896789514822, "kl": 0.7104572057723999, "learning_rate": 9.760077060009335e-07, "loss": 0.0007122705574147403, "memory(GiB)": 165.8, "reward": 2.531770944595337, "reward_std": 0.13436897099018097, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5401042103767395, "rewards/GeoVisalEntityMatch2ORM/std": 0.14070267975330353, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 902, "train_speed(iter/s)": 0.026205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 484.0, "completions/mean_length": 428.3020935058594, "completions/min_length": 367.0, "epoch": 0.10819554277498203, "grad_norm": 1.1451348134198183, "kl": 0.7310461401939392, "learning_rate": 9.759494844257766e-07, "loss": 0.0007317314739339054, "memory(GiB)": 165.8, "reward": 2.491666793823242, "reward_std": 0.1383146494626999, "rewards/GeoLocAccuracyV2ORM/mean": 0.7416666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.4380979835987091, "rewards/GeoVisalEntityMatch2ORM/mean": 0.75, "rewards/GeoVisalEntityMatch2ORM/std": 0.20088984072208405, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 903, "train_speed(iter/s)": 0.026212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 442.1770935058594, "completions/min_length": 373.0, "epoch": 0.10831536065180924, "grad_norm": 1.1250090270983262, "kl": 0.730003833770752, "learning_rate": 9.758911940344936e-07, "loss": 0.000731930136680603, "memory(GiB)": 165.8, "reward": 2.5196924209594727, "reward_std": 0.18927079439163208, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.25615236163139343, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6113591194152832, "rewards/GeoVisalEntityMatch2ORM/std": 0.19771058857440948, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 904, "train_speed(iter/s)": 0.02622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 507.0, "completions/mean_length": 454.82293701171875, "completions/min_length": 379.0, "epoch": 0.10843517852863647, "grad_norm": 1.0815993459029813, "kl": 1.0576947033405304, "learning_rate": 9.75832834835513e-07, "loss": 0.001030709594488144, "memory(GiB)": 165.8, "reward": 2.681197166442871, "reward_std": 0.3435375690460205, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087526738643646, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7645307779312134, "rewards/GeoVisalEntityMatch2ORM/std": 0.17945507168769836, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087526738643646, "step": 905, "train_speed(iter/s)": 0.026228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 519.0, "completions/mean_length": 430.0208435058594, "completions/min_length": 368.0, "epoch": 0.10855499640546369, "grad_norm": 1.0064891615609826, "kl": 0.8291004300117493, "learning_rate": 9.757744068372723e-07, "loss": 0.0008300972986035049, "memory(GiB)": 165.8, "reward": 2.265277862548828, "reward_std": 0.10918395221233368, "rewards/GeoLocAccuracyV2ORM/mean": 0.7250000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.443194180727005, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7902778387069702, "rewards/GeoVisalEntityMatch2ORM/std": 0.0973128154873848, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 906, "train_speed(iter/s)": 0.026227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 541.0, "completions/mean_length": 471.16668701171875, "completions/min_length": 407.0, "epoch": 0.10867481428229092, "grad_norm": 1.0245559410054699, "kl": 1.5725467503070831, "learning_rate": 9.757159100482198e-07, "loss": 0.0015302971005439758, "memory(GiB)": 165.8, "reward": 2.2289187908172607, "reward_std": 0.5165020227432251, "rewards/GeoLocAccuracyV2ORM/mean": 0.6520833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.4267328083515167, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6914186477661133, "rewards/GeoVisalEntityMatch2ORM/std": 0.1688043773174286, "rewards/MathFormat/mean": 0.8854166865348816, "rewards/MathFormat/std": 0.3201904594898224, "step": 907, "train_speed(iter/s)": 0.026234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 506.0, "completions/mean_length": 446.65625, "completions/min_length": 358.0, "epoch": 0.10879463215911814, "grad_norm": 1.2740008034496966, "kl": 2.4286906719207764, "learning_rate": 9.756573444768132e-07, "loss": 0.0023954487405717373, "memory(GiB)": 165.8, "reward": 2.006795644760132, "reward_std": 0.4488369822502136, "rewards/GeoLocAccuracyV2ORM/mean": 0.5750000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.4599771201610565, "rewards/GeoVisalEntityMatch2ORM/mean": 0.681795597076416, "rewards/GeoVisalEntityMatch2ORM/std": 0.20856308937072754, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 908, "train_speed(iter/s)": 0.026236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 552.0, "completions/mean_length": 456.53125, "completions/min_length": 373.0, "epoch": 0.10891445003594537, "grad_norm": 1.0327830464043506, "kl": 0.8578017055988312, "learning_rate": 9.755987101315205e-07, "loss": 0.0008495102520100772, "memory(GiB)": 165.8, "reward": 2.569878578186035, "reward_std": 0.3194178342819214, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087528228759766, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6532118320465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.07997048646211624, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 909, "train_speed(iter/s)": 0.026243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 520.0, "completions/mean_length": 474.7708435058594, "completions/min_length": 420.0, "epoch": 0.10903426791277258, "grad_norm": 1.160763297005236, "kl": 1.143745481967926, "learning_rate": 9.755400070208192e-07, "loss": 0.0011239474406465888, "memory(GiB)": 165.8, "reward": 2.3226547241210938, "reward_std": 0.5935933589935303, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3547917604446411, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6143213510513306, "rewards/GeoVisalEntityMatch2ORM/std": 0.14456725120544434, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 910, "train_speed(iter/s)": 0.02625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.40625, "completions/max_length": 506.0, "completions/mean_length": 460.59375, "completions/min_length": 412.0, "epoch": 0.10915408578959981, "grad_norm": 1.0886366235382945, "kl": 1.0245086252689362, "learning_rate": 9.754812351531974e-07, "loss": 0.001018242328427732, "memory(GiB)": 165.8, "reward": 1.739843726158142, "reward_std": 0.48551273345947266, "rewards/GeoLocAccuracyV2ORM/mean": 0.6041666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4915960431098938, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5315104722976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.20063570141792297, "rewards/MathFormat/mean": 0.6041666865348816, "rewards/MathFormat/std": 0.4915960431098938, "step": 911, "train_speed(iter/s)": 0.026251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 544.0, "completions/mean_length": 463.125, "completions/min_length": 390.0, "epoch": 0.10927390366642703, "grad_norm": 1.0963553539492148, "kl": 0.7772253751754761, "learning_rate": 9.754223945371522e-07, "loss": 0.0007777164573781192, "memory(GiB)": 165.8, "reward": 2.7005603313446045, "reward_std": 0.20156994462013245, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7422269582748413, "rewards/GeoVisalEntityMatch2ORM/std": 0.22006654739379883, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 912, "train_speed(iter/s)": 0.026259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 526.0, "completions/mean_length": 437.32293701171875, "completions/min_length": 379.0, "epoch": 0.10939372154325426, "grad_norm": 1.0817006329359051, "kl": 0.7809997797012329, "learning_rate": 9.753634851811916e-07, "loss": 0.0007812008261680603, "memory(GiB)": 165.8, "reward": 2.343956708908081, "reward_std": 0.10130695253610611, "rewards/GeoLocAccuracyV2ORM/mean": 0.5833333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4017505645751953, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7606233358383179, "rewards/GeoVisalEntityMatch2ORM/std": 0.20091032981872559, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 913, "train_speed(iter/s)": 0.026267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.23958333333333334, "completions/max_length": 496.0, "completions/mean_length": 417.21875, "completions/min_length": 337.0, "epoch": 0.10951353942008148, "grad_norm": 1.0306810619869993, "kl": 0.9493636190891266, "learning_rate": 9.75304507093833e-07, "loss": 0.0009512926335446537, "memory(GiB)": 165.8, "reward": 2.2395834922790527, "reward_std": 0.168804332613945, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906975746154785, "rewards/GeoVisalEntityMatch2ORM/mean": 0.71875, "rewards/GeoVisalEntityMatch2ORM/std": 0.14639092981815338, "rewards/MathFormat/mean": 0.7604166865348816, "rewards/MathFormat/std": 0.42906975746154785, "step": 914, "train_speed(iter/s)": 0.026268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 418.9270935058594, "completions/min_length": 340.0, "epoch": 0.10963335729690869, "grad_norm": 1.1108618981411988, "kl": 0.7133020162582397, "learning_rate": 9.75245460283604e-07, "loss": 0.0007147615542635322, "memory(GiB)": 165.8, "reward": 2.1674273014068604, "reward_std": 0.11907916516065598, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906975746154785, "rewards/GeoVisalEntityMatch2ORM/mean": 0.40701061487197876, "rewards/GeoVisalEntityMatch2ORM/std": 0.2186434119939804, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 915, "train_speed(iter/s)": 0.026276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 445.0, "completions/mean_length": 390.9583435058594, "completions/min_length": 328.0, "epoch": 0.10975317517373592, "grad_norm": 1.023612416800288, "kl": 0.7246411740779877, "learning_rate": 9.75186344759042e-07, "loss": 0.0007261186838150024, "memory(GiB)": 165.8, "reward": 2.679947853088379, "reward_std": 0.09077554941177368, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6799479722976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.22412461042404175, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 916, "train_speed(iter/s)": 0.026284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 389.66668701171875, "completions/min_length": 320.0, "epoch": 0.10987299305056314, "grad_norm": 1.1626033441082075, "kl": 0.7617080211639404, "learning_rate": 9.751271605286939e-07, "loss": 0.0007624191930517554, "memory(GiB)": 165.8, "reward": 2.5906248092651367, "reward_std": 0.12231259793043137, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5989583730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.22971747815608978, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 917, "train_speed(iter/s)": 0.026291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 372.34375, "completions/min_length": 310.0, "epoch": 0.10999281092739037, "grad_norm": 1.1330835425273844, "kl": 0.7778538167476654, "learning_rate": 9.750679076011174e-07, "loss": 0.0007804682245478034, "memory(GiB)": 165.8, "reward": 2.69921875, "reward_std": 0.08516998589038849, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.69921875, "rewards/GeoVisalEntityMatch2ORM/std": 0.2166837900876999, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 918, "train_speed(iter/s)": 0.026299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 376.79168701171875, "completions/min_length": 310.0, "epoch": 0.11011262880421759, "grad_norm": 1.3044202426999936, "kl": 0.773552417755127, "learning_rate": 9.750085859848798e-07, "loss": 0.0007749957730993629, "memory(GiB)": 165.8, "reward": 2.558767318725586, "reward_std": 0.24979059398174286, "rewards/GeoLocAccuracyV2ORM/mean": 0.7875000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.3725445866584778, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7712674140930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.20510610938072205, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 919, "train_speed(iter/s)": 0.026308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/mean_length": 361.71875, "completions/min_length": 303.0, "epoch": 0.11023244668104482, "grad_norm": 1.2254650004466223, "kl": 0.7289806008338928, "learning_rate": 9.749491956885578e-07, "loss": 0.0007296577095985413, "memory(GiB)": 165.8, "reward": 2.722222328186035, "reward_std": 0.09328976273536682, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7222222685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.20328682661056519, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 920, "train_speed(iter/s)": 0.026315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/mean_length": 359.8645935058594, "completions/min_length": 314.0, "epoch": 0.11035226455787203, "grad_norm": 1.2366236474696715, "kl": 0.7401922345161438, "learning_rate": 9.748897367207389e-07, "loss": 0.0007425025105476379, "memory(GiB)": 165.8, "reward": 2.6478590965270996, "reward_std": 0.13088160753250122, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6478588581085205, "rewards/GeoVisalEntityMatch2ORM/std": 0.18352793157100677, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 921, "train_speed(iter/s)": 0.026323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 418.0, "completions/mean_length": 363.3125, "completions/min_length": 307.0, "epoch": 0.11047208243469926, "grad_norm": 1.2272090235747009, "kl": 0.8284155130386353, "learning_rate": 9.748302090900199e-07, "loss": 0.0008298307657241821, "memory(GiB)": 165.8, "reward": 2.72383451461792, "reward_std": 0.08856689184904099, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7238343954086304, "rewards/GeoVisalEntityMatch2ORM/std": 0.12100667506456375, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 922, "train_speed(iter/s)": 0.026331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/mean_length": 349.04168701171875, "completions/min_length": 304.0, "epoch": 0.11059190031152648, "grad_norm": 1.358609191308435, "kl": 0.8088208734989166, "learning_rate": 9.747706128050075e-07, "loss": 0.0008108963957056403, "memory(GiB)": 165.8, "reward": 2.600405216217041, "reward_std": 0.11520342528820038, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6004051566123962, "rewards/GeoVisalEntityMatch2ORM/std": 0.12877628207206726, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 923, "train_speed(iter/s)": 0.026339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 377.0, "completions/mean_length": 335.60418701171875, "completions/min_length": 287.0, "epoch": 0.11071171818835371, "grad_norm": 1.2093337328078901, "kl": 0.7876559197902679, "learning_rate": 9.74710947874319e-07, "loss": 0.0007890164852142334, "memory(GiB)": 165.8, "reward": 2.281987190246582, "reward_std": 0.2319883555173874, "rewards/GeoLocAccuracyV2ORM/mean": 0.8520833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.3221569359302521, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4611539840698242, "rewards/GeoVisalEntityMatch2ORM/std": 0.15256047248840332, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 924, "train_speed(iter/s)": 0.026337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 416.0, "completions/mean_length": 351.19793701171875, "completions/min_length": 306.0, "epoch": 0.11083153606518092, "grad_norm": 1.1805443036742154, "kl": 0.8214719593524933, "learning_rate": 9.746512143065808e-07, "loss": 0.0008228669757954776, "memory(GiB)": 165.8, "reward": 2.673635959625244, "reward_std": 0.11965887993574142, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6736359596252441, "rewards/GeoVisalEntityMatch2ORM/std": 0.1414240002632141, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 925, "train_speed(iter/s)": 0.026339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 426.0, "completions/mean_length": 362.85418701171875, "completions/min_length": 311.0, "epoch": 0.11095135394200814, "grad_norm": 1.2378457014285251, "kl": 0.827683687210083, "learning_rate": 9.7459141211043e-07, "loss": 0.000829160213470459, "memory(GiB)": 165.8, "reward": 2.620225667953491, "reward_std": 0.09217537194490433, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6202256679534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.15787674486637115, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 926, "train_speed(iter/s)": 0.026347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/mean_length": 340.40625, "completions/min_length": 281.0, "epoch": 0.11107117181883537, "grad_norm": 1.1623380909762573, "kl": 0.7827064692974091, "learning_rate": 9.745315412945128e-07, "loss": 0.0007844542851671576, "memory(GiB)": 165.8, "reward": 2.6469244956970215, "reward_std": 0.07678216695785522, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.646924614906311, "rewards/GeoVisalEntityMatch2ORM/std": 0.20014646649360657, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 927, "train_speed(iter/s)": 0.026355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/mean_length": 341.28125, "completions/min_length": 261.0, "epoch": 0.11119098969566259, "grad_norm": 1.2867924987406243, "kl": 0.8284416794776917, "learning_rate": 9.744716018674862e-07, "loss": 0.0008306751842610538, "memory(GiB)": 165.8, "reward": 2.467945098876953, "reward_std": 0.16885295510292053, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5012784004211426, "rewards/GeoVisalEntityMatch2ORM/std": 0.23419292271137238, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 928, "train_speed(iter/s)": 0.026363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 402.0, "completions/mean_length": 347.41668701171875, "completions/min_length": 287.0, "epoch": 0.11131080757248982, "grad_norm": 1.1988328229653542, "kl": 0.7604085505008698, "learning_rate": 9.74411593838016e-07, "loss": 0.0007626278093084693, "memory(GiB)": 165.8, "reward": 2.588095188140869, "reward_std": 0.07721634209156036, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5880952477455139, "rewards/GeoVisalEntityMatch2ORM/std": 0.22852128744125366, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 929, "train_speed(iter/s)": 0.02637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.0, "completions/mean_length": 350.96875, "completions/min_length": 288.0, "epoch": 0.11143062544931703, "grad_norm": 1.4045818459844603, "kl": 0.7962191998958588, "learning_rate": 9.743515172147791e-07, "loss": 0.0007973946630954742, "memory(GiB)": 165.8, "reward": 2.416379451751709, "reward_std": 0.31740880012512207, "rewards/GeoLocAccuracyV2ORM/mean": 0.7895833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.35702842473983765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6372125744819641, "rewards/GeoVisalEntityMatch2ORM/std": 0.1564643383026123, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 930, "train_speed(iter/s)": 0.026379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/mean_length": 351.7708435058594, "completions/min_length": 287.0, "epoch": 0.11155044332614426, "grad_norm": 1.217538716783547, "kl": 0.7620047330856323, "learning_rate": 9.742913720064618e-07, "loss": 0.0007633864879608154, "memory(GiB)": 165.8, "reward": 2.6799769401550293, "reward_std": 0.1212795227766037, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6903935670852661, "rewards/GeoVisalEntityMatch2ORM/std": 0.20487792789936066, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 931, "train_speed(iter/s)": 0.026387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/mean_length": 362.1875, "completions/min_length": 314.0, "epoch": 0.11167026120297148, "grad_norm": 1.2893869276529146, "kl": 0.7596261501312256, "learning_rate": 9.7423115822176e-07, "loss": 0.0007617573137395084, "memory(GiB)": 165.8, "reward": 2.636359214782715, "reward_std": 0.14850622415542603, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6530258655548096, "rewards/GeoVisalEntityMatch2ORM/std": 0.17926745116710663, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 932, "train_speed(iter/s)": 0.026395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 411.0, "completions/mean_length": 351.53125, "completions/min_length": 293.0, "epoch": 0.11179007907979871, "grad_norm": 1.1764542037894614, "kl": 0.7516121566295624, "learning_rate": 9.741708758693803e-07, "loss": 0.0007526924600824714, "memory(GiB)": 165.8, "reward": 2.476651668548584, "reward_std": 0.2642102837562561, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5391515493392944, "rewards/GeoVisalEntityMatch2ORM/std": 0.23332038521766663, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 933, "train_speed(iter/s)": 0.026394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 377.9895935058594, "completions/min_length": 321.0, "epoch": 0.11190989695662593, "grad_norm": 1.133903984094236, "kl": 0.7788310945034027, "learning_rate": 9.74110524958038e-07, "loss": 0.0007805514032952487, "memory(GiB)": 165.8, "reward": 2.4022693634033203, "reward_std": 0.12694543600082397, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.33245497941970825, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5772693753242493, "rewards/GeoVisalEntityMatch2ORM/std": 0.11424355208873749, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 934, "train_speed(iter/s)": 0.026402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 359.71875, "completions/min_length": 270.0, "epoch": 0.11202971483345316, "grad_norm": 1.059583951997451, "kl": 0.7588721513748169, "learning_rate": 9.7405010549646e-07, "loss": 0.0007598648662678897, "memory(GiB)": 165.8, "reward": 2.645688772201538, "reward_std": 0.17651347815990448, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6665219664573669, "rewards/GeoVisalEntityMatch2ORM/std": 0.2632494270801544, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 935, "train_speed(iter/s)": 0.026409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.0, "completions/mean_length": 366.8333435058594, "completions/min_length": 303.0, "epoch": 0.11214953271028037, "grad_norm": 1.1756860175527675, "kl": 0.7509470582008362, "learning_rate": 9.739896174933815e-07, "loss": 0.0007532885065302253, "memory(GiB)": 165.8, "reward": 2.672222137451172, "reward_std": 0.15514469146728516, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087526738643646, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7138888835906982, "rewards/GeoVisalEntityMatch2ORM/std": 0.2781275510787964, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 936, "train_speed(iter/s)": 0.026417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.0, "completions/mean_length": 372.46875, "completions/min_length": 318.0, "epoch": 0.1122693505871076, "grad_norm": 1.1462571797803602, "kl": 0.7897224128246307, "learning_rate": 9.739290609575486e-07, "loss": 0.0007915347814559937, "memory(GiB)": 165.8, "reward": 2.5342249870300293, "reward_std": 0.17229673266410828, "rewards/GeoLocAccuracyV2ORM/mean": 0.9250000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.609224796295166, "rewards/GeoVisalEntityMatch2ORM/std": 0.15426421165466309, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 937, "train_speed(iter/s)": 0.026421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/mean_length": 377.16668701171875, "completions/min_length": 322.0, "epoch": 0.11238916846393482, "grad_norm": 1.16474836158287, "kl": 0.788381814956665, "learning_rate": 9.738684358977167e-07, "loss": 0.0007901291246525943, "memory(GiB)": 165.8, "reward": 2.5367188453674316, "reward_std": 0.09842167794704437, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5367187261581421, "rewards/GeoVisalEntityMatch2ORM/std": 0.1452341377735138, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 938, "train_speed(iter/s)": 0.026429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 436.0, "completions/mean_length": 373.1458435058594, "completions/min_length": 319.0, "epoch": 0.11250898634076203, "grad_norm": 1.1439634615701382, "kl": 0.7858052551746368, "learning_rate": 9.738077423226518e-07, "loss": 0.0007866075029596686, "memory(GiB)": 165.8, "reward": 2.424452781677246, "reward_std": 0.26284927129745483, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336149215698242, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5286195278167725, "rewards/GeoVisalEntityMatch2ORM/std": 0.13231277465820312, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 939, "train_speed(iter/s)": 0.026429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/mean_length": 367.7395935058594, "completions/min_length": 326.0, "epoch": 0.11262880421758927, "grad_norm": 1.1912924141375065, "kl": 0.782940000295639, "learning_rate": 9.73746980241129e-07, "loss": 0.0007849261164665222, "memory(GiB)": 165.8, "reward": 2.538789749145508, "reward_std": 0.17468662559986115, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.588789701461792, "rewards/GeoVisalEntityMatch2ORM/std": 0.21718256175518036, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 940, "train_speed(iter/s)": 0.026437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 373.9375, "completions/min_length": 293.0, "epoch": 0.11274862209441648, "grad_norm": 1.1860566067311729, "kl": 0.8113332986831665, "learning_rate": 9.73686149661934e-07, "loss": 0.0008130831411108375, "memory(GiB)": 165.8, "reward": 2.3365988731384277, "reward_std": 0.13136844336986542, "rewards/GeoLocAccuracyV2ORM/mean": 0.7708333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4225029945373535, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5657652616500854, "rewards/GeoVisalEntityMatch2ORM/std": 0.21657444536685944, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 941, "train_speed(iter/s)": 0.026445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/mean_length": 376.1458435058594, "completions/min_length": 327.0, "epoch": 0.11286843997124371, "grad_norm": 1.1032322878341863, "kl": 0.7824528217315674, "learning_rate": 9.736252505938618e-07, "loss": 0.0007818043231964111, "memory(GiB)": 165.8, "reward": 2.41137957572937, "reward_std": 0.08312217146158218, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4113794267177582, "rewards/GeoVisalEntityMatch2ORM/std": 0.23803052306175232, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 942, "train_speed(iter/s)": 0.026453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 363.4270935058594, "completions/min_length": 249.0, "epoch": 0.11298825784807093, "grad_norm": 1.0411686861065996, "kl": 0.7559645473957062, "learning_rate": 9.735642830457178e-07, "loss": 0.0007565608248114586, "memory(GiB)": 165.8, "reward": 2.6916542053222656, "reward_std": 0.05556107312440872, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6916543245315552, "rewards/GeoVisalEntityMatch2ORM/std": 0.20745670795440674, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 943, "train_speed(iter/s)": 0.026455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/mean_length": 371.84375, "completions/min_length": 303.0, "epoch": 0.11310807572489816, "grad_norm": 0.9619041069726659, "kl": 0.7298468053340912, "learning_rate": 9.735032470263174e-07, "loss": 0.0007326907361857593, "memory(GiB)": 165.8, "reward": 2.548032522201538, "reward_std": 0.18451079726219177, "rewards/GeoLocAccuracyV2ORM/mean": 0.9270833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.2447250634431839, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6209490895271301, "rewards/GeoVisalEntityMatch2ORM/std": 0.12409378588199615, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 944, "train_speed(iter/s)": 0.026463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 418.0, "completions/mean_length": 374.5833435058594, "completions/min_length": 297.0, "epoch": 0.11322789360172537, "grad_norm": 1.243576780735888, "kl": 0.8987683355808258, "learning_rate": 9.734421425444851e-07, "loss": 0.0008974100346677005, "memory(GiB)": 165.8, "reward": 2.2805285453796387, "reward_std": 0.31705060601234436, "rewards/GeoLocAccuracyV2ORM/mean": 0.8854166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3201904594898224, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5096951723098755, "rewards/GeoVisalEntityMatch2ORM/std": 0.20682232081890106, "rewards/MathFormat/mean": 0.8854166865348816, "rewards/MathFormat/std": 0.3201904594898224, "step": 945, "train_speed(iter/s)": 0.026462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/mean_length": 366.9375, "completions/min_length": 312.0, "epoch": 0.1133477114785526, "grad_norm": 1.1316364159580352, "kl": 0.7617400288581848, "learning_rate": 9.73380969609056e-07, "loss": 0.0007615834474563599, "memory(GiB)": 165.8, "reward": 2.672111988067627, "reward_std": 0.07943788915872574, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6721117496490479, "rewards/GeoVisalEntityMatch2ORM/std": 0.10119552165269852, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 946, "train_speed(iter/s)": 0.02647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 375.85418701171875, "completions/min_length": 331.0, "epoch": 0.11346752935537982, "grad_norm": 1.1704895515539353, "kl": 0.7545706331729889, "learning_rate": 9.733197282288751e-07, "loss": 0.0007558676297776401, "memory(GiB)": 165.8, "reward": 2.618354320526123, "reward_std": 0.23141463100910187, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.24566416442394257, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7016876935958862, "rewards/GeoVisalEntityMatch2ORM/std": 0.15524700284004211, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 947, "train_speed(iter/s)": 0.026471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/mean_length": 372.6875, "completions/min_length": 308.0, "epoch": 0.11358734723220705, "grad_norm": 1.1444252260605177, "kl": 0.7256012260913849, "learning_rate": 9.732584184127973e-07, "loss": 0.0007285898318514228, "memory(GiB)": 165.8, "reward": 2.530245542526245, "reward_std": 0.07313768565654755, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5302455425262451, "rewards/GeoVisalEntityMatch2ORM/std": 0.21608825027942657, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 948, "train_speed(iter/s)": 0.026479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.0, "completions/mean_length": 356.19793701171875, "completions/min_length": 287.0, "epoch": 0.11370716510903427, "grad_norm": 1.052180575994504, "kl": 0.7262965440750122, "learning_rate": 9.731970401696864e-07, "loss": 0.0007270922651514411, "memory(GiB)": 165.8, "reward": 2.7213542461395264, "reward_std": 0.07638498395681381, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7213541865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.19339536130428314, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 949, "train_speed(iter/s)": 0.026487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.0, "completions/mean_length": 363.6875, "completions/min_length": 278.0, "epoch": 0.1138269829858615, "grad_norm": 1.139540339918258, "kl": 0.7636532783508301, "learning_rate": 9.731355935084176e-07, "loss": 0.0007655620574951172, "memory(GiB)": 165.8, "reward": 2.668712615966797, "reward_std": 0.06990477442741394, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6687127947807312, "rewards/GeoVisalEntityMatch2ORM/std": 0.20235298573970795, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 950, "train_speed(iter/s)": 0.026495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 431.0, "completions/mean_length": 368.4583435058594, "completions/min_length": 307.0, "epoch": 0.11394680086268871, "grad_norm": 1.175324706584208, "kl": 0.9542794525623322, "learning_rate": 9.730740784378752e-07, "loss": 0.0009465900948271155, "memory(GiB)": 165.8, "reward": 2.4271512031555176, "reward_std": 0.28869202733039856, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336147725582123, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5313176512718201, "rewards/GeoVisalEntityMatch2ORM/std": 0.1898527294397354, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336147725582123, "step": 951, "train_speed(iter/s)": 0.026493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 382.82293701171875, "completions/min_length": 297.0, "epoch": 0.11406661873951593, "grad_norm": 1.1621701465768124, "kl": 0.7658447027206421, "learning_rate": 9.730124949669532e-07, "loss": 0.0007679437985643744, "memory(GiB)": 165.8, "reward": 2.827723979949951, "reward_std": 0.08785122632980347, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8277239799499512, "rewards/GeoVisalEntityMatch2ORM/std": 0.11717819422483444, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 952, "train_speed(iter/s)": 0.026501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 379.63543701171875, "completions/min_length": 302.0, "epoch": 0.11418643661634316, "grad_norm": 1.2404680323670707, "kl": 0.7403070032596588, "learning_rate": 9.72950843104556e-07, "loss": 0.0007422169437631965, "memory(GiB)": 165.8, "reward": 2.574045181274414, "reward_std": 0.10780268907546997, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5740451216697693, "rewards/GeoVisalEntityMatch2ORM/std": 0.18816933035850525, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 953, "train_speed(iter/s)": 0.026502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 501.0, "completions/mean_length": 420.79168701171875, "completions/min_length": 355.0, "epoch": 0.11430625449317038, "grad_norm": 1.0792790840306645, "kl": 0.7402018308639526, "learning_rate": 9.728891228595974e-07, "loss": 0.0007415587897412479, "memory(GiB)": 165.8, "reward": 2.5444445610046387, "reward_std": 0.1305011808872223, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7361111640930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.1261773705482483, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 954, "train_speed(iter/s)": 0.026509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 448.0, "completions/mean_length": 389.125, "completions/min_length": 246.0, "epoch": 0.1144260723699976, "grad_norm": 1.3852270339944643, "kl": 1.0195600092411041, "learning_rate": 9.72827334241002e-07, "loss": 0.0010109494905918837, "memory(GiB)": 165.8, "reward": 2.60546875, "reward_std": 0.30552536249160767, "rewards/GeoLocAccuracyV2ORM/mean": 0.9270833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.26136448979377747, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7513021230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.1786045879125595, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136448979377747, "step": 955, "train_speed(iter/s)": 0.02651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/mean_length": 398.13543701171875, "completions/min_length": 336.0, "epoch": 0.11454589024682482, "grad_norm": 1.0487876950641155, "kl": 0.7562524676322937, "learning_rate": 9.727654772577028e-07, "loss": 0.0007578085060231388, "memory(GiB)": 165.8, "reward": 2.767824172973633, "reward_std": 0.06441160291433334, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7678240537643433, "rewards/GeoVisalEntityMatch2ORM/std": 0.10052717477083206, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 956, "train_speed(iter/s)": 0.026518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 389.6458435058594, "completions/min_length": 330.0, "epoch": 0.11466570812365205, "grad_norm": 1.2176384252049062, "kl": 0.7413187026977539, "learning_rate": 9.72703551918644e-07, "loss": 0.0007437020540237427, "memory(GiB)": 165.8, "reward": 2.5362350940704346, "reward_std": 0.20383107662200928, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5862351655960083, "rewards/GeoVisalEntityMatch2ORM/std": 0.23784063756465912, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 957, "train_speed(iter/s)": 0.026519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/mean_length": 418.625, "completions/min_length": 368.0, "epoch": 0.11478552600047927, "grad_norm": 1.2362387461341255, "kl": 0.7897710800170898, "learning_rate": 9.726415582327788e-07, "loss": 0.0007910678978078067, "memory(GiB)": 165.8, "reward": 2.5871529579162598, "reward_std": 0.08802234381437302, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5871528387069702, "rewards/GeoVisalEntityMatch2ORM/std": 0.21144835650920868, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 958, "train_speed(iter/s)": 0.026526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/mean_length": 427.5625, "completions/min_length": 349.0, "epoch": 0.1149053438773065, "grad_norm": 1.067759253591194, "kl": 0.6683136820793152, "learning_rate": 9.725794962090714e-07, "loss": 0.0006707447464577854, "memory(GiB)": 165.8, "reward": 2.6208438873291016, "reward_std": 0.10532141476869583, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6208438873291016, "rewards/GeoVisalEntityMatch2ORM/std": 0.15193411707878113, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 959, "train_speed(iter/s)": 0.026537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 523.0, "completions/mean_length": 429.9270935058594, "completions/min_length": 321.0, "epoch": 0.11502516175413371, "grad_norm": 1.1219206629111136, "kl": 0.7557952404022217, "learning_rate": 9.725173658564945e-07, "loss": 0.000757294415961951, "memory(GiB)": 165.8, "reward": 2.468686819076538, "reward_std": 0.08319500088691711, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6686868667602539, "rewards/GeoVisalEntityMatch2ORM/std": 0.12166157364845276, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 960, "train_speed(iter/s)": 0.026543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 526.0, "completions/mean_length": 453.47918701171875, "completions/min_length": 393.0, "epoch": 0.11514497963096094, "grad_norm": 0.8361517409527165, "kl": 0.7437337934970856, "learning_rate": 9.724551671840318e-07, "loss": 0.0007458453765138984, "memory(GiB)": 165.8, "reward": 2.7440972328186035, "reward_std": 0.045134611427783966, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7440972328186035, "rewards/GeoVisalEntityMatch2ORM/std": 0.18350745737552643, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 961, "train_speed(iter/s)": 0.026553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 526.0, "completions/mean_length": 454.57293701171875, "completions/min_length": 339.0, "epoch": 0.11526479750778816, "grad_norm": 1.1436773627450671, "kl": 0.7015519738197327, "learning_rate": 9.72392900200676e-07, "loss": 0.0007017056341283023, "memory(GiB)": 165.8, "reward": 2.651475667953491, "reward_std": 0.3617924451828003, "rewards/GeoLocAccuracyV2ORM/mean": 0.9375, "rewards/GeoLocAccuracyV2ORM/std": 0.2433321326971054, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7764756679534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.18427982926368713, "rewards/MathFormat/mean": 0.9375, "rewards/MathFormat/std": 0.2433321326971054, "step": 962, "train_speed(iter/s)": 0.02656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 553.0, "completions/mean_length": 461.1875, "completions/min_length": 364.0, "epoch": 0.11538461538461539, "grad_norm": 1.052853734130413, "kl": 0.7456941306591034, "learning_rate": 9.723305649154303e-07, "loss": 0.0007463246583938599, "memory(GiB)": 165.8, "reward": 2.5843255519866943, "reward_std": 0.18247735500335693, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.605158805847168, "rewards/GeoVisalEntityMatch2ORM/std": 0.130597785115242, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 963, "train_speed(iter/s)": 0.026566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 580.0, "completions/mean_length": 498.8958435058594, "completions/min_length": 424.0, "epoch": 0.11550443326144261, "grad_norm": 1.0113703021871954, "kl": 0.8349959552288055, "learning_rate": 9.722681613373078e-07, "loss": 0.000831661163829267, "memory(GiB)": 165.8, "reward": 2.4160056114196777, "reward_std": 0.48733243346214294, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3547917604446411, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7076719999313354, "rewards/GeoVisalEntityMatch2ORM/std": 0.09474367648363113, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 964, "train_speed(iter/s)": 0.026573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3333333333333333, "completions/max_length": 506.0, "completions/mean_length": 451.01043701171875, "completions/min_length": 391.0, "epoch": 0.11562425113826982, "grad_norm": 1.1183598432493562, "kl": 0.8221509456634521, "learning_rate": 9.722056894753307e-07, "loss": 0.0008197613060474396, "memory(GiB)": 165.8, "reward": 2.02993106842041, "reward_std": 0.3964817523956299, "rewards/GeoLocAccuracyV2ORM/mean": 0.6770833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4700457453727722, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6757644414901733, "rewards/GeoVisalEntityMatch2ORM/std": 0.24263130128383636, "rewards/MathFormat/mean": 0.6770833730697632, "rewards/MathFormat/std": 0.4700457453727722, "step": 965, "train_speed(iter/s)": 0.026572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 520.0, "completions/mean_length": 437.82293701171875, "completions/min_length": 368.0, "epoch": 0.11574406901509705, "grad_norm": 0.9932399193940781, "kl": 0.7682936787605286, "learning_rate": 9.72143149338532e-07, "loss": 0.0007685075397603214, "memory(GiB)": 165.8, "reward": 2.245833396911621, "reward_std": 0.08812893182039261, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7458333969116211, "rewards/GeoVisalEntityMatch2ORM/std": 0.1920597106218338, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 966, "train_speed(iter/s)": 0.02657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 456.04168701171875, "completions/min_length": 394.0, "epoch": 0.11586388689192427, "grad_norm": 1.048282483601817, "kl": 0.7205431163311005, "learning_rate": 9.720805409359546e-07, "loss": 0.0007205232977867126, "memory(GiB)": 165.8, "reward": 2.658482313156128, "reward_std": 0.16219618916511536, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.679315447807312, "rewards/GeoVisalEntityMatch2ORM/std": 0.13277611136436462, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 967, "train_speed(iter/s)": 0.026577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 522.0, "completions/mean_length": 447.76043701171875, "completions/min_length": 373.0, "epoch": 0.1159837047687515, "grad_norm": 0.9348212944096246, "kl": 0.7457632124423981, "learning_rate": 9.720178642766498e-07, "loss": 0.0007460961933247745, "memory(GiB)": 165.8, "reward": 2.5700645446777344, "reward_std": 0.13560016453266144, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.590897798538208, "rewards/GeoVisalEntityMatch2ORM/std": 0.2662230432033539, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 968, "train_speed(iter/s)": 0.026584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 504.0, "completions/mean_length": 435.7708435058594, "completions/min_length": 379.0, "epoch": 0.11610352264557872, "grad_norm": 1.212757614491668, "kl": 0.9675587117671967, "learning_rate": 9.719551193696809e-07, "loss": 0.0009674355387687683, "memory(GiB)": 165.8, "reward": 2.241406202316284, "reward_std": 0.2301831692457199, "rewards/GeoLocAccuracyV2ORM/mean": 0.78125, "rewards/GeoLocAccuracyV2ORM/std": 0.4155687391757965, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6789063215255737, "rewards/GeoVisalEntityMatch2ORM/std": 0.15883325040340424, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 969, "train_speed(iter/s)": 0.026585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 447.16668701171875, "completions/min_length": 382.0, "epoch": 0.11622334052240595, "grad_norm": 1.3125011815070344, "kl": 0.7368811964988708, "learning_rate": 9.718923062241194e-07, "loss": 0.0007401282782666385, "memory(GiB)": 165.8, "reward": 2.683246612548828, "reward_std": 0.08552722632884979, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6832465529441833, "rewards/GeoVisalEntityMatch2ORM/std": 0.09873095154762268, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 970, "train_speed(iter/s)": 0.026595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 399.85418701171875, "completions/min_length": 325.0, "epoch": 0.11634315839923316, "grad_norm": 1.151541527974493, "kl": 0.72027388215065, "learning_rate": 9.718294248490474e-07, "loss": 0.0007220854749903083, "memory(GiB)": 165.8, "reward": 2.736624240875244, "reward_std": 0.11633770912885666, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.744957447052002, "rewards/GeoVisalEntityMatch2ORM/std": 0.16057009994983673, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 971, "train_speed(iter/s)": 0.026602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 483.0, "completions/mean_length": 424.2395935058594, "completions/min_length": 377.0, "epoch": 0.11646297627606039, "grad_norm": 1.1148263678555697, "kl": 0.759352445602417, "learning_rate": 9.717664752535566e-07, "loss": 0.000761261850129813, "memory(GiB)": 165.8, "reward": 2.8147971630096436, "reward_std": 0.1578303575515747, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8356303572654724, "rewards/GeoVisalEntityMatch2ORM/std": 0.19282162189483643, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 972, "train_speed(iter/s)": 0.026608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 408.1458435058594, "completions/min_length": 342.0, "epoch": 0.11658279415288761, "grad_norm": 1.076482922948438, "kl": 0.7577323615550995, "learning_rate": 9.717034574467488e-07, "loss": 0.0007601107354275882, "memory(GiB)": 165.8, "reward": 2.6004340648651123, "reward_std": 0.09590113162994385, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6004340648651123, "rewards/GeoVisalEntityMatch2ORM/std": 0.19163183867931366, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 973, "train_speed(iter/s)": 0.026615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 360.8020935058594, "completions/min_length": 269.0, "epoch": 0.11670261202971484, "grad_norm": 1.0436536420110447, "kl": 0.7753794193267822, "learning_rate": 9.716403714377358e-07, "loss": 0.0007783907349221408, "memory(GiB)": 165.8, "reward": 2.7555556297302246, "reward_std": 0.06391198933124542, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7555555105209351, "rewards/GeoVisalEntityMatch2ORM/std": 0.11006155610084534, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 974, "train_speed(iter/s)": 0.026622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 391.40625, "completions/min_length": 339.0, "epoch": 0.11682242990654206, "grad_norm": 1.130868886053697, "kl": 0.8012729287147522, "learning_rate": 9.715772172356386e-07, "loss": 0.000802094757091254, "memory(GiB)": 165.8, "reward": 2.625793695449829, "reward_std": 0.11619533598423004, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6341270208358765, "rewards/GeoVisalEntityMatch2ORM/std": 0.14161138236522675, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 975, "train_speed(iter/s)": 0.026629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.0, "completions/mean_length": 373.875, "completions/min_length": 309.0, "epoch": 0.11694224778336929, "grad_norm": 1.2065871898575624, "kl": 0.7803145349025726, "learning_rate": 9.71513994849589e-07, "loss": 0.0007812852854840457, "memory(GiB)": 165.8, "reward": 2.528385639190674, "reward_std": 0.25511109828948975, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.29199856519699097, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6533854007720947, "rewards/GeoVisalEntityMatch2ORM/std": 0.20877525210380554, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 976, "train_speed(iter/s)": 0.026637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 368.63543701171875, "completions/min_length": 314.0, "epoch": 0.1170620656601965, "grad_norm": 1.243264046702627, "kl": 0.7953060567378998, "learning_rate": 9.714507042887275e-07, "loss": 0.0007954041357152164, "memory(GiB)": 165.8, "reward": 2.4631075859069824, "reward_std": 0.10955817252397537, "rewards/GeoLocAccuracyV2ORM/mean": 0.7333333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.44073307514190674, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7297743558883667, "rewards/GeoVisalEntityMatch2ORM/std": 0.17728395760059357, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 977, "train_speed(iter/s)": 0.026644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/mean_length": 364.10418701171875, "completions/min_length": 300.0, "epoch": 0.11718188353702372, "grad_norm": 1.2663888557376635, "kl": 0.7957361042499542, "learning_rate": 9.713873455622055e-07, "loss": 0.0007974083418957889, "memory(GiB)": 165.8, "reward": 2.653831958770752, "reward_std": 0.0898163765668869, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6538318395614624, "rewards/GeoVisalEntityMatch2ORM/std": 0.12800084054470062, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 978, "train_speed(iter/s)": 0.026651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 445.0, "completions/mean_length": 378.21875, "completions/min_length": 322.0, "epoch": 0.11730170141385095, "grad_norm": 1.14795944546457, "kl": 0.7901874482631683, "learning_rate": 9.71323918679184e-07, "loss": 0.0007933179731480777, "memory(GiB)": 165.8, "reward": 2.626124382019043, "reward_std": 0.1709369570016861, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6927911043167114, "rewards/GeoVisalEntityMatch2ORM/std": 0.118832528591156, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 979, "train_speed(iter/s)": 0.026657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 375.9375, "completions/min_length": 328.0, "epoch": 0.11742151929067816, "grad_norm": 1.1784250399681815, "kl": 0.8424962759017944, "learning_rate": 9.712604236488332e-07, "loss": 0.000844443857204169, "memory(GiB)": 165.8, "reward": 2.5216147899627686, "reward_std": 0.08745279163122177, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5216146111488342, "rewards/GeoVisalEntityMatch2ORM/std": 0.19904589653015137, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 980, "train_speed(iter/s)": 0.026665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 402.0, "completions/mean_length": 367.32293701171875, "completions/min_length": 329.0, "epoch": 0.1175413371675054, "grad_norm": 1.1584412129344703, "kl": 0.7993293702602386, "learning_rate": 9.71196860480334e-07, "loss": 0.0007997130742296576, "memory(GiB)": 165.8, "reward": 2.402116537094116, "reward_std": 0.07109986990690231, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6521164178848267, "rewards/GeoVisalEntityMatch2ORM/std": 0.2059124857187271, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 981, "train_speed(iter/s)": 0.026672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 381.375, "completions/min_length": 325.0, "epoch": 0.11766115504433261, "grad_norm": 1.295300657210492, "kl": 0.7652766704559326, "learning_rate": 9.711332291828768e-07, "loss": 0.0007658253307454288, "memory(GiB)": 165.8, "reward": 2.664930582046509, "reward_std": 0.16590942442417145, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6982638835906982, "rewards/GeoVisalEntityMatch2ORM/std": 0.21734009683132172, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 982, "train_speed(iter/s)": 0.026679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/mean_length": 375.10418701171875, "completions/min_length": 317.0, "epoch": 0.11778097292115984, "grad_norm": 1.2406063004701327, "kl": 0.7655704021453857, "learning_rate": 9.710695297656619e-07, "loss": 0.0007663841242901981, "memory(GiB)": 165.8, "reward": 2.7383432388305664, "reward_std": 0.11539363861083984, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7383432388305664, "rewards/GeoVisalEntityMatch2ORM/std": 0.19193348288536072, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 983, "train_speed(iter/s)": 0.026689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 414.0, "completions/mean_length": 369.85418701171875, "completions/min_length": 321.0, "epoch": 0.11790079079798706, "grad_norm": 1.3051402817901923, "kl": 0.7734074592590332, "learning_rate": 9.710057622378992e-07, "loss": 0.0007743475725874305, "memory(GiB)": 165.8, "reward": 2.6242189407348633, "reward_std": 0.0916125625371933, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.624218761920929, "rewards/GeoVisalEntityMatch2ORM/std": 0.24726007878780365, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 984, "train_speed(iter/s)": 0.026694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 393.90625, "completions/min_length": 338.0, "epoch": 0.11802060867481429, "grad_norm": 1.2840454434894508, "kl": 0.7613667249679565, "learning_rate": 9.709419266088087e-07, "loss": 0.0007622664561495185, "memory(GiB)": 165.8, "reward": 2.338773250579834, "reward_std": 0.1066717803478241, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5887731909751892, "rewards/GeoVisalEntityMatch2ORM/std": 0.23730944097042084, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 985, "train_speed(iter/s)": 0.026701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 406.0, "completions/min_length": 347.0, "epoch": 0.1181404265516415, "grad_norm": 1.2009739611363164, "kl": 0.7972490787506104, "learning_rate": 9.708780228876204e-07, "loss": 0.0007990151643753052, "memory(GiB)": 165.8, "reward": 2.4580979347229004, "reward_std": 0.20647850632667542, "rewards/GeoLocAccuracyV2ORM/mean": 0.9312500357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.24764683842658997, "rewards/GeoVisalEntityMatch2ORM/mean": 0.526847779750824, "rewards/GeoVisalEntityMatch2ORM/std": 0.1348404437303543, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 986, "train_speed(iter/s)": 0.026708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3020833333333333, "completions/max_length": 423.0, "completions/mean_length": 382.9895935058594, "completions/min_length": 352.0, "epoch": 0.11826024442846873, "grad_norm": 1.338834577289264, "kl": 1.1059983968734741, "learning_rate": 9.708140510835737e-07, "loss": 0.00109660136513412, "memory(GiB)": 165.8, "reward": 2.2517993450164795, "reward_std": 0.3552577495574951, "rewards/GeoLocAccuracyV2ORM/mean": 0.7083333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.45691564679145813, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8351325988769531, "rewards/GeoVisalEntityMatch2ORM/std": 0.10545194894075394, "rewards/MathFormat/mean": 0.7083333730697632, "rewards/MathFormat/std": 0.45691564679145813, "step": 987, "train_speed(iter/s)": 0.026705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 416.07293701171875, "completions/min_length": 371.0, "epoch": 0.11838006230529595, "grad_norm": 0.9383463076734343, "kl": 0.71836718916893, "learning_rate": 9.707500112059183e-07, "loss": 0.0007192244520410895, "memory(GiB)": 165.8, "reward": 2.227790355682373, "reward_std": 0.1624167114496231, "rewards/GeoLocAccuracyV2ORM/mean": 0.6333333253860474, "rewards/GeoLocAccuracyV2ORM/std": 0.4594428241252899, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5944568514823914, "rewards/GeoVisalEntityMatch2ORM/std": 0.12528471648693085, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 988, "train_speed(iter/s)": 0.026713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 424.66668701171875, "completions/min_length": 364.0, "epoch": 0.11849988018212318, "grad_norm": 1.0811759244526402, "kl": 0.7275669872760773, "learning_rate": 9.706859032639133e-07, "loss": 0.0007287835469469428, "memory(GiB)": 165.8, "reward": 2.75390625, "reward_std": 0.0752425268292427, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.75390625, "rewards/GeoVisalEntityMatch2ORM/std": 0.1376173198223114, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 989, "train_speed(iter/s)": 0.026722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/mean_length": 412.7083435058594, "completions/min_length": 331.0, "epoch": 0.1186196980589504, "grad_norm": 1.1742817110465034, "kl": 0.6844373345375061, "learning_rate": 9.706217272668283e-07, "loss": 0.0006851417711004615, "memory(GiB)": 165.8, "reward": 2.573611259460449, "reward_std": 0.25626862049102783, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583332896232605, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7152778506278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.1832810491323471, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 990, "train_speed(iter/s)": 0.026729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 482.0, "completions/mean_length": 402.53125, "completions/min_length": 361.0, "epoch": 0.11873951593577761, "grad_norm": 1.4799718623669742, "kl": 2.000022053718567, "learning_rate": 9.705574832239419e-07, "loss": 0.002000862266868353, "memory(GiB)": 165.8, "reward": 2.1459078788757324, "reward_std": 0.09949764609336853, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6459077596664429, "rewards/GeoVisalEntityMatch2ORM/std": 0.20637629926204681, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 991, "train_speed(iter/s)": 0.026728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 434.40625, "completions/min_length": 371.0, "epoch": 0.11885933381260484, "grad_norm": 1.1900225265197135, "kl": 0.7212047576904297, "learning_rate": 9.704931711445432e-07, "loss": 0.0007223188877105713, "memory(GiB)": 165.8, "reward": 2.6424412727355957, "reward_std": 0.07436162978410721, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6424412727355957, "rewards/GeoVisalEntityMatch2ORM/std": 0.1453382521867752, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 992, "train_speed(iter/s)": 0.026738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 496.0, "completions/mean_length": 430.84375, "completions/min_length": 379.0, "epoch": 0.11897915168943206, "grad_norm": 1.139001668746458, "kl": 0.711368203163147, "learning_rate": 9.704287910379308e-07, "loss": 0.0007135483319871128, "memory(GiB)": 165.8, "reward": 2.624586820602417, "reward_std": 0.1008414775133133, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6245867013931274, "rewards/GeoVisalEntityMatch2ORM/std": 0.12223454564809799, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 993, "train_speed(iter/s)": 0.026744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 490.0, "completions/mean_length": 430.90625, "completions/min_length": 375.0, "epoch": 0.11909896956625929, "grad_norm": 1.151219323065272, "kl": 1.2430919706821442, "learning_rate": 9.703643429134131e-07, "loss": 0.0012386541347950697, "memory(GiB)": 165.8, "reward": 2.1001737117767334, "reward_std": 0.3360211253166199, "rewards/GeoLocAccuracyV2ORM/mean": 0.6375000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.445149302482605, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6501736044883728, "rewards/GeoVisalEntityMatch2ORM/std": 0.17928946018218994, "rewards/MathFormat/mean": 0.8125, "rewards/MathFormat/std": 0.39236128330230713, "step": 994, "train_speed(iter/s)": 0.026744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 506.0, "completions/mean_length": 454.8020935058594, "completions/min_length": 361.0, "epoch": 0.1192187874430865, "grad_norm": 1.097747419883216, "kl": 0.8335250616073608, "learning_rate": 9.702998267803086e-07, "loss": 0.0008275186410173774, "memory(GiB)": 165.8, "reward": 2.616459846496582, "reward_std": 0.3923349678516388, "rewards/GeoLocAccuracyV2ORM/mean": 0.9291666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.2546066641807556, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7497934103012085, "rewards/GeoVisalEntityMatch2ORM/std": 0.15763653814792633, "rewards/MathFormat/mean": 0.9375, "rewards/MathFormat/std": 0.2433321326971054, "step": 995, "train_speed(iter/s)": 0.026751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/mean_length": 454.3020935058594, "completions/min_length": 413.0, "epoch": 0.11933860531991373, "grad_norm": 1.0307249658305513, "kl": 0.6867500841617584, "learning_rate": 9.702352426479457e-07, "loss": 0.0006867647171020508, "memory(GiB)": 165.8, "reward": 2.7223689556121826, "reward_std": 0.05387680605053902, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7223688364028931, "rewards/GeoVisalEntityMatch2ORM/std": 0.19786949455738068, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 996, "train_speed(iter/s)": 0.026758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 501.0, "completions/mean_length": 454.9895935058594, "completions/min_length": 402.0, "epoch": 0.11945842319674095, "grad_norm": 1.1484577733839716, "kl": 0.7128037214279175, "learning_rate": 9.70170590525662e-07, "loss": 0.0007122507086023688, "memory(GiB)": 165.8, "reward": 2.6482062339782715, "reward_std": 0.08818875253200531, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6482060551643372, "rewards/GeoVisalEntityMatch2ORM/std": 0.22544951736927032, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 997, "train_speed(iter/s)": 0.026764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 501.0, "completions/mean_length": 448.40625, "completions/min_length": 387.0, "epoch": 0.11957824107356818, "grad_norm": 1.1306364411442518, "kl": 0.6913319826126099, "learning_rate": 9.701058704228055e-07, "loss": 0.0006917293067090213, "memory(GiB)": 165.8, "reward": 2.201967716217041, "reward_std": 0.19420330226421356, "rewards/GeoLocAccuracyV2ORM/mean": 0.625, "rewards/GeoLocAccuracyV2ORM/std": 0.45997709035873413, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5769676566123962, "rewards/GeoVisalEntityMatch2ORM/std": 0.1547115445137024, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 998, "train_speed(iter/s)": 0.026771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 495.0, "completions/mean_length": 435.8958435058594, "completions/min_length": 385.0, "epoch": 0.1196980589503954, "grad_norm": 1.1081771086063161, "kl": 0.6871024072170258, "learning_rate": 9.700410823487342e-07, "loss": 0.0006884585018269718, "memory(GiB)": 165.8, "reward": 2.1983509063720703, "reward_std": 0.14847859740257263, "rewards/GeoLocAccuracyV2ORM/mean": 0.7291666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.44672298431396484, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4691840410232544, "rewards/GeoVisalEntityMatch2ORM/std": 0.193010613322258, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 999, "train_speed(iter/s)": 0.026778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/mean_length": 451.9583435058594, "completions/min_length": 411.0, "epoch": 0.11981787682722263, "grad_norm": 1.1183832003309822, "kl": 0.7335068583488464, "learning_rate": 9.699762263128154e-07, "loss": 0.0007346744532696903, "memory(GiB)": 165.8, "reward": 2.534848690032959, "reward_std": 0.10210149735212326, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5348485112190247, "rewards/GeoVisalEntityMatch2ORM/std": 0.12347117066383362, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1000, "train_speed(iter/s)": 0.026784 }, { "epoch": 0.11981787682722263, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.04675099206349205, "eval_completions/max_length": 501.4166666666667, "eval_completions/mean_length": 448.299736567906, "eval_completions/min_length": 398.2559523809524, "eval_kl": 0.8050945617613339, "eval_loss": 0.0008123678271658719, "eval_reward": 2.4923818959366706, "eval_reward_std": 0.18279742163472942, "eval_rewards/GeoLocAccuracyV2ORM/mean": 0.8711061603167937, "eval_rewards/GeoLocAccuracyV2ORM/std": 0.16503831881674982, "eval_rewards/GeoVisalEntityMatch2ORM/mean": 0.6665385872835204, "eval_rewards/GeoVisalEntityMatch2ORM/std": 0.14555303339979478, "eval_rewards/MathFormat/mean": 0.9547371081564398, "eval_rewards/MathFormat/std": 0.0733878980612471, "eval_runtime": 1799.5167, "eval_samples_per_second": 0.187, "eval_steps_per_second": 0.004, "step": 1000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 524.0, "completions/mean_length": 446.35418701171875, "completions/min_length": 386.0, "epoch": 0.11993769470404984, "grad_norm": 1.0952666380481928, "kl": 0.6999925374984741, "learning_rate": 9.699113023244263e-07, "loss": 0.0007024730439297855, "memory(GiB)": 165.8, "reward": 2.644704818725586, "reward_std": 0.14076337218284607, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.17868918180465698, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6863715052604675, "rewards/GeoVisalEntityMatch2ORM/std": 0.20954252779483795, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1001, "train_speed(iter/s)": 0.025528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 455.71875, "completions/min_length": 394.0, "epoch": 0.12005751258087707, "grad_norm": 0.9941343096286119, "kl": 0.711719810962677, "learning_rate": 9.698463103929541e-07, "loss": 0.0007138513028621674, "memory(GiB)": 165.8, "reward": 2.774576187133789, "reward_std": 0.057550907135009766, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7745761275291443, "rewards/GeoVisalEntityMatch2ORM/std": 0.2180618792772293, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1002, "train_speed(iter/s)": 0.025538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 510.0, "completions/mean_length": 449.21875, "completions/min_length": 386.0, "epoch": 0.12017733045770429, "grad_norm": 1.0201555438366638, "kl": 0.7002114653587341, "learning_rate": 9.69781250527796e-07, "loss": 0.0007009431719779968, "memory(GiB)": 165.8, "reward": 2.3105666637420654, "reward_std": 0.23710636794567108, "rewards/GeoLocAccuracyV2ORM/mean": 0.7291666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.44672298431396484, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6022332906723022, "rewards/GeoVisalEntityMatch2ORM/std": 0.1643221229314804, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 1003, "train_speed(iter/s)": 0.025544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2708333333333333, "completions/max_length": 540.0, "completions/mean_length": 451.03125, "completions/min_length": 413.0, "epoch": 0.1202971483345315, "grad_norm": 1.130816584164284, "kl": 0.7258397340774536, "learning_rate": 9.697161227383589e-07, "loss": 0.0007257449324242771, "memory(GiB)": 165.8, "reward": 1.8638889789581299, "reward_std": 0.19981065392494202, "rewards/GeoLocAccuracyV2ORM/mean": 0.5375000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.460948646068573, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5972222685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.26148879528045654, "rewards/MathFormat/mean": 0.7291666865348816, "rewards/MathFormat/std": 0.44672298431396484, "step": 1004, "train_speed(iter/s)": 0.025546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 539.0, "completions/mean_length": 443.16668701171875, "completions/min_length": 381.0, "epoch": 0.12041696621135874, "grad_norm": 1.0791671946625403, "kl": 0.6770108342170715, "learning_rate": 9.696509270340587e-07, "loss": 0.0006782959098927677, "memory(GiB)": 165.8, "reward": 2.3870534896850586, "reward_std": 0.09502710402011871, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6370536088943481, "rewards/GeoVisalEntityMatch2ORM/std": 0.1299242228269577, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1005, "train_speed(iter/s)": 0.025553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/mean_length": 438.7395935058594, "completions/min_length": 380.0, "epoch": 0.12053678408818595, "grad_norm": 0.9887206889190279, "kl": 0.6844756007194519, "learning_rate": 9.695856634243228e-07, "loss": 0.0006856471300125122, "memory(GiB)": 165.8, "reward": 2.3938658237457275, "reward_std": 0.11433736979961395, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6438657641410828, "rewards/GeoVisalEntityMatch2ORM/std": 0.17026233673095703, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1006, "train_speed(iter/s)": 0.025563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 499.0, "completions/mean_length": 426.5625, "completions/min_length": 376.0, "epoch": 0.12065660196501318, "grad_norm": 1.0667914771604725, "kl": 0.8184642195701599, "learning_rate": 9.695203319185868e-07, "loss": 0.0008180936565622687, "memory(GiB)": 165.8, "reward": 2.310664653778076, "reward_std": 0.31943684816360474, "rewards/GeoLocAccuracyV2ORM/mean": 0.8645833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.34396424889564514, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5814980864524841, "rewards/GeoVisalEntityMatch2ORM/std": 0.10688958317041397, "rewards/MathFormat/mean": 0.8645833730697632, "rewards/MathFormat/std": 0.34396424889564514, "step": 1007, "train_speed(iter/s)": 0.025566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 490.0, "completions/mean_length": 437.57293701171875, "completions/min_length": 381.0, "epoch": 0.1207764198418404, "grad_norm": 1.094197158794701, "kl": 0.710178792476654, "learning_rate": 9.694549325262973e-07, "loss": 0.0007116993656381965, "memory(GiB)": 165.8, "reward": 2.621922492980957, "reward_std": 0.16238027811050415, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6427556872367859, "rewards/GeoVisalEntityMatch2ORM/std": 0.18319465219974518, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1008, "train_speed(iter/s)": 0.025572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 431.90625, "completions/min_length": 372.0, "epoch": 0.12089623771866763, "grad_norm": 1.0694136960833027, "kl": 0.697763204574585, "learning_rate": 9.6938946525691e-07, "loss": 0.0006991128320805728, "memory(GiB)": 165.8, "reward": 2.364062786102295, "reward_std": 0.15720632672309875, "rewards/GeoLocAccuracyV2ORM/mean": 0.7020833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.446737676858902, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6619791388511658, "rewards/GeoVisalEntityMatch2ORM/std": 0.19813388586044312, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1009, "train_speed(iter/s)": 0.025579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 494.0, "completions/mean_length": 426.7395935058594, "completions/min_length": 349.0, "epoch": 0.12101605559549485, "grad_norm": 1.086405508184471, "kl": 0.8821886479854584, "learning_rate": 9.693239301198903e-07, "loss": 0.0008782893419265747, "memory(GiB)": 165.8, "reward": 2.5648438930511475, "reward_std": 0.3316023349761963, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.2871517837047577, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7398437857627869, "rewards/GeoVisalEntityMatch2ORM/std": 0.11712443083524704, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783626317977905, "step": 1010, "train_speed(iter/s)": 0.025579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 502.0, "completions/mean_length": 437.41668701171875, "completions/min_length": 391.0, "epoch": 0.12113587347232208, "grad_norm": 1.0466658954503993, "kl": 0.7190330922603607, "learning_rate": 9.69258327124714e-07, "loss": 0.0007202252745628357, "memory(GiB)": 165.8, "reward": 2.716538429260254, "reward_std": 0.20950019359588623, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357589185237885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7582052946090698, "rewards/GeoVisalEntityMatch2ORM/std": 0.12404627352952957, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 1011, "train_speed(iter/s)": 0.025584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 439.4895935058594, "completions/min_length": 389.0, "epoch": 0.12125569134914929, "grad_norm": 1.0842221439497373, "kl": 0.7217369377613068, "learning_rate": 9.691926562808666e-07, "loss": 0.0007222667336463928, "memory(GiB)": 165.8, "reward": 2.764881134033203, "reward_std": 0.16308921575546265, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.785714328289032, "rewards/GeoVisalEntityMatch2ORM/std": 0.15804782509803772, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1012, "train_speed(iter/s)": 0.025591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 511.0, "completions/mean_length": 444.44793701171875, "completions/min_length": 391.0, "epoch": 0.12137550922597652, "grad_norm": 1.0878730320704006, "kl": 0.7100459635257721, "learning_rate": 9.69126917597843e-07, "loss": 0.0007098764181137085, "memory(GiB)": 165.8, "reward": 2.6617560386657715, "reward_std": 0.0840507298707962, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6617559790611267, "rewards/GeoVisalEntityMatch2ORM/std": 0.1508229374885559, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1013, "train_speed(iter/s)": 0.025598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 510.0, "completions/mean_length": 421.35418701171875, "completions/min_length": 328.0, "epoch": 0.12149532710280374, "grad_norm": 0.960550918147325, "kl": 0.8201901018619537, "learning_rate": 9.690611110851482e-07, "loss": 0.0008198072900995612, "memory(GiB)": 165.8, "reward": 2.273357391357422, "reward_std": 0.333530068397522, "rewards/GeoLocAccuracyV2ORM/mean": 0.8125, "rewards/GeoLocAccuracyV2ORM/std": 0.39236125349998474, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6483573913574219, "rewards/GeoVisalEntityMatch2ORM/std": 0.14062273502349854, "rewards/MathFormat/mean": 0.8125, "rewards/MathFormat/std": 0.39236125349998474, "step": 1014, "train_speed(iter/s)": 0.025599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 428.3333435058594, "completions/min_length": 384.0, "epoch": 0.12161514497963095, "grad_norm": 0.8984902024293925, "kl": 0.7247978150844574, "learning_rate": 9.68995236752297e-07, "loss": 0.0007278522243723273, "memory(GiB)": 165.8, "reward": 2.6195437908172607, "reward_std": 0.07520876824855804, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6195436716079712, "rewards/GeoVisalEntityMatch2ORM/std": 0.21070782840251923, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1015, "train_speed(iter/s)": 0.025606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/mean_length": 418.4270935058594, "completions/min_length": 361.0, "epoch": 0.12173496285645818, "grad_norm": 1.1266681809247654, "kl": 0.671709269285202, "learning_rate": 9.689292946088142e-07, "loss": 0.0006729463930241764, "memory(GiB)": 165.8, "reward": 2.4407224655151367, "reward_std": 0.19480571150779724, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3302046060562134, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5865556597709656, "rewards/GeoVisalEntityMatch2ORM/std": 0.12423084676265717, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1016, "train_speed(iter/s)": 0.025614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 476.0, "completions/mean_length": 413.7083435058594, "completions/min_length": 347.0, "epoch": 0.1218547807332854, "grad_norm": 1.0161150672501893, "kl": 0.6981585621833801, "learning_rate": 9.688632846642339e-07, "loss": 0.0007007233798503876, "memory(GiB)": 165.8, "reward": 2.5418403148651123, "reward_std": 0.05835973098874092, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5418403148651123, "rewards/GeoVisalEntityMatch2ORM/std": 0.38558828830718994, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1017, "train_speed(iter/s)": 0.025621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 482.0, "completions/mean_length": 410.82293701171875, "completions/min_length": 354.0, "epoch": 0.12197459861011263, "grad_norm": 1.0888610236526033, "kl": 0.7829595506191254, "learning_rate": 9.687972069281003e-07, "loss": 0.0007819334859959781, "memory(GiB)": 165.8, "reward": 2.3765625953674316, "reward_std": 0.3309396505355835, "rewards/GeoLocAccuracyV2ORM/mean": 0.625, "rewards/GeoLocAccuracyV2ORM/std": 0.4866642951965332, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8765625357627869, "rewards/GeoVisalEntityMatch2ORM/std": 0.16658908128738403, "rewards/MathFormat/mean": 0.875, "rewards/MathFormat/std": 0.33245497941970825, "step": 1018, "train_speed(iter/s)": 0.025623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.20833333333333334, "completions/max_length": 487.0, "completions/mean_length": 396.4895935058594, "completions/min_length": 347.0, "epoch": 0.12209441648693985, "grad_norm": 1.0946828633937224, "kl": 1.237956315279007, "learning_rate": 9.687310614099676e-07, "loss": 0.0012364562135189772, "memory(GiB)": 165.8, "reward": 2.0477185249328613, "reward_std": 0.2673250138759613, "rewards/GeoLocAccuracyV2ORM/mean": 0.5333333611488342, "rewards/GeoLocAccuracyV2ORM/std": 0.49982452392578125, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7227182388305664, "rewards/GeoVisalEntityMatch2ORM/std": 0.17890875041484833, "rewards/MathFormat/mean": 0.7916666865348816, "rewards/MathFormat/std": 0.40824830532073975, "step": 1019, "train_speed(iter/s)": 0.025622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 461.0, "completions/mean_length": 410.35418701171875, "completions/min_length": 363.0, "epoch": 0.12221423436376708, "grad_norm": 1.153127380413897, "kl": 0.7768715918064117, "learning_rate": 9.686648481193991e-07, "loss": 0.000774545012973249, "memory(GiB)": 165.8, "reward": 2.7552084922790527, "reward_std": 0.17422787845134735, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7760416865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.11481592059135437, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1020, "train_speed(iter/s)": 0.025623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.0, "completions/mean_length": 394.38543701171875, "completions/min_length": 348.0, "epoch": 0.1223340522405943, "grad_norm": 1.1286234151121601, "kl": 0.7711580395698547, "learning_rate": 9.68598567065969e-07, "loss": 0.0007708817720413208, "memory(GiB)": 165.8, "reward": 2.332911729812622, "reward_std": 0.07826865464448929, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5329117178916931, "rewards/GeoVisalEntityMatch2ORM/std": 0.15691788494586945, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1021, "train_speed(iter/s)": 0.025631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 404.6145935058594, "completions/min_length": 341.0, "epoch": 0.12245387011742152, "grad_norm": 1.0956436020437876, "kl": 0.6962274610996246, "learning_rate": 9.685322182592602e-07, "loss": 0.0006966082146391273, "memory(GiB)": 165.8, "reward": 2.762152671813965, "reward_std": 0.10502045601606369, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7621528506278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.1529354304075241, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1022, "train_speed(iter/s)": 0.025638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 406.5208435058594, "completions/min_length": 345.0, "epoch": 0.12257368799424874, "grad_norm": 1.0610633389837165, "kl": 0.7095065712928772, "learning_rate": 9.684658017088663e-07, "loss": 0.0007110238075256348, "memory(GiB)": 165.8, "reward": 2.3359375, "reward_std": 0.2508231997489929, "rewards/GeoLocAccuracyV2ORM/mean": 0.7749999761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.36157548427581787, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5609375238418579, "rewards/GeoVisalEntityMatch2ORM/std": 0.22330358624458313, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1023, "train_speed(iter/s)": 0.025645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 436.0, "completions/mean_length": 382.1770935058594, "completions/min_length": 326.0, "epoch": 0.12269350587107597, "grad_norm": 1.183228556843044, "kl": 0.7016107738018036, "learning_rate": 9.683993174243898e-07, "loss": 0.0007024556398391724, "memory(GiB)": 165.8, "reward": 2.7191014289855957, "reward_std": 0.10707878321409225, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7191014289855957, "rewards/GeoVisalEntityMatch2ORM/std": 0.20004025101661682, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1024, "train_speed(iter/s)": 0.025652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 421.0, "completions/mean_length": 366.4583435058594, "completions/min_length": 315.0, "epoch": 0.12281332374790319, "grad_norm": 1.1783318185982707, "kl": 1.3348066806793213, "learning_rate": 9.683327654154439e-07, "loss": 0.001332377432845533, "memory(GiB)": 165.8, "reward": 2.1781044006347656, "reward_std": 0.3921512961387634, "rewards/GeoLocAccuracyV2ORM/mean": 0.7354166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.42796438932418823, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6093543767929077, "rewards/GeoVisalEntityMatch2ORM/std": 0.1269814819097519, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 1025, "train_speed(iter/s)": 0.025651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 393.59375, "completions/min_length": 331.0, "epoch": 0.12293314162473042, "grad_norm": 0.9682737037184134, "kl": 0.7256739139556885, "learning_rate": 9.682661456916507e-07, "loss": 0.0007270320202223957, "memory(GiB)": 165.8, "reward": 2.7864584922790527, "reward_std": 0.07028453052043915, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7864583730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.16974256932735443, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1026, "train_speed(iter/s)": 0.025658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 454.0, "completions/mean_length": 376.76043701171875, "completions/min_length": 335.0, "epoch": 0.12305295950155763, "grad_norm": 1.141720035660863, "kl": 0.7375003099441528, "learning_rate": 9.681994582626432e-07, "loss": 0.0007347129285335541, "memory(GiB)": 165.8, "reward": 2.5260417461395264, "reward_std": 0.16935387253761292, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.546875, "rewards/GeoVisalEntityMatch2ORM/std": 0.12810614705085754, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1027, "train_speed(iter/s)": 0.02566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 386.0, "completions/min_length": 318.0, "epoch": 0.12317277737838485, "grad_norm": 1.118190837399029, "kl": 0.6873968839645386, "learning_rate": 9.681327031380628e-07, "loss": 0.0006886559422127903, "memory(GiB)": 165.8, "reward": 2.7324652671813965, "reward_std": 0.11095467954874039, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7491320371627808, "rewards/GeoVisalEntityMatch2ORM/std": 0.17965713143348694, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1028, "train_speed(iter/s)": 0.025668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 371.4895935058594, "completions/min_length": 328.0, "epoch": 0.12329259525521208, "grad_norm": 1.1508928263417169, "kl": 0.7122111320495605, "learning_rate": 9.680658803275618e-07, "loss": 0.0007140761008486152, "memory(GiB)": 165.8, "reward": 2.7493057250976562, "reward_std": 0.07736854255199432, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7493056058883667, "rewards/GeoVisalEntityMatch2ORM/std": 0.12021578848361969, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1029, "train_speed(iter/s)": 0.025675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 418.0, "completions/mean_length": 364.375, "completions/min_length": 306.0, "epoch": 0.1234124131320393, "grad_norm": 1.2668650164863378, "kl": 0.7372041046619415, "learning_rate": 9.67998989840802e-07, "loss": 0.0007387648220174015, "memory(GiB)": 165.8, "reward": 2.1995277404785156, "reward_std": 0.08578012883663177, "rewards/GeoLocAccuracyV2ORM/mean": 0.7520833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4321254789829254, "rewards/GeoVisalEntityMatch2ORM/mean": 0.44744449853897095, "rewards/GeoVisalEntityMatch2ORM/std": 0.10194049030542374, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1030, "train_speed(iter/s)": 0.025683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 414.0, "completions/mean_length": 360.94793701171875, "completions/min_length": 305.0, "epoch": 0.12353223100886652, "grad_norm": 1.1747629666487704, "kl": 0.7827550172805786, "learning_rate": 9.679320316874548e-07, "loss": 0.0007846871158108115, "memory(GiB)": 165.8, "reward": 2.7106895446777344, "reward_std": 0.0828959047794342, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7106894850730896, "rewards/GeoVisalEntityMatch2ORM/std": 0.19906893372535706, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1031, "train_speed(iter/s)": 0.02569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 365.1770935058594, "completions/min_length": 289.0, "epoch": 0.12365204888569374, "grad_norm": 0.9942576554279163, "kl": 0.718647837638855, "learning_rate": 9.678650058772016e-07, "loss": 0.0007198316743597388, "memory(GiB)": 165.8, "reward": 2.7943952083587646, "reward_std": 0.08394962549209595, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.794394850730896, "rewards/GeoVisalEntityMatch2ORM/std": 0.1794130504131317, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1032, "train_speed(iter/s)": 0.025697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/mean_length": 372.66668701171875, "completions/min_length": 327.0, "epoch": 0.12377186676252097, "grad_norm": 1.1554211580910219, "kl": 0.7428738176822662, "learning_rate": 9.677979124197331e-07, "loss": 0.000743438838981092, "memory(GiB)": 165.8, "reward": 2.1021125316619873, "reward_std": 0.1702767014503479, "rewards/GeoLocAccuracyV2ORM/mean": 0.6083333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.40201249718666077, "rewards/GeoVisalEntityMatch2ORM/mean": 0.49377894401550293, "rewards/GeoVisalEntityMatch2ORM/std": 0.19662386178970337, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1033, "train_speed(iter/s)": 0.025703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 422.0, "completions/mean_length": 360.51043701171875, "completions/min_length": 303.0, "epoch": 0.12389168463934819, "grad_norm": 1.114214816677407, "kl": 0.7163221836090088, "learning_rate": 9.677307513247507e-07, "loss": 0.0007162702968344092, "memory(GiB)": 165.8, "reward": 2.7288565635681152, "reward_std": 0.26106375455856323, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490600049495697, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7913566827774048, "rewards/GeoVisalEntityMatch2ORM/std": 0.14886681735515594, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 1034, "train_speed(iter/s)": 0.025703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 364.0520935058594, "completions/min_length": 308.0, "epoch": 0.12401150251617542, "grad_norm": 1.2190277839467214, "kl": 0.7393757998943329, "learning_rate": 9.676635226019642e-07, "loss": 0.0007397582521662116, "memory(GiB)": 165.8, "reward": 2.759490728378296, "reward_std": 0.1256917119026184, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7594908475875854, "rewards/GeoVisalEntityMatch2ORM/std": 0.1370721161365509, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1035, "train_speed(iter/s)": 0.02571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/mean_length": 374.125, "completions/min_length": 316.0, "epoch": 0.12413132039300263, "grad_norm": 1.1316510938206454, "kl": 0.7212246656417847, "learning_rate": 9.675962262610949e-07, "loss": 0.0007223201682791114, "memory(GiB)": 165.8, "reward": 2.4766368865966797, "reward_std": 0.12418209761381149, "rewards/GeoLocAccuracyV2ORM/mean": 0.8166667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.33800238370895386, "rewards/GeoVisalEntityMatch2ORM/mean": 0.659970223903656, "rewards/GeoVisalEntityMatch2ORM/std": 0.20294350385665894, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1036, "train_speed(iter/s)": 0.025717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 391.0, "completions/mean_length": 353.4583435058594, "completions/min_length": 302.0, "epoch": 0.12425113826982986, "grad_norm": 1.3531278002286555, "kl": 0.9050153493881226, "learning_rate": 9.675288623118722e-07, "loss": 0.0009023423190228641, "memory(GiB)": 165.8, "reward": 2.257260322570801, "reward_std": 0.44522905349731445, "rewards/GeoLocAccuracyV2ORM/mean": 0.7395833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4411657154560089, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6426767706871033, "rewards/GeoVisalEntityMatch2ORM/std": 0.150332972407341, "rewards/MathFormat/mean": 0.875, "rewards/MathFormat/std": 0.33245497941970825, "step": 1037, "train_speed(iter/s)": 0.025717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 342.44793701171875, "completions/min_length": 291.0, "epoch": 0.12437095614665708, "grad_norm": 1.2456506788029829, "kl": 0.7004247903823853, "learning_rate": 9.674614307640367e-07, "loss": 0.0007034838199615479, "memory(GiB)": 165.8, "reward": 2.8154513835906982, "reward_std": 0.09103308618068695, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8154515027999878, "rewards/GeoVisalEntityMatch2ORM/std": 0.23591609299182892, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1038, "train_speed(iter/s)": 0.025718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 363.78125, "completions/min_length": 306.0, "epoch": 0.12449077402348431, "grad_norm": 1.1592971563013572, "kl": 0.7401541769504547, "learning_rate": 9.673939316273377e-07, "loss": 0.0007411192054860294, "memory(GiB)": 165.8, "reward": 2.427020311355591, "reward_std": 0.271213173866272, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.36696958541870117, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6186869144439697, "rewards/GeoVisalEntityMatch2ORM/std": 0.14231504499912262, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1039, "train_speed(iter/s)": 0.02572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/mean_length": 372.3958435058594, "completions/min_length": 327.0, "epoch": 0.12461059190031153, "grad_norm": 1.215425368993479, "kl": 0.7365065813064575, "learning_rate": 9.67326364911535e-07, "loss": 0.0007377136498689651, "memory(GiB)": 165.8, "reward": 2.702678680419922, "reward_std": 0.11157512664794922, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7026786208152771, "rewards/GeoVisalEntityMatch2ORM/std": 0.2386307716369629, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1040, "train_speed(iter/s)": 0.025728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.0, "completions/mean_length": 360.4270935058594, "completions/min_length": 300.0, "epoch": 0.12473040977713874, "grad_norm": 1.1863908737283235, "kl": 0.7216721475124359, "learning_rate": 9.672587306263979e-07, "loss": 0.0007227609748952091, "memory(GiB)": 165.8, "reward": 2.574416160583496, "reward_std": 0.23139065504074097, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.33245497941970825, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6994160413742065, "rewards/GeoVisalEntityMatch2ORM/std": 0.14546427130699158, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1041, "train_speed(iter/s)": 0.025736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 414.0, "completions/mean_length": 363.6770935058594, "completions/min_length": 317.0, "epoch": 0.12485022765396597, "grad_norm": 1.1422631147031177, "kl": 0.8195094466209412, "learning_rate": 9.671910287817052e-07, "loss": 0.0008109286427497864, "memory(GiB)": 165.8, "reward": 2.7318286895751953, "reward_std": 0.1835915744304657, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003034889698029, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7609953880310059, "rewards/GeoVisalEntityMatch2ORM/std": 0.17930877208709717, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1042, "train_speed(iter/s)": 0.025737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 397.0, "completions/mean_length": 349.2395935058594, "completions/min_length": 309.0, "epoch": 0.12497004553079319, "grad_norm": 1.2739043610992598, "kl": 0.702932208776474, "learning_rate": 9.671232593872457e-07, "loss": 0.0007041519274935126, "memory(GiB)": 165.8, "reward": 2.6408233642578125, "reward_std": 0.07385799288749695, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6408234238624573, "rewards/GeoVisalEntityMatch2ORM/std": 0.18930433690547943, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1043, "train_speed(iter/s)": 0.025745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/mean_length": 363.125, "completions/min_length": 301.0, "epoch": 0.1250898634076204, "grad_norm": 1.1733500315781331, "kl": 0.7080602645874023, "learning_rate": 9.67055422452818e-07, "loss": 0.0007088296115398407, "memory(GiB)": 165.8, "reward": 2.586421012878418, "reward_std": 0.16176070272922516, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6114211082458496, "rewards/GeoVisalEntityMatch2ORM/std": 0.12946496903896332, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1044, "train_speed(iter/s)": 0.025752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/mean_length": 351.65625, "completions/min_length": 298.0, "epoch": 0.12520968128444765, "grad_norm": 1.2728138506454232, "kl": 0.7402370870113373, "learning_rate": 9.669875179882303e-07, "loss": 0.0007416183943860233, "memory(GiB)": 165.8, "reward": 2.6476221084594727, "reward_std": 0.11308388411998749, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6476221084594727, "rewards/GeoVisalEntityMatch2ORM/std": 0.13619020581245422, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1045, "train_speed(iter/s)": 0.025754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/mean_length": 370.3333435058594, "completions/min_length": 297.0, "epoch": 0.12532949916127487, "grad_norm": 1.1550912911117082, "kl": 0.664361983537674, "learning_rate": 9.669195460033013e-07, "loss": 0.0006651381845586002, "memory(GiB)": 165.8, "reward": 2.3299107551574707, "reward_std": 0.15973258018493652, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5215774178504944, "rewards/GeoVisalEntityMatch2ORM/std": 0.19910642504692078, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1046, "train_speed(iter/s)": 0.02576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 427.0, "completions/mean_length": 377.7083435058594, "completions/min_length": 336.0, "epoch": 0.12544931703810208, "grad_norm": 1.1243688104546852, "kl": 0.6976429522037506, "learning_rate": 9.668515065078582e-07, "loss": 0.0006981231272220612, "memory(GiB)": 165.8, "reward": 2.6473958492279053, "reward_std": 0.15209399163722992, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6682292222976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.22467751801013947, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1047, "train_speed(iter/s)": 0.025761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 423.0, "completions/mean_length": 364.04168701171875, "completions/min_length": 313.0, "epoch": 0.1255691349149293, "grad_norm": 1.2061200408637505, "kl": 0.7250948548316956, "learning_rate": 9.66783399511739e-07, "loss": 0.0007253339281305671, "memory(GiB)": 165.8, "reward": 2.842336416244507, "reward_std": 0.11473904550075531, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8423363566398621, "rewards/GeoVisalEntityMatch2ORM/std": 0.15182118117809296, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1048, "train_speed(iter/s)": 0.025768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 365.7395935058594, "completions/min_length": 316.0, "epoch": 0.12568895279175654, "grad_norm": 1.0948575866120338, "kl": 0.7246094346046448, "learning_rate": 9.66715225024791e-07, "loss": 0.0007277354598045349, "memory(GiB)": 165.8, "reward": 2.676868438720703, "reward_std": 0.08455280214548111, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6768683791160583, "rewards/GeoVisalEntityMatch2ORM/std": 0.1074448749423027, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1049, "train_speed(iter/s)": 0.025775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.0, "completions/mean_length": 359.65625, "completions/min_length": 291.0, "epoch": 0.12580877066858376, "grad_norm": 1.1667927367093311, "kl": 0.727827250957489, "learning_rate": 9.666469830568713e-07, "loss": 0.0007295943796634674, "memory(GiB)": 165.8, "reward": 2.3301382064819336, "reward_std": 0.09625177085399628, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5801383256912231, "rewards/GeoVisalEntityMatch2ORM/std": 0.21437682211399078, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1050, "train_speed(iter/s)": 0.025774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 387.01043701171875, "completions/min_length": 340.0, "epoch": 0.12592858854541097, "grad_norm": 1.1243887385985887, "kl": 0.7281507551670074, "learning_rate": 9.66578673617847e-07, "loss": 0.0007292802329175174, "memory(GiB)": 165.8, "reward": 2.6278109550476074, "reward_std": 0.08938619494438171, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6278108954429626, "rewards/GeoVisalEntityMatch2ORM/std": 0.1101505234837532, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1051, "train_speed(iter/s)": 0.025782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 350.79168701171875, "completions/min_length": 307.0, "epoch": 0.1260484064222382, "grad_norm": 1.1755909265876194, "kl": 0.7224728465080261, "learning_rate": 9.665102967175945e-07, "loss": 0.0007235134835354984, "memory(GiB)": 165.8, "reward": 2.2993552684783936, "reward_std": 0.15081775188446045, "rewards/GeoLocAccuracyV2ORM/mean": 0.7708333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.42250296473503113, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5285218954086304, "rewards/GeoVisalEntityMatch2ORM/std": 0.1318942904472351, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1052, "train_speed(iter/s)": 0.025789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 443.0, "completions/mean_length": 383.1875, "completions/min_length": 341.0, "epoch": 0.1261682242990654, "grad_norm": 1.1156788352118716, "kl": 0.9765416979789734, "learning_rate": 9.664418523660004e-07, "loss": 0.0009748985758051276, "memory(GiB)": 165.8, "reward": 2.3459203243255615, "reward_std": 0.3329806923866272, "rewards/GeoLocAccuracyV2ORM/mean": 0.84375, "rewards/GeoLocAccuracyV2ORM/std": 0.3649981915950775, "rewards/GeoVisalEntityMatch2ORM/mean": 0.658420205116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.1206328272819519, "rewards/MathFormat/mean": 0.84375, "rewards/MathFormat/std": 0.3649981915950775, "step": 1053, "train_speed(iter/s)": 0.025789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 384.4375, "completions/min_length": 316.0, "epoch": 0.12628804217589265, "grad_norm": 1.1178518480182171, "kl": 0.7248474061489105, "learning_rate": 9.663733405729604e-07, "loss": 0.0007264527375809848, "memory(GiB)": 165.8, "reward": 2.3489348888397217, "reward_std": 0.2631404399871826, "rewards/GeoLocAccuracyV2ORM/mean": 0.8187500834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.38066771626472473, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5301846861839294, "rewards/GeoVisalEntityMatch2ORM/std": 0.1779479682445526, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1054, "train_speed(iter/s)": 0.025794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/mean_length": 372.79168701171875, "completions/min_length": 315.0, "epoch": 0.12640786005271987, "grad_norm": 1.1661022819767597, "kl": 0.7463468015193939, "learning_rate": 9.66304761348381e-07, "loss": 0.0007474521989934146, "memory(GiB)": 165.8, "reward": 2.4126157760620117, "reward_std": 0.13305388391017914, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.33245497941970825, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5876157283782959, "rewards/GeoVisalEntityMatch2ORM/std": 0.16647060215473175, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1055, "train_speed(iter/s)": 0.025799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 379.6770935058594, "completions/min_length": 313.0, "epoch": 0.12652767792954708, "grad_norm": 1.110160786861612, "kl": 0.7013111114501953, "learning_rate": 9.66236114702178e-07, "loss": 0.0007014995208010077, "memory(GiB)": 165.8, "reward": 2.742311477661133, "reward_std": 0.2128095179796219, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8089781999588013, "rewards/GeoVisalEntityMatch2ORM/std": 0.1136305034160614, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1056, "train_speed(iter/s)": 0.025806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.0, "completions/mean_length": 369.0, "completions/min_length": 299.0, "epoch": 0.1266474958063743, "grad_norm": 1.1989283672936375, "kl": 0.7365289628505707, "learning_rate": 9.66167400644276e-07, "loss": 0.0007381712784990668, "memory(GiB)": 165.8, "reward": 2.631882429122925, "reward_std": 0.09425748884677887, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6318824887275696, "rewards/GeoVisalEntityMatch2ORM/std": 0.1783338487148285, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1057, "train_speed(iter/s)": 0.025813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 475.0, "completions/mean_length": 386.57293701171875, "completions/min_length": 331.0, "epoch": 0.12676731368320154, "grad_norm": 1.1431925983751314, "kl": 1.0907817482948303, "learning_rate": 9.66098619184611e-07, "loss": 0.0010846804361790419, "memory(GiB)": 165.8, "reward": 2.2532196044921875, "reward_std": 0.4078059196472168, "rewards/GeoLocAccuracyV2ORM/mean": 0.7145833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.41926732659339905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6740530729293823, "rewards/GeoVisalEntityMatch2ORM/std": 0.20820017158985138, "rewards/MathFormat/mean": 0.8645833730697632, "rewards/MathFormat/std": 0.34396424889564514, "step": 1058, "train_speed(iter/s)": 0.025812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 386.78125, "completions/min_length": 331.0, "epoch": 0.12688713156002876, "grad_norm": 1.1817888333079096, "kl": 0.725432276725769, "learning_rate": 9.660297703331272e-07, "loss": 0.0007255661184899509, "memory(GiB)": 165.8, "reward": 2.439178466796875, "reward_std": 0.13448908925056458, "rewards/GeoLocAccuracyV2ORM/mean": 0.8166667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.33800238370895386, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6225115656852722, "rewards/GeoVisalEntityMatch2ORM/std": 0.20257353782653809, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1059, "train_speed(iter/s)": 0.025822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/mean_length": 374.96875, "completions/min_length": 335.0, "epoch": 0.12700694943685598, "grad_norm": 1.1830276775680477, "kl": 0.7425413131713867, "learning_rate": 9.659608540997799e-07, "loss": 0.0007433692808263004, "memory(GiB)": 165.8, "reward": 2.542386054992676, "reward_std": 0.14115002751350403, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5507192611694336, "rewards/GeoVisalEntityMatch2ORM/std": 0.1280697137117386, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1060, "train_speed(iter/s)": 0.025832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 410.0, "completions/mean_length": 365.21875, "completions/min_length": 295.0, "epoch": 0.1271267673136832, "grad_norm": 1.1477957491077286, "kl": 0.6656101047992706, "learning_rate": 9.65891870494533e-07, "loss": 0.0006673932075500488, "memory(GiB)": 165.8, "reward": 2.688694953918457, "reward_std": 0.10495783388614655, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.688694953918457, "rewards/GeoVisalEntityMatch2ORM/std": 0.15480223298072815, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1061, "train_speed(iter/s)": 0.025835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.0, "completions/mean_length": 366.8333435058594, "completions/min_length": 317.0, "epoch": 0.12724658519051044, "grad_norm": 1.0004855999489206, "kl": 0.6943696141242981, "learning_rate": 9.65822819527361e-07, "loss": 0.0006947293877601624, "memory(GiB)": 165.8, "reward": 2.74428653717041, "reward_std": 0.06627273559570312, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7442866563796997, "rewards/GeoVisalEntityMatch2ORM/std": 0.20514696836471558, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1062, "train_speed(iter/s)": 0.025837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 413.0, "completions/mean_length": 373.2083435058594, "completions/min_length": 329.0, "epoch": 0.12736640306733765, "grad_norm": 1.1474166519500262, "kl": 0.6829322278499603, "learning_rate": 9.657537012082475e-07, "loss": 0.0006845196476206183, "memory(GiB)": 165.8, "reward": 2.5170140266418457, "reward_std": 0.11560270190238953, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.717013955116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.16060884296894073, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1063, "train_speed(iter/s)": 0.025844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 424.0, "completions/mean_length": 367.8125, "completions/min_length": 323.0, "epoch": 0.12748622094416487, "grad_norm": 1.1637816917415114, "kl": 0.7397647202014923, "learning_rate": 9.656845155471862e-07, "loss": 0.0007408385863527656, "memory(GiB)": 165.8, "reward": 2.308333396911621, "reward_std": 0.20325356721878052, "rewards/GeoLocAccuracyV2ORM/mean": 0.6416666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.4587549567222595, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6666666865348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.2830907106399536, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1064, "train_speed(iter/s)": 0.025851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 375.0, "completions/min_length": 317.0, "epoch": 0.12760603882099208, "grad_norm": 0.9557516027103998, "kl": 0.7392153143882751, "learning_rate": 9.656152625541806e-07, "loss": 0.0007397755980491638, "memory(GiB)": 165.8, "reward": 2.8714656829833984, "reward_std": 0.0680856853723526, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.871465802192688, "rewards/GeoVisalEntityMatch2ORM/std": 0.10887262970209122, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1065, "train_speed(iter/s)": 0.025858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 381.4270935058594, "completions/min_length": 295.0, "epoch": 0.1277258566978193, "grad_norm": 1.4396063309522418, "kl": 0.7088020741939545, "learning_rate": 9.655459422392435e-07, "loss": 0.000708912848494947, "memory(GiB)": 165.8, "reward": 2.772569417953491, "reward_std": 0.10332657396793365, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7725694179534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.1654469221830368, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1066, "train_speed(iter/s)": 0.025864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 603.0, "completions/mean_length": 387.9583435058594, "completions/min_length": 324.0, "epoch": 0.12784567457464655, "grad_norm": 1.1037614857895364, "kl": 0.7995970249176025, "learning_rate": 9.65476554612398e-07, "loss": 0.0008025343413464725, "memory(GiB)": 165.8, "reward": 2.545254707336426, "reward_std": 0.2436206191778183, "rewards/GeoLocAccuracyV2ORM/mean": 0.8479167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.31887274980545044, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7077546119689941, "rewards/GeoVisalEntityMatch2ORM/std": 0.11624803394079208, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1067, "train_speed(iter/s)": 0.025871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 427.0, "completions/mean_length": 374.35418701171875, "completions/min_length": 325.0, "epoch": 0.12796549245147376, "grad_norm": 1.1240329372307885, "kl": 1.0950595140457153, "learning_rate": 9.654070996836764e-07, "loss": 0.0010954737663269043, "memory(GiB)": 165.8, "reward": 2.072747230529785, "reward_std": 0.3223339021205902, "rewards/GeoLocAccuracyV2ORM/mean": 0.5854166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.4935327470302582, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7060806155204773, "rewards/GeoVisalEntityMatch2ORM/std": 0.1620078831911087, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 1068, "train_speed(iter/s)": 0.025871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 370.21875, "completions/min_length": 311.0, "epoch": 0.12808531032830098, "grad_norm": 1.2226646909863867, "kl": 0.7412737607955933, "learning_rate": 9.653375774631211e-07, "loss": 0.0007426589727401733, "memory(GiB)": 165.8, "reward": 2.561570167541504, "reward_std": 0.17944705486297607, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5928199887275696, "rewards/GeoVisalEntityMatch2ORM/std": 0.15947221219539642, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1069, "train_speed(iter/s)": 0.025877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 394.04168701171875, "completions/min_length": 330.0, "epoch": 0.1282051282051282, "grad_norm": 1.1297704956439687, "kl": 0.7044177651405334, "learning_rate": 9.652679879607843e-07, "loss": 0.0007055911119095981, "memory(GiB)": 165.8, "reward": 2.6372766494750977, "reward_std": 0.14945083856582642, "rewards/GeoLocAccuracyV2ORM/mean": 0.8500000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.3138890266418457, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7872767448425293, "rewards/GeoVisalEntityMatch2ORM/std": 0.23131853342056274, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1070, "train_speed(iter/s)": 0.025886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 388.26043701171875, "completions/min_length": 325.0, "epoch": 0.12832494608195544, "grad_norm": 1.1040102383659187, "kl": 0.7753864824771881, "learning_rate": 9.651983311867274e-07, "loss": 0.0007761679589748383, "memory(GiB)": 165.8, "reward": 2.0531249046325684, "reward_std": 0.1906185746192932, "rewards/GeoLocAccuracyV2ORM/mean": 0.5687500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.44799259305000305, "rewards/GeoVisalEntityMatch2ORM/mean": 0.484375, "rewards/GeoVisalEntityMatch2ORM/std": 0.2821430563926697, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1071, "train_speed(iter/s)": 0.025893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 377.57293701171875, "completions/min_length": 309.0, "epoch": 0.12844476395878265, "grad_norm": 1.2033111247758168, "kl": 0.8020365238189697, "learning_rate": 9.651286071510222e-07, "loss": 0.0008030831813812256, "memory(GiB)": 165.8, "reward": 2.7326183319091797, "reward_std": 0.08458682894706726, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7326182126998901, "rewards/GeoVisalEntityMatch2ORM/std": 0.19577649235725403, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1072, "train_speed(iter/s)": 0.0259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 394.59375, "completions/min_length": 351.0, "epoch": 0.12856458183560987, "grad_norm": 1.1972929761133866, "kl": 0.7573293149471283, "learning_rate": 9.650588158637494e-07, "loss": 0.0007575949421152472, "memory(GiB)": 165.8, "reward": 2.3432435989379883, "reward_std": 0.23132692277431488, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5349101424217224, "rewards/GeoVisalEntityMatch2ORM/std": 0.22025880217552185, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1073, "train_speed(iter/s)": 0.025907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 387.63543701171875, "completions/min_length": 325.0, "epoch": 0.1286843997124371, "grad_norm": 1.2175290828074286, "kl": 0.733207494020462, "learning_rate": 9.649889573350004e-07, "loss": 0.0007344583864323795, "memory(GiB)": 165.8, "reward": 2.504439353942871, "reward_std": 0.08667734265327454, "rewards/GeoLocAccuracyV2ORM/mean": 0.7520833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4321255087852478, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7523561716079712, "rewards/GeoVisalEntityMatch2ORM/std": 0.1960674524307251, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1074, "train_speed(iter/s)": 0.025913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 406.5625, "completions/min_length": 340.0, "epoch": 0.12880421758926433, "grad_norm": 1.0893133805453872, "kl": 0.7700877487659454, "learning_rate": 9.649190315748757e-07, "loss": 0.0007708495249971747, "memory(GiB)": 165.8, "reward": 2.383246421813965, "reward_std": 0.13661760091781616, "rewards/GeoLocAccuracyV2ORM/mean": 0.8020833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3789123296737671, "rewards/GeoVisalEntityMatch2ORM/mean": 0.581163227558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.17650458216667175, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1075, "train_speed(iter/s)": 0.02592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 396.60418701171875, "completions/min_length": 324.0, "epoch": 0.12892403546609155, "grad_norm": 2.290703322592438, "kl": 0.9211163818836212, "learning_rate": 9.648490385934856e-07, "loss": 0.0009226029505953193, "memory(GiB)": 165.8, "reward": 2.6208457946777344, "reward_std": 0.09388380497694016, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6208457350730896, "rewards/GeoVisalEntityMatch2ORM/std": 0.2546853721141815, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1076, "train_speed(iter/s)": 0.025927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/mean_length": 385.84375, "completions/min_length": 311.0, "epoch": 0.12904385334291876, "grad_norm": 1.1500653369037879, "kl": 0.7049891352653503, "learning_rate": 9.6477897840095e-07, "loss": 0.0007061424548737705, "memory(GiB)": 165.8, "reward": 2.6241986751556396, "reward_std": 0.14121031761169434, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7908653616905212, "rewards/GeoVisalEntityMatch2ORM/std": 0.11914564669132233, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1077, "train_speed(iter/s)": 0.025934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 395.32293701171875, "completions/min_length": 340.0, "epoch": 0.12916367121974598, "grad_norm": 0.963230996705433, "kl": 0.7537297010421753, "learning_rate": 9.647088510073993e-07, "loss": 0.00075516477227211, "memory(GiB)": 165.8, "reward": 2.6292378902435303, "reward_std": 0.23605811595916748, "rewards/GeoLocAccuracyV2ORM/mean": 0.8500000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.35183727741241455, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7792377471923828, "rewards/GeoVisalEntityMatch2ORM/std": 0.21680928766727448, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1078, "train_speed(iter/s)": 0.02594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 379.13543701171875, "completions/min_length": 335.0, "epoch": 0.1292834890965732, "grad_norm": 1.254020352917123, "kl": 0.7136467397212982, "learning_rate": 9.646386564229723e-07, "loss": 0.0007159486413002014, "memory(GiB)": 165.8, "reward": 2.5931355953216553, "reward_std": 0.15163764357566833, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.40824830532073975, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8014690279960632, "rewards/GeoVisalEntityMatch2ORM/std": 0.11972072720527649, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1079, "train_speed(iter/s)": 0.025946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 387.3333435058594, "completions/min_length": 332.0, "epoch": 0.12940330697340044, "grad_norm": 1.1250473793615154, "kl": 0.776026725769043, "learning_rate": 9.645683946578187e-07, "loss": 0.0007775624981150031, "memory(GiB)": 165.8, "reward": 2.7782740592956543, "reward_std": 0.07075756788253784, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7782738208770752, "rewards/GeoVisalEntityMatch2ORM/std": 0.14838068187236786, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1080, "train_speed(iter/s)": 0.025952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/mean_length": 400.72918701171875, "completions/min_length": 349.0, "epoch": 0.12952312485022766, "grad_norm": 1.0860095628986028, "kl": 0.7640403807163239, "learning_rate": 9.644980657220972e-07, "loss": 0.0007655024528503418, "memory(GiB)": 165.8, "reward": 2.394841194152832, "reward_std": 0.16922545433044434, "rewards/GeoLocAccuracyV2ORM/mean": 0.7916666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.40824827551841736, "rewards/GeoVisalEntityMatch2ORM/mean": 0.60317462682724, "rewards/GeoVisalEntityMatch2ORM/std": 0.20968008041381836, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1081, "train_speed(iter/s)": 0.025959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 463.0, "completions/mean_length": 414.97918701171875, "completions/min_length": 380.0, "epoch": 0.12964294272705487, "grad_norm": 1.1474950751672872, "kl": 0.7983390688896179, "learning_rate": 9.644276696259765e-07, "loss": 0.000798674940597266, "memory(GiB)": 165.8, "reward": 2.012326717376709, "reward_std": 0.34026917815208435, "rewards/GeoLocAccuracyV2ORM/mean": 0.627083420753479, "rewards/GeoLocAccuracyV2ORM/std": 0.4482666850090027, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5310764312744141, "rewards/GeoVisalEntityMatch2ORM/std": 0.26250725984573364, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 1082, "train_speed(iter/s)": 0.02596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 420.71875, "completions/min_length": 372.0, "epoch": 0.1297627606038821, "grad_norm": 1.142319567963428, "kl": 0.7502825260162354, "learning_rate": 9.64357206379635e-07, "loss": 0.0007504845852963626, "memory(GiB)": 165.8, "reward": 2.6904101371765137, "reward_std": 0.10025037080049515, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.698743462562561, "rewards/GeoVisalEntityMatch2ORM/std": 0.14072956144809723, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1083, "train_speed(iter/s)": 0.025967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 399.66668701171875, "completions/min_length": 340.0, "epoch": 0.12988257848070933, "grad_norm": 1.2330743696342896, "kl": 0.7501913011074066, "learning_rate": 9.64286675993261e-07, "loss": 0.0007510620052926242, "memory(GiB)": 165.8, "reward": 2.69502329826355, "reward_std": 0.089688241481781, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6950231790542603, "rewards/GeoVisalEntityMatch2ORM/std": 0.22225800156593323, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1084, "train_speed(iter/s)": 0.025974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 411.38543701171875, "completions/min_length": 363.0, "epoch": 0.13000239635753655, "grad_norm": 1.1676877680559188, "kl": 0.766894519329071, "learning_rate": 9.642160784770518e-07, "loss": 0.0007682144641876221, "memory(GiB)": 165.8, "reward": 2.3513994216918945, "reward_std": 0.112688347697258, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6013994216918945, "rewards/GeoVisalEntityMatch2ORM/std": 0.14296096563339233, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1085, "train_speed(iter/s)": 0.025981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 486.0, "completions/mean_length": 414.6875, "completions/min_length": 370.0, "epoch": 0.13012221423436376, "grad_norm": 1.0814726680272488, "kl": 0.7281382083892822, "learning_rate": 9.641454138412152e-07, "loss": 0.0007299880380742252, "memory(GiB)": 165.8, "reward": 2.6305432319641113, "reward_std": 0.13451838493347168, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.29199856519699097, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7555431127548218, "rewards/GeoVisalEntityMatch2ORM/std": 0.21118997037410736, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1086, "train_speed(iter/s)": 0.025987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 476.0, "completions/mean_length": 413.15625, "completions/min_length": 368.0, "epoch": 0.13024203211119098, "grad_norm": 0.9354128684406181, "kl": 0.8022629618644714, "learning_rate": 9.640746820959683e-07, "loss": 0.0008037214865908027, "memory(GiB)": 165.8, "reward": 2.293518543243408, "reward_std": 0.057901568710803986, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7935185432434082, "rewards/GeoVisalEntityMatch2ORM/std": 0.20761753618717194, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 1087, "train_speed(iter/s)": 0.025987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 443.5520935058594, "completions/min_length": 403.0, "epoch": 0.13036184998801822, "grad_norm": 1.0175204991700617, "kl": 0.734151154756546, "learning_rate": 9.64003883251538e-07, "loss": 0.000734801113139838, "memory(GiB)": 165.8, "reward": 2.7252604961395264, "reward_std": 0.1872260868549347, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.74609375, "rewards/GeoVisalEntityMatch2ORM/std": 0.1177746132016182, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1088, "train_speed(iter/s)": 0.025993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 496.0, "completions/mean_length": 433.46875, "completions/min_length": 383.0, "epoch": 0.13048166786484544, "grad_norm": 1.1484711042389375, "kl": 0.7424640357494354, "learning_rate": 9.63933017318161e-07, "loss": 0.0007442733040079474, "memory(GiB)": 165.8, "reward": 2.7667617797851562, "reward_std": 0.0973646342754364, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7750951051712036, "rewards/GeoVisalEntityMatch2ORM/std": 0.14796341955661774, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1089, "train_speed(iter/s)": 0.025999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 433.4375, "completions/min_length": 387.0, "epoch": 0.13060148574167266, "grad_norm": 1.0252190955443272, "kl": 0.7844124138355255, "learning_rate": 9.638620843060835e-07, "loss": 0.0007855507428757846, "memory(GiB)": 165.8, "reward": 2.4791953563690186, "reward_std": 0.17149946093559265, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5000286102294922, "rewards/GeoVisalEntityMatch2ORM/std": 0.13724663853645325, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1090, "train_speed(iter/s)": 0.026005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 496.0, "completions/mean_length": 437.26043701171875, "completions/min_length": 356.0, "epoch": 0.13072130361849987, "grad_norm": 1.0796924505209917, "kl": 0.7157692909240723, "learning_rate": 9.637910842255615e-07, "loss": 0.0007160952081903815, "memory(GiB)": 165.8, "reward": 2.7296504974365234, "reward_std": 0.1353364884853363, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7504836320877075, "rewards/GeoVisalEntityMatch2ORM/std": 0.18915310502052307, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1091, "train_speed(iter/s)": 0.026011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/mean_length": 443.63543701171875, "completions/min_length": 398.0, "epoch": 0.1308411214953271, "grad_norm": 1.093506934998777, "kl": 0.775530070066452, "learning_rate": 9.637200170868605e-07, "loss": 0.0007773749530315399, "memory(GiB)": 165.8, "reward": 2.370312452316284, "reward_std": 0.1369493007659912, "rewards/GeoLocAccuracyV2ORM/mean": 0.8250000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5453125238418579, "rewards/GeoVisalEntityMatch2ORM/std": 0.1254105120897293, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1092, "train_speed(iter/s)": 0.026017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 469.0, "completions/mean_length": 423.7708435058594, "completions/min_length": 346.0, "epoch": 0.13096093937215433, "grad_norm": 1.297304241480976, "kl": 2.1367239952087402, "learning_rate": 9.636488829002563e-07, "loss": 0.0021300986409187317, "memory(GiB)": 165.8, "reward": 2.4181923866271973, "reward_std": 0.2810429632663727, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.374634325504303, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7515256404876709, "rewards/GeoVisalEntityMatch2ORM/std": 0.18234258890151978, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 1093, "train_speed(iter/s)": 0.026021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 440.0208435058594, "completions/min_length": 345.0, "epoch": 0.13108075724898155, "grad_norm": 0.9166063307691237, "kl": 0.734380841255188, "learning_rate": 9.635776816760337e-07, "loss": 0.0007344683399423957, "memory(GiB)": 165.8, "reward": 2.741840362548828, "reward_std": 0.1422150731086731, "rewards/GeoLocAccuracyV2ORM/mean": 0.9250000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8168403506278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.16281254589557648, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1094, "train_speed(iter/s)": 0.02603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/mean_length": 431.78125, "completions/min_length": 379.0, "epoch": 0.13120057512580877, "grad_norm": 0.9588612749542758, "kl": 0.711978405714035, "learning_rate": 9.635064134244874e-07, "loss": 0.0007124344701878726, "memory(GiB)": 165.8, "reward": 2.7609128952026367, "reward_std": 0.06592504680156708, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7609127759933472, "rewards/GeoVisalEntityMatch2ORM/std": 0.15855565667152405, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1095, "train_speed(iter/s)": 0.026036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 417.35418701171875, "completions/min_length": 375.0, "epoch": 0.13132039300263598, "grad_norm": 1.1535407267010127, "kl": 0.7708742916584015, "learning_rate": 9.634350781559222e-07, "loss": 0.000771214603446424, "memory(GiB)": 165.8, "reward": 2.525496006011963, "reward_std": 0.13577593863010406, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5254960060119629, "rewards/GeoVisalEntityMatch2ORM/std": 0.15596383810043335, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1096, "train_speed(iter/s)": 0.026043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07291666666666667, "completions/max_length": 469.0, "completions/mean_length": 420.19793701171875, "completions/min_length": 376.0, "epoch": 0.13144021087946323, "grad_norm": 1.1009155490560842, "kl": 0.7977803945541382, "learning_rate": 9.633636758806521e-07, "loss": 0.0007944554090499878, "memory(GiB)": 165.8, "reward": 2.461028575897217, "reward_std": 0.356425940990448, "rewards/GeoLocAccuracyV2ORM/mean": 0.9020833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2901829481124878, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6318618059158325, "rewards/GeoVisalEntityMatch2ORM/std": 0.16560408473014832, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136451959609985, "step": 1097, "train_speed(iter/s)": 0.026044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/mean_length": 417.625, "completions/min_length": 371.0, "epoch": 0.13156002875629044, "grad_norm": 0.8902208914967066, "kl": 0.7225980758666992, "learning_rate": 9.632922066090006e-07, "loss": 0.0007236252422444522, "memory(GiB)": 165.8, "reward": 2.6927084922790527, "reward_std": 0.07276354730129242, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6927083730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.22484637796878815, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1098, "train_speed(iter/s)": 0.026051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/mean_length": 430.04168701171875, "completions/min_length": 380.0, "epoch": 0.13167984663311766, "grad_norm": 1.0619503362559062, "kl": 0.7502496242523193, "learning_rate": 9.632206703513018e-07, "loss": 0.000751785933971405, "memory(GiB)": 165.8, "reward": 2.75, "reward_std": 0.08657082170248032, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.75, "rewards/GeoVisalEntityMatch2ORM/std": 0.1528208702802658, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1099, "train_speed(iter/s)": 0.026058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 429.66668701171875, "completions/min_length": 382.0, "epoch": 0.13179966450994487, "grad_norm": 1.1568465957668865, "kl": 0.7291749715805054, "learning_rate": 9.631490671178985e-07, "loss": 0.0007306933403015137, "memory(GiB)": 165.8, "reward": 2.709301710128784, "reward_std": 0.1114402711391449, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7093015909194946, "rewards/GeoVisalEntityMatch2ORM/std": 0.1602720022201538, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1100, "train_speed(iter/s)": 0.026063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 431.40625, "completions/min_length": 376.0, "epoch": 0.13191948238677212, "grad_norm": 0.994763333405021, "kl": 0.7636890709400177, "learning_rate": 9.630773969191437e-07, "loss": 0.0007648319005966187, "memory(GiB)": 165.8, "reward": 2.6663320064544678, "reward_std": 0.08536599576473236, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6663318872451782, "rewards/GeoVisalEntityMatch2ORM/std": 0.14569155871868134, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1101, "train_speed(iter/s)": 0.026069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 440.15625, "completions/min_length": 397.0, "epoch": 0.13203930026359934, "grad_norm": 1.0180351026673935, "kl": 0.7207099795341492, "learning_rate": 9.630056597654002e-07, "loss": 0.0007226815214380622, "memory(GiB)": 165.8, "reward": 2.6622023582458496, "reward_std": 0.07058346271514893, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6622023582458496, "rewards/GeoVisalEntityMatch2ORM/std": 0.22741089761257172, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1102, "train_speed(iter/s)": 0.026075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 412.0208435058594, "completions/min_length": 374.0, "epoch": 0.13215911814042655, "grad_norm": 1.1077729236859468, "kl": 0.7391486763954163, "learning_rate": 9.6293385566704e-07, "loss": 0.00073961669113487, "memory(GiB)": 165.8, "reward": 2.6232638359069824, "reward_std": 0.19559186697006226, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6899306178092957, "rewards/GeoVisalEntityMatch2ORM/std": 0.2323472648859024, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1103, "train_speed(iter/s)": 0.026081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/mean_length": 437.1770935058594, "completions/min_length": 387.0, "epoch": 0.13227893601725377, "grad_norm": 1.115650007696811, "kl": 0.7681922614574432, "learning_rate": 9.628619846344453e-07, "loss": 0.0007691358914598823, "memory(GiB)": 165.8, "reward": 2.715278148651123, "reward_std": 0.08270353078842163, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7152778506278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.16243156790733337, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1104, "train_speed(iter/s)": 0.026088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 476.0, "completions/mean_length": 426.13543701171875, "completions/min_length": 386.0, "epoch": 0.13239875389408098, "grad_norm": 0.9986152989433827, "kl": 0.7300591468811035, "learning_rate": 9.627900466780073e-07, "loss": 0.0007326627965085208, "memory(GiB)": 165.8, "reward": 2.798032522201538, "reward_std": 0.06014744192361832, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7980324625968933, "rewards/GeoVisalEntityMatch2ORM/std": 0.19720590114593506, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1105, "train_speed(iter/s)": 0.026094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 427.75, "completions/min_length": 364.0, "epoch": 0.13251857177090823, "grad_norm": 1.0959072267289143, "kl": 0.7592073976993561, "learning_rate": 9.627180418081275e-07, "loss": 0.0007599840755574405, "memory(GiB)": 165.8, "reward": 2.6723713874816895, "reward_std": 0.0944095104932785, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6723710298538208, "rewards/GeoVisalEntityMatch2ORM/std": 0.14280471205711365, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1106, "train_speed(iter/s)": 0.0261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 433.26043701171875, "completions/min_length": 382.0, "epoch": 0.13263838964773544, "grad_norm": 1.103721671911954, "kl": 0.6929910778999329, "learning_rate": 9.62645970035217e-07, "loss": 0.0006935273995622993, "memory(GiB)": 165.8, "reward": 2.3803820610046387, "reward_std": 0.2021940052509308, "rewards/GeoLocAccuracyV2ORM/mean": 0.7250000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.3819616734981537, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6553819179534912, "rewards/GeoVisalEntityMatch2ORM/std": 0.14439824223518372, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1107, "train_speed(iter/s)": 0.026109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 484.0, "completions/mean_length": 441.7083435058594, "completions/min_length": 399.0, "epoch": 0.13275820752456266, "grad_norm": 1.0324718384540925, "kl": 0.7332083880901337, "learning_rate": 9.625738313696966e-07, "loss": 0.0007357001304626465, "memory(GiB)": 165.8, "reward": 2.6513311862945557, "reward_std": 0.22330451011657715, "rewards/GeoLocAccuracyV2ORM/mean": 0.8791667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.32246309518814087, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7721643447875977, "rewards/GeoVisalEntityMatch2ORM/std": 0.21813863515853882, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1108, "train_speed(iter/s)": 0.026115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 418.90625, "completions/min_length": 358.0, "epoch": 0.13287802540138988, "grad_norm": 1.0851553662454696, "kl": 0.720378041267395, "learning_rate": 9.62501625821996e-07, "loss": 0.000721494376193732, "memory(GiB)": 165.8, "reward": 2.7416090965270996, "reward_std": 0.20864737033843994, "rewards/GeoLocAccuracyV2ORM/mean": 0.9375, "rewards/GeoLocAccuracyV2ORM/std": 0.2433321177959442, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8041087985038757, "rewards/GeoVisalEntityMatch2ORM/std": 0.12400359660387039, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1109, "train_speed(iter/s)": 0.026121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 427.10418701171875, "completions/min_length": 381.0, "epoch": 0.13299784327821712, "grad_norm": 1.112135452378082, "kl": 0.7400838732719421, "learning_rate": 9.624293534025557e-07, "loss": 0.0007418828899972141, "memory(GiB)": 165.8, "reward": 2.483044147491455, "reward_std": 0.1514022946357727, "rewards/GeoLocAccuracyV2ORM/mean": 0.7708333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4225029945373535, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7122106552124023, "rewards/GeoVisalEntityMatch2ORM/std": 0.17700053751468658, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1110, "train_speed(iter/s)": 0.026125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 434.96875, "completions/min_length": 369.0, "epoch": 0.13311766115504434, "grad_norm": 0.9967105130055521, "kl": 0.7159760892391205, "learning_rate": 9.623570141218254e-07, "loss": 0.0007164577837102115, "memory(GiB)": 165.8, "reward": 2.565476179122925, "reward_std": 0.09481590986251831, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5654762387275696, "rewards/GeoVisalEntityMatch2ORM/std": 0.20220108330249786, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1111, "train_speed(iter/s)": 0.026126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 513.0, "completions/mean_length": 454.9375, "completions/min_length": 410.0, "epoch": 0.13323747903187155, "grad_norm": 1.0284413228506473, "kl": 0.7220926582813263, "learning_rate": 9.62284607990264e-07, "loss": 0.0007220382685773075, "memory(GiB)": 165.8, "reward": 2.610813617706299, "reward_std": 0.2195366621017456, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003036379814148, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6399801969528198, "rewards/GeoVisalEntityMatch2ORM/std": 0.122381791472435, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1112, "train_speed(iter/s)": 0.026133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 506.0, "completions/mean_length": 466.1875, "completions/min_length": 411.0, "epoch": 0.13335729690869877, "grad_norm": 1.0368806330349383, "kl": 0.8332785964012146, "learning_rate": 9.62212135018341e-07, "loss": 0.0008287479868158698, "memory(GiB)": 165.8, "reward": 2.576637029647827, "reward_std": 0.3498236835002899, "rewards/GeoLocAccuracyV2ORM/mean": 0.9479166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.22336146235466003, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6808035373687744, "rewards/GeoVisalEntityMatch2ORM/std": 0.14332956075668335, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336146235466003, "step": 1113, "train_speed(iter/s)": 0.026138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 510.0, "completions/mean_length": 450.0833435058594, "completions/min_length": 401.0, "epoch": 0.133477114785526, "grad_norm": 1.2159866177174488, "kl": 0.818728119134903, "learning_rate": 9.621395952165345e-07, "loss": 0.0008146291365846992, "memory(GiB)": 165.8, "reward": 2.5347719192504883, "reward_std": 0.1675436645746231, "rewards/GeoLocAccuracyV2ORM/mean": 0.9729167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.15250827372074127, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5722718238830566, "rewards/GeoVisalEntityMatch2ORM/std": 0.2141614705324173, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1114, "train_speed(iter/s)": 0.026143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 452.65625, "completions/min_length": 378.0, "epoch": 0.13359693266235323, "grad_norm": 1.079043102706275, "kl": 0.7474818527698517, "learning_rate": 9.620669885953333e-07, "loss": 0.0007483065128326416, "memory(GiB)": 165.8, "reward": 2.533705472946167, "reward_std": 0.31414270401000977, "rewards/GeoLocAccuracyV2ORM/mean": 0.9416667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.22834260761737823, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6128720641136169, "rewards/GeoVisalEntityMatch2ORM/std": 0.1446959227323532, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 1115, "train_speed(iter/s)": 0.026148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 440.2083435058594, "completions/min_length": 390.0, "epoch": 0.13371675053918045, "grad_norm": 1.1175688213387927, "kl": 0.728894054889679, "learning_rate": 9.61994315165235e-07, "loss": 0.0007288357010111213, "memory(GiB)": 165.8, "reward": 2.647524356842041, "reward_std": 0.08183534443378448, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.647524356842041, "rewards/GeoVisalEntityMatch2ORM/std": 0.14327780902385712, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1116, "train_speed(iter/s)": 0.026156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 508.0, "completions/mean_length": 444.40625, "completions/min_length": 388.0, "epoch": 0.13383656841600766, "grad_norm": 1.073345002319903, "kl": 1.118530124425888, "learning_rate": 9.619215749367475e-07, "loss": 0.001117505133152008, "memory(GiB)": 165.8, "reward": 2.240670680999756, "reward_std": 0.22444841265678406, "rewards/GeoLocAccuracyV2ORM/mean": 0.7750000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.4157428741455078, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6219205260276794, "rewards/GeoVisalEntityMatch2ORM/std": 0.12878896296024323, "rewards/MathFormat/mean": 0.84375, "rewards/MathFormat/std": 0.3649982213973999, "step": 1117, "train_speed(iter/s)": 0.026154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 420.8645935058594, "completions/min_length": 347.0, "epoch": 0.13395638629283488, "grad_norm": 0.9268483980483224, "kl": 0.7155219316482544, "learning_rate": 9.61848767920388e-07, "loss": 0.0007179677486419678, "memory(GiB)": 165.8, "reward": 2.6375527381896973, "reward_std": 0.0625050812959671, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6375526189804077, "rewards/GeoVisalEntityMatch2ORM/std": 0.22646327316761017, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1118, "train_speed(iter/s)": 0.02616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/mean_length": 408.5208435058594, "completions/min_length": 369.0, "epoch": 0.13407620416966212, "grad_norm": 1.054981297915027, "kl": 0.7565959692001343, "learning_rate": 9.617758941266834e-07, "loss": 0.0007570212474092841, "memory(GiB)": 165.8, "reward": 2.7260148525238037, "reward_std": 0.1056172251701355, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7260149717330933, "rewards/GeoVisalEntityMatch2ORM/std": 0.16048946976661682, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1119, "train_speed(iter/s)": 0.026166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 458.0, "completions/mean_length": 404.3645935058594, "completions/min_length": 354.0, "epoch": 0.13419602204648934, "grad_norm": 1.0693435715394928, "kl": 0.898673951625824, "learning_rate": 9.617029535661704e-07, "loss": 0.00090007483959198, "memory(GiB)": 165.8, "reward": 2.1808533668518066, "reward_std": 0.37572476267814636, "rewards/GeoLocAccuracyV2ORM/mean": 0.7458333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.4220044016838074, "rewards/GeoVisalEntityMatch2ORM/mean": 0.622519850730896, "rewards/GeoVisalEntityMatch2ORM/std": 0.2314434051513672, "rewards/MathFormat/mean": 0.8125, "rewards/MathFormat/std": 0.39236128330230713, "step": 1120, "train_speed(iter/s)": 0.026167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/mean_length": 398.5, "completions/min_length": 352.0, "epoch": 0.13431583992331655, "grad_norm": 1.193549028815874, "kl": 0.7329542338848114, "learning_rate": 9.616299462493951e-07, "loss": 0.000733132183086127, "memory(GiB)": 165.8, "reward": 2.725860118865967, "reward_std": 0.16016869246959686, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7466931343078613, "rewards/GeoVisalEntityMatch2ORM/std": 0.15302905440330505, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1121, "train_speed(iter/s)": 0.026173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 396.2083435058594, "completions/min_length": 340.0, "epoch": 0.13443565780014377, "grad_norm": 1.1387453402642833, "kl": 0.7634320557117462, "learning_rate": 9.615568721869138e-07, "loss": 0.0007657533278688788, "memory(GiB)": 165.8, "reward": 2.4804821014404297, "reward_std": 0.10991982370615005, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.48881542682647705, "rewards/GeoVisalEntityMatch2ORM/std": 0.10945060849189758, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1122, "train_speed(iter/s)": 0.026182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 383.1145935058594, "completions/min_length": 305.0, "epoch": 0.13455547567697101, "grad_norm": 1.1530162614255082, "kl": 0.7289322912693024, "learning_rate": 9.614837313892915e-07, "loss": 0.000729583203792572, "memory(GiB)": 165.8, "reward": 2.8079614639282227, "reward_std": 0.07703490555286407, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8079614043235779, "rewards/GeoVisalEntityMatch2ORM/std": 0.2347974181175232, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1123, "train_speed(iter/s)": 0.026187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 371.13543701171875, "completions/min_length": 317.0, "epoch": 0.13467529355379823, "grad_norm": 1.1440000712369067, "kl": 0.7518846392631531, "learning_rate": 9.614105238671038e-07, "loss": 0.000753358006477356, "memory(GiB)": 165.8, "reward": 2.5254628658294678, "reward_std": 0.16514462232589722, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087528228759766, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5671296119689941, "rewards/GeoVisalEntityMatch2ORM/std": 0.2718345522880554, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1124, "train_speed(iter/s)": 0.026193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 357.3333435058594, "completions/min_length": 327.0, "epoch": 0.13479511143062545, "grad_norm": 1.2241591501688542, "kl": 0.7433689534664154, "learning_rate": 9.613372496309355e-07, "loss": 0.0007437765598297119, "memory(GiB)": 165.8, "reward": 2.636979341506958, "reward_std": 0.11410277336835861, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6369791626930237, "rewards/GeoVisalEntityMatch2ORM/std": 0.24194876849651337, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1125, "train_speed(iter/s)": 0.026199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/mean_length": 381.32293701171875, "completions/min_length": 332.0, "epoch": 0.13491492930745266, "grad_norm": 1.2089089728014668, "kl": 0.7823553085327148, "learning_rate": 9.612639086913813e-07, "loss": 0.0007839898462407291, "memory(GiB)": 165.8, "reward": 2.4332387447357178, "reward_std": 0.0816785991191864, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.433238685131073, "rewards/GeoVisalEntityMatch2ORM/std": 0.20410962402820587, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1126, "train_speed(iter/s)": 0.026204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 403.0, "completions/mean_length": 350.6145935058594, "completions/min_length": 322.0, "epoch": 0.1350347471842799, "grad_norm": 1.0970887022116027, "kl": 0.7529805600643158, "learning_rate": 9.611905010590447e-07, "loss": 0.0007544110412709415, "memory(GiB)": 165.8, "reward": 2.636979103088379, "reward_std": 0.10114604234695435, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6369792222976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.2649970054626465, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1127, "train_speed(iter/s)": 0.026209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 398.0, "completions/mean_length": 348.16668701171875, "completions/min_length": 284.0, "epoch": 0.13515456506110712, "grad_norm": 1.2801367583238372, "kl": 0.7830772399902344, "learning_rate": 9.611170267445402e-07, "loss": 0.0007847597589716315, "memory(GiB)": 165.8, "reward": 2.5782408714294434, "reward_std": 0.1126934215426445, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5782407522201538, "rewards/GeoVisalEntityMatch2ORM/std": 0.18601471185684204, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1128, "train_speed(iter/s)": 0.026215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/mean_length": 356.07293701171875, "completions/min_length": 313.0, "epoch": 0.13527438293793434, "grad_norm": 1.0910909498499828, "kl": 0.7432673871517181, "learning_rate": 9.610434857584908e-07, "loss": 0.0007462098146788776, "memory(GiB)": 165.8, "reward": 2.7279765605926514, "reward_std": 0.09024926275014877, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7279762029647827, "rewards/GeoVisalEntityMatch2ORM/std": 0.18746015429496765, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1129, "train_speed(iter/s)": 0.026217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 397.0, "completions/mean_length": 351.28125, "completions/min_length": 319.0, "epoch": 0.13539420081476156, "grad_norm": 1.1039490018625275, "kl": 0.7710272073745728, "learning_rate": 9.609698781115297e-07, "loss": 0.0007724463939666748, "memory(GiB)": 165.8, "reward": 2.6532039642333984, "reward_std": 0.10649270564317703, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6615372896194458, "rewards/GeoVisalEntityMatch2ORM/std": 0.1369786560535431, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1130, "train_speed(iter/s)": 0.026224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/mean_length": 354.38543701171875, "completions/min_length": 305.0, "epoch": 0.13551401869158877, "grad_norm": 1.1639147384494863, "kl": 0.7007962465286255, "learning_rate": 9.608962038142996e-07, "loss": 0.0007015379960648715, "memory(GiB)": 165.8, "reward": 2.6054110527038574, "reward_std": 0.11135172843933105, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6054109334945679, "rewards/GeoVisalEntityMatch2ORM/std": 0.1193559318780899, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1131, "train_speed(iter/s)": 0.026228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 392.0, "completions/mean_length": 322.21875, "completions/min_length": 268.0, "epoch": 0.13563383656841602, "grad_norm": 1.3201868814446065, "kl": 0.7596472799777985, "learning_rate": 9.60822462877453e-07, "loss": 0.0007607936859130859, "memory(GiB)": 165.8, "reward": 2.629166841506958, "reward_std": 0.11998555809259415, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6291667222976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.19572925567626953, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1132, "train_speed(iter/s)": 0.026235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 390.0, "completions/mean_length": 342.91668701171875, "completions/min_length": 297.0, "epoch": 0.13575365444524323, "grad_norm": 1.1577215520010167, "kl": 0.7711005806922913, "learning_rate": 9.607486553116515e-07, "loss": 0.0007714828243479133, "memory(GiB)": 165.8, "reward": 2.4699816703796387, "reward_std": 0.19478195905685425, "rewards/GeoLocAccuracyV2ORM/mean": 0.9562500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.18851915001869202, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5137314796447754, "rewards/GeoVisalEntityMatch2ORM/std": 0.1312430053949356, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1133, "train_speed(iter/s)": 0.026239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 381.0, "completions/mean_length": 334.35418701171875, "completions/min_length": 289.0, "epoch": 0.13587347232207045, "grad_norm": 1.264297128724348, "kl": 0.7368108034133911, "learning_rate": 9.606747811275673e-07, "loss": 0.0007367829675786197, "memory(GiB)": 165.8, "reward": 2.4133105278015137, "reward_std": 0.22196313738822937, "rewards/GeoLocAccuracyV2ORM/mean": 0.9000000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.2959374189376831, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5133101940155029, "rewards/GeoVisalEntityMatch2ORM/std": 0.3326498866081238, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1134, "train_speed(iter/s)": 0.026248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 373.0, "completions/mean_length": 320.3958435058594, "completions/min_length": 262.0, "epoch": 0.13599329019889767, "grad_norm": 1.266736765750109, "kl": 0.775256335735321, "learning_rate": 9.606008403358812e-07, "loss": 0.0007758910651318729, "memory(GiB)": 165.8, "reward": 2.6713790893554688, "reward_std": 0.09432955086231232, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.671379029750824, "rewards/GeoVisalEntityMatch2ORM/std": 0.22578765451908112, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1135, "train_speed(iter/s)": 0.026255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 319.4583435058594, "completions/min_length": 268.0, "epoch": 0.1361131080757249, "grad_norm": 1.2142499975526684, "kl": 0.7723496556282043, "learning_rate": 9.605268329472843e-07, "loss": 0.0007731443038210273, "memory(GiB)": 165.8, "reward": 2.5299479961395264, "reward_std": 0.13363786041736603, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5299479365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.17695266008377075, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1136, "train_speed(iter/s)": 0.026261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.0, "completions/mean_length": 324.3645935058594, "completions/min_length": 263.0, "epoch": 0.13623292595255213, "grad_norm": 1.197767026922093, "kl": 0.7148101329803467, "learning_rate": 9.604527589724767e-07, "loss": 0.0007160976529121399, "memory(GiB)": 165.8, "reward": 2.7557291984558105, "reward_std": 0.1544833928346634, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7890625, "rewards/GeoVisalEntityMatch2ORM/std": 0.14392440021038055, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1137, "train_speed(iter/s)": 0.026264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.0, "completions/mean_length": 340.22918701171875, "completions/min_length": 292.0, "epoch": 0.13635274382937934, "grad_norm": 1.128907060663227, "kl": 0.7661861479282379, "learning_rate": 9.603786184221692e-07, "loss": 0.0007664760341867805, "memory(GiB)": 165.8, "reward": 2.387632369995117, "reward_std": 0.15518513321876526, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.33245497941970825, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5626323223114014, "rewards/GeoVisalEntityMatch2ORM/std": 0.19796794652938843, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1138, "train_speed(iter/s)": 0.02627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 364.0, "completions/mean_length": 322.75, "completions/min_length": 277.0, "epoch": 0.13647256170620656, "grad_norm": 1.1495202802855637, "kl": 0.780261904001236, "learning_rate": 9.603044113070812e-07, "loss": 0.0007812343537807465, "memory(GiB)": 165.8, "reward": 2.675260543823242, "reward_std": 0.12200000882148743, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6752604246139526, "rewards/GeoVisalEntityMatch2ORM/std": 0.18639567494392395, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1139, "train_speed(iter/s)": 0.026276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 373.0, "completions/mean_length": 318.38543701171875, "completions/min_length": 276.0, "epoch": 0.1365923795830338, "grad_norm": 1.2337041796120487, "kl": 0.803246021270752, "learning_rate": 9.60230137637942e-07, "loss": 0.0008038952946662903, "memory(GiB)": 165.8, "reward": 2.747048854827881, "reward_std": 0.08015458285808563, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7470486164093018, "rewards/GeoVisalEntityMatch2ORM/std": 0.27810966968536377, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1140, "train_speed(iter/s)": 0.026275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.0, "completions/mean_length": 323.96875, "completions/min_length": 280.0, "epoch": 0.13671219745986102, "grad_norm": 1.3023063841562346, "kl": 0.792303055524826, "learning_rate": 9.601557974254905e-07, "loss": 0.0007920513744466007, "memory(GiB)": 165.8, "reward": 2.505687713623047, "reward_std": 0.08903113752603531, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5056878328323364, "rewards/GeoVisalEntityMatch2ORM/std": 0.13067017495632172, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1141, "train_speed(iter/s)": 0.026277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.0, "completions/mean_length": 322.1145935058594, "completions/min_length": 271.0, "epoch": 0.13683201533668823, "grad_norm": 1.1887814382511217, "kl": 0.796624481678009, "learning_rate": 9.600813906804757e-07, "loss": 0.0007967526908032596, "memory(GiB)": 165.8, "reward": 2.5306217670440674, "reward_std": 0.1651965081691742, "rewards/GeoLocAccuracyV2ORM/mean": 0.9416666030883789, "rewards/GeoLocAccuracyV2ORM/std": 0.20909158885478973, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5889550447463989, "rewards/GeoVisalEntityMatch2ORM/std": 0.2511986494064331, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1142, "train_speed(iter/s)": 0.026286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.0, "completions/mean_length": 328.34375, "completions/min_length": 283.0, "epoch": 0.13695183321351545, "grad_norm": 1.253812781357718, "kl": 0.7656348645687103, "learning_rate": 9.600069174136557e-07, "loss": 0.0007669901242479682, "memory(GiB)": 165.8, "reward": 2.5169272422790527, "reward_std": 0.09190841019153595, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5169271230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.14689704775810242, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1143, "train_speed(iter/s)": 0.026287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 379.0, "completions/mean_length": 342.04168701171875, "completions/min_length": 296.0, "epoch": 0.13707165109034267, "grad_norm": 1.1159836953633455, "kl": 0.7524176239967346, "learning_rate": 9.599323776357983e-07, "loss": 0.0007529184222221375, "memory(GiB)": 165.8, "reward": 2.7005622386932373, "reward_std": 0.10114053636789322, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7005621790885925, "rewards/GeoVisalEntityMatch2ORM/std": 0.1450151652097702, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1144, "train_speed(iter/s)": 0.026294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 384.0, "completions/mean_length": 327.5833435058594, "completions/min_length": 274.0, "epoch": 0.1371914689671699, "grad_norm": 1.1837745496751308, "kl": 0.8024639785289764, "learning_rate": 9.598577713576812e-07, "loss": 0.0008041064138524234, "memory(GiB)": 165.8, "reward": 2.657060384750366, "reward_std": 0.06323553621768951, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6570601463317871, "rewards/GeoVisalEntityMatch2ORM/std": 0.22154058516025543, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1145, "train_speed(iter/s)": 0.0263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 376.0, "completions/mean_length": 327.1458435058594, "completions/min_length": 289.0, "epoch": 0.13731128684399713, "grad_norm": 1.2687164530196706, "kl": 0.8004708886146545, "learning_rate": 9.597830985900912e-07, "loss": 0.0008010789752006531, "memory(GiB)": 165.8, "reward": 2.2258310317993164, "reward_std": 0.2117292732000351, "rewards/GeoLocAccuracyV2ORM/mean": 0.7333333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.37910327315330505, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4924975335597992, "rewards/GeoVisalEntityMatch2ORM/std": 0.1318233460187912, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1146, "train_speed(iter/s)": 0.026304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 364.0, "completions/mean_length": 311.96875, "completions/min_length": 270.0, "epoch": 0.13743110472082434, "grad_norm": 1.1529269771830772, "kl": 0.786090224981308, "learning_rate": 9.597083593438253e-07, "loss": 0.0007881075143814087, "memory(GiB)": 165.8, "reward": 2.6320602893829346, "reward_std": 0.09985395520925522, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6320602297782898, "rewards/GeoVisalEntityMatch2ORM/std": 0.17261411249637604, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1147, "train_speed(iter/s)": 0.026298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.0, "completions/mean_length": 332.25, "completions/min_length": 280.0, "epoch": 0.13755092259765156, "grad_norm": 1.0065402309125735, "kl": 0.7677012085914612, "learning_rate": 9.596335536296895e-07, "loss": 0.0007680679555051029, "memory(GiB)": 165.8, "reward": 2.6076388359069824, "reward_std": 0.1930839717388153, "rewards/GeoLocAccuracyV2ORM/mean": 0.9270833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.26136451959609985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.680555522441864, "rewards/GeoVisalEntityMatch2ORM/std": 0.2320820689201355, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1148, "train_speed(iter/s)": 0.026304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.0, "completions/mean_length": 346.6770935058594, "completions/min_length": 300.0, "epoch": 0.1376707404744788, "grad_norm": 1.2022673652586204, "kl": 0.820725828409195, "learning_rate": 9.595586814585002e-07, "loss": 0.0008219729061238468, "memory(GiB)": 165.8, "reward": 2.6949405670166016, "reward_std": 0.14424961805343628, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7053570747375488, "rewards/GeoVisalEntityMatch2ORM/std": 0.11913218349218369, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1149, "train_speed(iter/s)": 0.026311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 355.25, "completions/min_length": 293.0, "epoch": 0.13779055835130602, "grad_norm": 1.2453057416903583, "kl": 0.7397390902042389, "learning_rate": 9.594837428410825e-07, "loss": 0.0007410521502606571, "memory(GiB)": 165.8, "reward": 2.446094036102295, "reward_std": 0.16415587067604065, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6127604246139526, "rewards/GeoVisalEntityMatch2ORM/std": 0.20036864280700684, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1150, "train_speed(iter/s)": 0.026317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 426.0, "completions/mean_length": 357.57293701171875, "completions/min_length": 304.0, "epoch": 0.13791037622813324, "grad_norm": 1.3503407787508603, "kl": 0.8861611187458038, "learning_rate": 9.594087377882716e-07, "loss": 0.0008835109765641391, "memory(GiB)": 165.8, "reward": 2.387732982635498, "reward_std": 0.27983811497688293, "rewards/GeoLocAccuracyV2ORM/mean": 0.9041666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.2690789997577667, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5043996572494507, "rewards/GeoVisalEntityMatch2ORM/std": 0.18319445848464966, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 1151, "train_speed(iter/s)": 0.026313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/mean_length": 377.84375, "completions/min_length": 334.0, "epoch": 0.13803019410496045, "grad_norm": 1.1419827222181935, "kl": 0.7460018694400787, "learning_rate": 9.593336663109126e-07, "loss": 0.000746076344512403, "memory(GiB)": 165.8, "reward": 2.6266534328460693, "reward_std": 0.17772632837295532, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6933201551437378, "rewards/GeoVisalEntityMatch2ORM/std": 0.17480872571468353, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1152, "train_speed(iter/s)": 0.026319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/mean_length": 362.1145935058594, "completions/min_length": 316.0, "epoch": 0.1381500119817877, "grad_norm": 1.2113461101947058, "kl": 0.7615353167057037, "learning_rate": 9.592585284198596e-07, "loss": 0.0007623297860845923, "memory(GiB)": 165.8, "reward": 2.4478671550750732, "reward_std": 0.1956833451986313, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5895337462425232, "rewards/GeoVisalEntityMatch2ORM/std": 0.2478339821100235, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1153, "train_speed(iter/s)": 0.026325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.0, "completions/mean_length": 365.6145935058594, "completions/min_length": 300.0, "epoch": 0.1382698298586149, "grad_norm": 1.1886940716526613, "kl": 0.7892188727855682, "learning_rate": 9.591833241259764e-07, "loss": 0.0007910430431365967, "memory(GiB)": 165.8, "reward": 2.5999505519866943, "reward_std": 0.11674240976572037, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5999504327774048, "rewards/GeoVisalEntityMatch2ORM/std": 0.14420293271541595, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1154, "train_speed(iter/s)": 0.026328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 393.0, "completions/mean_length": 349.72918701171875, "completions/min_length": 302.0, "epoch": 0.13838964773544213, "grad_norm": 1.1064434891395691, "kl": 0.7604959011077881, "learning_rate": 9.591080534401371e-07, "loss": 0.0007615312933921814, "memory(GiB)": 165.8, "reward": 2.7890625, "reward_std": 0.13485094904899597, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7890625, "rewards/GeoVisalEntityMatch2ORM/std": 0.24181124567985535, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1155, "train_speed(iter/s)": 0.026335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/mean_length": 372.0, "completions/min_length": 296.0, "epoch": 0.13850946561226934, "grad_norm": 0.8928962900208844, "kl": 0.7566817104816437, "learning_rate": 9.590327163732242e-07, "loss": 0.0007580220699310303, "memory(GiB)": 165.8, "reward": 2.7288589477539062, "reward_std": 0.13113254308700562, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7621922492980957, "rewards/GeoVisalEntityMatch2ORM/std": 0.14791598916053772, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1156, "train_speed(iter/s)": 0.026342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 407.0, "completions/mean_length": 369.66668701171875, "completions/min_length": 333.0, "epoch": 0.13862928348909656, "grad_norm": 1.1463234120551458, "kl": 0.7483115792274475, "learning_rate": 9.58957312936131e-07, "loss": 0.0007489746203646064, "memory(GiB)": 165.8, "reward": 2.5334200859069824, "reward_std": 0.11127965152263641, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5438368320465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.1435522437095642, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1157, "train_speed(iter/s)": 0.026348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.0, "completions/mean_length": 383.3125, "completions/min_length": 325.0, "epoch": 0.1387491013659238, "grad_norm": 1.009701729568597, "kl": 0.7128320336341858, "learning_rate": 9.588818431397595e-07, "loss": 0.0007138252258300781, "memory(GiB)": 165.8, "reward": 2.609722137451172, "reward_std": 0.10368587076663971, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6180555820465088, "rewards/GeoVisalEntityMatch2ORM/std": 0.16744597256183624, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1158, "train_speed(iter/s)": 0.026355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 411.3645935058594, "completions/min_length": 376.0, "epoch": 0.13886891924275102, "grad_norm": 1.0676205938621333, "kl": 0.7640237808227539, "learning_rate": 9.588063069950221e-07, "loss": 0.0007649610633961856, "memory(GiB)": 165.8, "reward": 2.78889536857605, "reward_std": 0.07053171843290329, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7888956069946289, "rewards/GeoVisalEntityMatch2ORM/std": 0.1114669144153595, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1159, "train_speed(iter/s)": 0.026361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/mean_length": 412.21875, "completions/min_length": 349.0, "epoch": 0.13898873711957824, "grad_norm": 1.1149508826816183, "kl": 0.7782899141311646, "learning_rate": 9.5873070451284e-07, "loss": 0.0007796312565915287, "memory(GiB)": 165.8, "reward": 2.6674933433532715, "reward_std": 0.1586129516363144, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.17868918180465698, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7091600298881531, "rewards/GeoVisalEntityMatch2ORM/std": 0.18860438466072083, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1160, "train_speed(iter/s)": 0.026367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 408.29168701171875, "completions/min_length": 330.0, "epoch": 0.13910855499640545, "grad_norm": 1.091779563804894, "kl": 0.7330003976821899, "learning_rate": 9.586550357041443e-07, "loss": 0.0007340411539189517, "memory(GiB)": 165.8, "reward": 2.6223092079162598, "reward_std": 0.1241096630692482, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6306424140930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.142603799700737, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1161, "train_speed(iter/s)": 0.026373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/mean_length": 412.875, "completions/min_length": 334.0, "epoch": 0.1392283728732327, "grad_norm": 1.1386775737991794, "kl": 0.7617494165897369, "learning_rate": 9.585793005798757e-07, "loss": 0.0007637888193130493, "memory(GiB)": 165.8, "reward": 2.240438938140869, "reward_std": 0.12069755792617798, "rewards/GeoLocAccuracyV2ORM/mean": 0.7250000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.443194180727005, "rewards/GeoVisalEntityMatch2ORM/mean": 0.515438973903656, "rewards/GeoVisalEntityMatch2ORM/std": 0.14842575788497925, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1162, "train_speed(iter/s)": 0.02638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/mean_length": 378.0, "completions/min_length": 335.0, "epoch": 0.1393481907500599, "grad_norm": 0.9630997260096107, "kl": 0.7301982939243317, "learning_rate": 9.585034991509848e-07, "loss": 0.000730180530808866, "memory(GiB)": 165.8, "reward": 2.7366371154785156, "reward_std": 0.06149432808160782, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7366372346878052, "rewards/GeoVisalEntityMatch2ORM/std": 0.2172205150127411, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1163, "train_speed(iter/s)": 0.026386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 418.1145935058594, "completions/min_length": 351.0, "epoch": 0.13946800862688713, "grad_norm": 1.0480798646395033, "kl": 0.7574247121810913, "learning_rate": 9.584276314284314e-07, "loss": 0.0007587174768559635, "memory(GiB)": 165.8, "reward": 2.5875372886657715, "reward_std": 0.17178568243980408, "rewards/GeoLocAccuracyV2ORM/mean": 0.9645833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.1716662496328354, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6229538917541504, "rewards/GeoVisalEntityMatch2ORM/std": 0.22450131177902222, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1164, "train_speed(iter/s)": 0.026392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/mean_length": 408.5833435058594, "completions/min_length": 363.0, "epoch": 0.13958782650371435, "grad_norm": 1.1310212201460643, "kl": 0.7550548613071442, "learning_rate": 9.583516974231849e-07, "loss": 0.0007567852735519409, "memory(GiB)": 165.8, "reward": 2.669934034347534, "reward_std": 0.08954377472400665, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6782670021057129, "rewards/GeoVisalEntityMatch2ORM/std": 0.24312016367912292, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1165, "train_speed(iter/s)": 0.026398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/mean_length": 392.8125, "completions/min_length": 340.0, "epoch": 0.1397076443805416, "grad_norm": 1.1977019431326408, "kl": 0.73280468583107, "learning_rate": 9.582756971462244e-07, "loss": 0.0007333457469940186, "memory(GiB)": 165.8, "reward": 2.8140628337860107, "reward_std": 0.1846718192100525, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087526738643646, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8557291030883789, "rewards/GeoVisalEntityMatch2ORM/std": 0.13895659148693085, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1166, "train_speed(iter/s)": 0.026406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 417.8645935058594, "completions/min_length": 349.0, "epoch": 0.1398274622573688, "grad_norm": 0.9499284640531453, "kl": 0.7612461745738983, "learning_rate": 9.581996306085385e-07, "loss": 0.0007601219112984836, "memory(GiB)": 165.8, "reward": 2.7885913848876953, "reward_std": 0.06657497584819794, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7885912656784058, "rewards/GeoVisalEntityMatch2ORM/std": 0.18589699268341064, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1167, "train_speed(iter/s)": 0.026412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 424.53125, "completions/min_length": 389.0, "epoch": 0.13994728013419602, "grad_norm": 0.9129132977302227, "kl": 0.7594745755195618, "learning_rate": 9.581234978211256e-07, "loss": 0.0007597235962748528, "memory(GiB)": 165.8, "reward": 2.3265626430511475, "reward_std": 0.08620665967464447, "rewards/GeoLocAccuracyV2ORM/mean": 0.6000000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.40209975838661194, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7265625, "rewards/GeoVisalEntityMatch2ORM/std": 0.2067229449748993, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1168, "train_speed(iter/s)": 0.026418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 430.4270935058594, "completions/min_length": 379.0, "epoch": 0.14006709801102324, "grad_norm": 1.1436773768673278, "kl": 0.7546738982200623, "learning_rate": 9.580472987949934e-07, "loss": 0.0007558564539067447, "memory(GiB)": 165.8, "reward": 2.301562547683716, "reward_std": 0.16137757897377014, "rewards/GeoLocAccuracyV2ORM/mean": 0.7958332896232605, "rewards/GeoLocAccuracyV2ORM/std": 0.3903889060020447, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5057291984558105, "rewards/GeoVisalEntityMatch2ORM/std": 0.1985374540090561, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1169, "train_speed(iter/s)": 0.026424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 501.0, "completions/mean_length": 441.3125, "completions/min_length": 399.0, "epoch": 0.14018691588785046, "grad_norm": 1.0737935098903406, "kl": 0.7564011514186859, "learning_rate": 9.579710335411592e-07, "loss": 0.0007583586266264319, "memory(GiB)": 165.8, "reward": 2.3292269706726074, "reward_std": 0.06265045702457428, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5792268514633179, "rewards/GeoVisalEntityMatch2ORM/std": 0.12352319806814194, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1170, "train_speed(iter/s)": 0.02643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 506.0, "completions/mean_length": 430.03125, "completions/min_length": 338.0, "epoch": 0.1403067337646777, "grad_norm": 1.072589528084599, "kl": 1.7458757162094116, "learning_rate": 9.578947020706502e-07, "loss": 0.0017292823176831007, "memory(GiB)": 165.8, "reward": 2.411673069000244, "reward_std": 0.2985311448574066, "rewards/GeoLocAccuracyV2ORM/mean": 0.8645833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.34396424889564514, "rewards/GeoVisalEntityMatch2ORM/mean": 0.682506263256073, "rewards/GeoVisalEntityMatch2ORM/std": 0.1252516210079193, "rewards/MathFormat/mean": 0.8645833730697632, "rewards/MathFormat/std": 0.34396424889564514, "step": 1171, "train_speed(iter/s)": 0.026436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3854166666666667, "completions/max_length": 481.0, "completions/mean_length": 410.84375, "completions/min_length": 368.0, "epoch": 0.14042655164150492, "grad_norm": 3.1124256245417765, "kl": 5.505877733230591, "learning_rate": 9.57818304394503e-07, "loss": 0.005501513835042715, "memory(GiB)": 165.8, "reward": 2.0040178298950195, "reward_std": 0.32039645314216614, "rewards/GeoLocAccuracyV2ORM/mean": 0.6354166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4838397204875946, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7331845164299011, "rewards/GeoVisalEntityMatch2ORM/std": 0.24130983650684357, "rewards/MathFormat/mean": 0.6354166865348816, "rewards/MathFormat/std": 0.4838397204875946, "step": 1172, "train_speed(iter/s)": 0.026435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 486.0, "completions/mean_length": 446.6145935058594, "completions/min_length": 398.0, "epoch": 0.14054636951833213, "grad_norm": 1.1229156209022095, "kl": 1.597461760044098, "learning_rate": 9.577418405237631e-07, "loss": 0.001579205272719264, "memory(GiB)": 165.8, "reward": 2.300231695175171, "reward_std": 0.351686954498291, "rewards/GeoLocAccuracyV2ORM/mean": 0.8791667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.32246312499046326, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5252314805984497, "rewards/GeoVisalEntityMatch2ORM/std": 0.28753381967544556, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070802092552185, "step": 1173, "train_speed(iter/s)": 0.026439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 463.0, "completions/mean_length": 407.97918701171875, "completions/min_length": 338.0, "epoch": 0.14066618739515935, "grad_norm": 1.1081649988257634, "kl": 0.7721230983734131, "learning_rate": 9.57665310469487e-07, "loss": 0.0007703403825871646, "memory(GiB)": 165.8, "reward": 2.5313491821289062, "reward_std": 0.17305564880371094, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003034889698029, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5605158805847168, "rewards/GeoVisalEntityMatch2ORM/std": 0.29473876953125, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1174, "train_speed(iter/s)": 0.02644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 487.0, "completions/mean_length": 410.44793701171875, "completions/min_length": 368.0, "epoch": 0.1407860052719866, "grad_norm": 1.1223804521826135, "kl": 0.8183779716491699, "learning_rate": 9.575887142427394e-07, "loss": 0.0008182885940186679, "memory(GiB)": 165.8, "reward": 2.213578939437866, "reward_std": 0.11707049608230591, "rewards/GeoLocAccuracyV2ORM/mean": 0.7333333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.44073307514190674, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7302455902099609, "rewards/GeoVisalEntityMatch2ORM/std": 0.10860524326562881, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 1175, "train_speed(iter/s)": 0.02644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/mean_length": 422.94793701171875, "completions/min_length": 363.0, "epoch": 0.1409058231488138, "grad_norm": 1.147857087841442, "kl": 0.7299293875694275, "learning_rate": 9.575120518545954e-07, "loss": 0.0007309647044166923, "memory(GiB)": 165.8, "reward": 2.5620534420013428, "reward_std": 0.1762731522321701, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7037202715873718, "rewards/GeoVisalEntityMatch2ORM/std": 0.15014754235744476, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1176, "train_speed(iter/s)": 0.026446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 428.71875, "completions/min_length": 389.0, "epoch": 0.14102564102564102, "grad_norm": 0.9890631961192629, "kl": 0.769422173500061, "learning_rate": 9.57435323316139e-07, "loss": 0.0007710258360020816, "memory(GiB)": 165.8, "reward": 2.5445313453674316, "reward_std": 0.0752091035246849, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7945312857627869, "rewards/GeoVisalEntityMatch2ORM/std": 0.10407506674528122, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1177, "train_speed(iter/s)": 0.026452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 457.0, "completions/mean_length": 405.09375, "completions/min_length": 359.0, "epoch": 0.14114545890246824, "grad_norm": 1.1393526043788975, "kl": 0.7998897731304169, "learning_rate": 9.573585286384647e-07, "loss": 0.00079978012945503, "memory(GiB)": 165.8, "reward": 2.423163890838623, "reward_std": 0.2467816025018692, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087528228759766, "rewards/GeoVisalEntityMatch2ORM/mean": 0.506497323513031, "rewards/GeoVisalEntityMatch2ORM/std": 0.20805390179157257, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 1178, "train_speed(iter/s)": 0.026453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 417.47918701171875, "completions/min_length": 367.0, "epoch": 0.14126527677929548, "grad_norm": 1.0784066217804544, "kl": 0.77300825715065, "learning_rate": 9.572816678326758e-07, "loss": 0.0007743227179162204, "memory(GiB)": 165.8, "reward": 2.3828048706054688, "reward_std": 0.11584768444299698, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906975746154785, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6223883628845215, "rewards/GeoVisalEntityMatch2ORM/std": 0.20954208076000214, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1179, "train_speed(iter/s)": 0.026459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 403.25, "completions/min_length": 354.0, "epoch": 0.1413850946561227, "grad_norm": 1.1718730700633253, "kl": 0.7675166428089142, "learning_rate": 9.57204740909885e-07, "loss": 0.0007685869932174683, "memory(GiB)": 165.8, "reward": 2.3327178955078125, "reward_std": 0.12517398595809937, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5243844985961914, "rewards/GeoVisalEntityMatch2ORM/std": 0.11646642535924911, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1180, "train_speed(iter/s)": 0.026467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 386.66668701171875, "completions/min_length": 330.0, "epoch": 0.14150491253294992, "grad_norm": 1.2297944255188662, "kl": 0.8039784133434296, "learning_rate": 9.571277478812155e-07, "loss": 0.0008059119572862983, "memory(GiB)": 165.8, "reward": 2.6455817222595215, "reward_std": 0.075923852622509, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6455814838409424, "rewards/GeoVisalEntityMatch2ORM/std": 0.22904929518699646, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1181, "train_speed(iter/s)": 0.026468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 384.2708435058594, "completions/min_length": 320.0, "epoch": 0.14162473040977713, "grad_norm": 1.2001891346308575, "kl": 0.7807312309741974, "learning_rate": 9.570506887577993e-07, "loss": 0.0007816199213266373, "memory(GiB)": 165.8, "reward": 2.634003162384033, "reward_std": 0.16373004019260406, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6840029954910278, "rewards/GeoVisalEntityMatch2ORM/std": 0.11316412687301636, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1182, "train_speed(iter/s)": 0.026477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 414.0, "completions/mean_length": 358.8020935058594, "completions/min_length": 315.0, "epoch": 0.14174454828660435, "grad_norm": 1.5343137423739155, "kl": 0.7807320058345795, "learning_rate": 9.56973563550778e-07, "loss": 0.0007808506488800049, "memory(GiB)": 165.8, "reward": 2.4440104961395264, "reward_std": 0.08777475357055664, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6940104365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.13147667050361633, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1183, "train_speed(iter/s)": 0.026478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 356.07293701171875, "completions/min_length": 317.0, "epoch": 0.1418643661634316, "grad_norm": 1.284680516491522, "kl": 0.823371171951294, "learning_rate": 9.56896372271303e-07, "loss": 0.000825740396976471, "memory(GiB)": 165.8, "reward": 2.564384937286377, "reward_std": 0.06621275842189789, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.764384925365448, "rewards/GeoVisalEntityMatch2ORM/std": 0.13975709676742554, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1184, "train_speed(iter/s)": 0.026484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 443.0, "completions/mean_length": 370.79168701171875, "completions/min_length": 326.0, "epoch": 0.1419841840402588, "grad_norm": 1.200226254068384, "kl": 0.7937484085559845, "learning_rate": 9.568191149305351e-07, "loss": 0.000796005129814148, "memory(GiB)": 165.8, "reward": 2.4455440044403076, "reward_std": 0.08004535734653473, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6455440521240234, "rewards/GeoVisalEntityMatch2ORM/std": 0.1013837605714798, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1185, "train_speed(iter/s)": 0.02649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.0, "completions/mean_length": 358.8645935058594, "completions/min_length": 306.0, "epoch": 0.14210400191708603, "grad_norm": 1.0808387417737433, "kl": 0.7714793384075165, "learning_rate": 9.567417915396449e-07, "loss": 0.0007728090276941657, "memory(GiB)": 165.8, "reward": 2.405642509460449, "reward_std": 0.17345167696475983, "rewards/GeoLocAccuracyV2ORM/mean": 0.793749988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.4046603739261627, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6118924021720886, "rewards/GeoVisalEntityMatch2ORM/std": 0.23450443148612976, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1186, "train_speed(iter/s)": 0.026497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 344.3645935058594, "completions/min_length": 286.0, "epoch": 0.14222381979391324, "grad_norm": 1.1770306884858932, "kl": 0.7642548084259033, "learning_rate": 9.566644021098122e-07, "loss": 0.0007663866272196174, "memory(GiB)": 165.8, "reward": 2.699578285217285, "reward_std": 0.20607586205005646, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7204116582870483, "rewards/GeoVisalEntityMatch2ORM/std": 0.1731071174144745, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1187, "train_speed(iter/s)": 0.026503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.0, "completions/mean_length": 348.4895935058594, "completions/min_length": 285.0, "epoch": 0.14234363767074049, "grad_norm": 1.1337079880790064, "kl": 0.8177600800991058, "learning_rate": 9.565869466522265e-07, "loss": 0.0008182128658518195, "memory(GiB)": 165.8, "reward": 2.776909589767456, "reward_std": 0.0587446428835392, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7769097089767456, "rewards/GeoVisalEntityMatch2ORM/std": 0.12285872548818588, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1188, "train_speed(iter/s)": 0.02651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/mean_length": 348.28125, "completions/min_length": 305.0, "epoch": 0.1424634555475677, "grad_norm": 1.1432427989980818, "kl": 0.7931170165538788, "learning_rate": 9.56509425178087e-07, "loss": 0.0007940108771435916, "memory(GiB)": 165.8, "reward": 2.603980779647827, "reward_std": 0.08598847687244415, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6039807200431824, "rewards/GeoVisalEntityMatch2ORM/std": 0.16193139553070068, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1189, "train_speed(iter/s)": 0.026517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 376.0, "completions/mean_length": 327.15625, "completions/min_length": 283.0, "epoch": 0.14258327342439492, "grad_norm": 1.2451411318805574, "kl": 0.808705061674118, "learning_rate": 9.564318376986021e-07, "loss": 0.0008098284597508609, "memory(GiB)": 165.8, "reward": 2.3606772422790527, "reward_std": 0.175672709941864, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.29199856519699097, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4856770932674408, "rewards/GeoVisalEntityMatch2ORM/std": 0.20446300506591797, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1190, "train_speed(iter/s)": 0.026526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.0, "completions/mean_length": 320.2395935058594, "completions/min_length": 254.0, "epoch": 0.14270309130122213, "grad_norm": 1.1955672471495389, "kl": 0.8034473657608032, "learning_rate": 9.5635418422499e-07, "loss": 0.0008046751609072089, "memory(GiB)": 165.8, "reward": 2.340625047683716, "reward_std": 0.12430927157402039, "rewards/GeoLocAccuracyV2ORM/mean": 0.5500000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.4579128921031952, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7906250357627869, "rewards/GeoVisalEntityMatch2ORM/std": 0.19632309675216675, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1191, "train_speed(iter/s)": 0.026533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 387.0, "completions/mean_length": 346.84375, "completions/min_length": 295.0, "epoch": 0.14282290917804938, "grad_norm": 1.267763362489449, "kl": 0.8454152941703796, "learning_rate": 9.562764647684787e-07, "loss": 0.0008464108104817569, "memory(GiB)": 165.8, "reward": 2.14365816116333, "reward_std": 0.21706345677375793, "rewards/GeoLocAccuracyV2ORM/mean": 0.7895833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.37988337874412537, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4478245973587036, "rewards/GeoVisalEntityMatch2ORM/std": 0.17051252722740173, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930106818675995, "step": 1192, "train_speed(iter/s)": 0.026532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.0, "completions/mean_length": 330.10418701171875, "completions/min_length": 283.0, "epoch": 0.1429427270548766, "grad_norm": 1.1695143986686973, "kl": 0.7999550700187683, "learning_rate": 9.56198679340305e-07, "loss": 0.0008004754781723022, "memory(GiB)": 165.8, "reward": 2.5142858028411865, "reward_std": 0.09817127883434296, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5142857432365417, "rewards/GeoVisalEntityMatch2ORM/std": 0.11124183982610703, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1193, "train_speed(iter/s)": 0.026535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 378.0, "completions/mean_length": 332.19793701171875, "completions/min_length": 291.0, "epoch": 0.1430625449317038, "grad_norm": 1.1493773697481195, "kl": 0.8505961894989014, "learning_rate": 9.561208279517157e-07, "loss": 0.0008517404785379767, "memory(GiB)": 165.8, "reward": 2.4875857830047607, "reward_std": 0.2625800371170044, "rewards/GeoLocAccuracyV2ORM/mean": 0.9041666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.2842966318130493, "rewards/GeoVisalEntityMatch2ORM/mean": 0.593835711479187, "rewards/GeoVisalEntityMatch2ORM/std": 0.07388029247522354, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1194, "train_speed(iter/s)": 0.026534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.0, "completions/mean_length": 319.0520935058594, "completions/min_length": 269.0, "epoch": 0.14318236280853103, "grad_norm": 0.8486584363707638, "kl": 0.8540058732032776, "learning_rate": 9.560429106139674e-07, "loss": 0.0014515494694933295, "memory(GiB)": 165.8, "reward": 2.4888229370117188, "reward_std": 0.05502335727214813, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6888227462768555, "rewards/GeoVisalEntityMatch2ORM/std": 0.2719776928424835, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1195, "train_speed(iter/s)": 0.026535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 323.125, "completions/min_length": 279.0, "epoch": 0.14330218068535824, "grad_norm": 1.0734495435603881, "kl": 0.8182897865772247, "learning_rate": 9.559649273383257e-07, "loss": 0.0008185803890228271, "memory(GiB)": 165.8, "reward": 2.538029193878174, "reward_std": 0.07010942697525024, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.538029134273529, "rewards/GeoVisalEntityMatch2ORM/std": 0.11769445240497589, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1196, "train_speed(iter/s)": 0.026541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.0, "completions/mean_length": 322.0, "completions/min_length": 273.0, "epoch": 0.1434219985621855, "grad_norm": 1.182492306125891, "kl": 0.7876188158988953, "learning_rate": 9.558868781360658e-07, "loss": 0.0007893567671999335, "memory(GiB)": 165.8, "reward": 2.393287181854248, "reward_std": 0.13241933286190033, "rewards/GeoLocAccuracyV2ORM/mean": 0.824999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.33245497941970825, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5682870745658875, "rewards/GeoVisalEntityMatch2ORM/std": 0.24022191762924194, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1197, "train_speed(iter/s)": 0.026547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.0, "completions/mean_length": 317.5520935058594, "completions/min_length": 277.0, "epoch": 0.1435418164390127, "grad_norm": 1.1900569125254097, "kl": 0.7772952914237976, "learning_rate": 9.558087630184733e-07, "loss": 0.0007782056927680969, "memory(GiB)": 165.8, "reward": 2.4200892448425293, "reward_std": 0.08028946071863174, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6700893640518188, "rewards/GeoVisalEntityMatch2ORM/std": 0.2225530743598938, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1198, "train_speed(iter/s)": 0.026554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.0, "completions/mean_length": 293.8645935058594, "completions/min_length": 241.0, "epoch": 0.14366163431583992, "grad_norm": 1.2990714262391934, "kl": 0.8014352321624756, "learning_rate": 9.557305819968418e-07, "loss": 0.0008055294747464359, "memory(GiB)": 165.8, "reward": 2.1644840240478516, "reward_std": 0.06979687511920929, "rewards/GeoLocAccuracyV2ORM/mean": 0.5500000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.4579128921031952, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6144841909408569, "rewards/GeoVisalEntityMatch2ORM/std": 0.1255628913640976, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1199, "train_speed(iter/s)": 0.026562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/mean_length": 284.25, "completions/min_length": 233.0, "epoch": 0.14378145219266714, "grad_norm": 1.235882843060413, "kl": 0.8469605445861816, "learning_rate": 9.556523350824756e-07, "loss": 0.000848879455588758, "memory(GiB)": 165.8, "reward": 2.539843797683716, "reward_std": 0.1153615266084671, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5481771230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.1559799164533615, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1200, "train_speed(iter/s)": 0.026563 }, { "epoch": 0.14378145219266714, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.000248015873015873, "eval_completions/max_length": 329.2202380952381, "eval_completions/mean_length": 285.85194560459684, "eval_completions/min_length": 246.51785714285714, "eval_kl": 5.766728815578279, "eval_loss": 0.005638060625642538, "eval_reward": 2.535917279266176, "eval_reward_std": 0.12196529983720254, "eval_rewards/GeoLocAccuracyV2ORM/mean": 0.9090277848853952, "eval_rewards/GeoLocAccuracyV2ORM/std": 0.10928986319119022, "eval_rewards/GeoVisalEntityMatch2ORM/mean": 0.627137464958997, "eval_rewards/GeoVisalEntityMatch2ORM/std": 0.14811775866629823, "eval_rewards/MathFormat/mean": 0.9997519843635105, "eval_rewards/MathFormat/std": 0.0017183043417476473, "eval_runtime": 1645.5332, "eval_samples_per_second": 0.205, "eval_steps_per_second": 0.005, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.0, "completions/mean_length": 286.3645935058594, "completions/min_length": 214.0, "epoch": 0.14390127006949438, "grad_norm": 1.2021908612549388, "kl": 0.7841156423091888, "learning_rate": 9.555740222866885e-07, "loss": 0.0007846703520044684, "memory(GiB)": 165.8, "reward": 2.309300422668457, "reward_std": 0.19713041186332703, "rewards/GeoLocAccuracyV2ORM/mean": 0.6437500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.4469339847564697, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6655506491661072, "rewards/GeoVisalEntityMatch2ORM/std": 0.3092457354068756, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1201, "train_speed(iter/s)": 0.025603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.0, "completions/mean_length": 290.04168701171875, "completions/min_length": 250.0, "epoch": 0.1440210879463216, "grad_norm": 1.4070302175546212, "kl": 0.8173543810844421, "learning_rate": 9.554956436208031e-07, "loss": 0.0008183866739273071, "memory(GiB)": 165.8, "reward": 2.2474868297576904, "reward_std": 0.15986457467079163, "rewards/GeoLocAccuracyV2ORM/mean": 0.6416666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.3999122977256775, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6058201193809509, "rewards/GeoVisalEntityMatch2ORM/std": 0.10376521199941635, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1202, "train_speed(iter/s)": 0.025602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.0, "completions/mean_length": 286.22918701171875, "completions/min_length": 222.0, "epoch": 0.1441409058231488, "grad_norm": 1.4241346748017318, "kl": 0.8488571345806122, "learning_rate": 9.55417199096152e-07, "loss": 0.0008494779467582703, "memory(GiB)": 165.8, "reward": 2.4844207763671875, "reward_std": 0.059438858181238174, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4844208359718323, "rewards/GeoVisalEntityMatch2ORM/std": 0.22849039733409882, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1203, "train_speed(iter/s)": 0.025609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.0, "completions/mean_length": 279.69793701171875, "completions/min_length": 242.0, "epoch": 0.14426072369997603, "grad_norm": 1.336186616159345, "kl": 0.8643037378787994, "learning_rate": 9.553386887240774e-07, "loss": 0.0008674835553392768, "memory(GiB)": 165.8, "reward": 2.574305534362793, "reward_std": 0.08744779229164124, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.574305534362793, "rewards/GeoVisalEntityMatch2ORM/std": 0.12399103492498398, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1204, "train_speed(iter/s)": 0.025611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.0, "completions/mean_length": 274.09375, "completions/min_length": 237.0, "epoch": 0.14438054157680325, "grad_norm": 1.451656659980137, "kl": 0.8326798677444458, "learning_rate": 9.552601125159307e-07, "loss": 0.0008350412244908512, "memory(GiB)": 165.8, "reward": 2.56701397895813, "reward_std": 0.07932159304618835, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5670139193534851, "rewards/GeoVisalEntityMatch2ORM/std": 0.20615854859352112, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1205, "train_speed(iter/s)": 0.025618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.0, "completions/mean_length": 263.6145935058594, "completions/min_length": 215.0, "epoch": 0.1445003594536305, "grad_norm": 1.4053012805239913, "kl": 0.8461422920227051, "learning_rate": 9.551814704830734e-07, "loss": 0.0008479704847559333, "memory(GiB)": 165.8, "reward": 2.5718626976013184, "reward_std": 0.09535245597362518, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5718626379966736, "rewards/GeoVisalEntityMatch2ORM/std": 0.18354853987693787, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1206, "train_speed(iter/s)": 0.025621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.0, "completions/mean_length": 270.22918701171875, "completions/min_length": 234.0, "epoch": 0.1446201773304577, "grad_norm": 1.2629225545178573, "kl": 0.8132900893688202, "learning_rate": 9.551027626368754e-07, "loss": 0.0008142752340063453, "memory(GiB)": 165.8, "reward": 2.6650853157043457, "reward_std": 0.10944914072751999, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6650853753089905, "rewards/GeoVisalEntityMatch2ORM/std": 0.16718749701976776, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1207, "train_speed(iter/s)": 0.02563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.0, "completions/mean_length": 251.45834350585938, "completions/min_length": 200.0, "epoch": 0.14473999520728492, "grad_norm": 1.2706336026210698, "kl": 0.8397422134876251, "learning_rate": 9.550239889887178e-07, "loss": 0.0008403485408052802, "memory(GiB)": 165.8, "reward": 2.6358134746551514, "reward_std": 0.08405215293169022, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6358135342597961, "rewards/GeoVisalEntityMatch2ORM/std": 0.15326027572155, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1208, "train_speed(iter/s)": 0.02563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.0, "completions/mean_length": 258.3020935058594, "completions/min_length": 212.0, "epoch": 0.14485981308411214, "grad_norm": 1.479410335775479, "kl": 0.8328289091587067, "learning_rate": 9.549451495499894e-07, "loss": 0.0008346214890480042, "memory(GiB)": 165.8, "reward": 2.301488161087036, "reward_std": 0.07725591957569122, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5014880895614624, "rewards/GeoVisalEntityMatch2ORM/std": 0.22589343786239624, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1209, "train_speed(iter/s)": 0.025636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.0, "completions/mean_length": 261.96875, "completions/min_length": 218.0, "epoch": 0.14497963096093938, "grad_norm": 1.364407959432969, "kl": 0.8685548901557922, "learning_rate": 9.548662443320895e-07, "loss": 0.0008697062730789185, "memory(GiB)": 165.8, "reward": 2.1424479484558105, "reward_std": 0.16651560366153717, "rewards/GeoLocAccuracyV2ORM/mean": 0.7583333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.3692571818828583, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3841145932674408, "rewards/GeoVisalEntityMatch2ORM/std": 0.18866287171840668, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1210, "train_speed(iter/s)": 0.025646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.0, "completions/mean_length": 265.46875, "completions/min_length": 229.0, "epoch": 0.1450994488377666, "grad_norm": 1.4468013268327944, "kl": 0.877078503370285, "learning_rate": 9.547872733464272e-07, "loss": 0.0008782744407653809, "memory(GiB)": 165.8, "reward": 2.42832350730896, "reward_std": 0.19511941075325012, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3746342957019806, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5949901342391968, "rewards/GeoVisalEntityMatch2ORM/std": 0.23043210804462433, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1211, "train_speed(iter/s)": 0.025653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/mean_length": 273.47918701171875, "completions/min_length": 235.0, "epoch": 0.14521926671459381, "grad_norm": 1.6106039093324824, "kl": 0.8669330775737762, "learning_rate": 9.547082366044206e-07, "loss": 0.000869880139362067, "memory(GiB)": 165.8, "reward": 2.6172993183135986, "reward_std": 0.217941015958786, "rewards/GeoLocAccuracyV2ORM/mean": 0.9437500834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.22043617069721222, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6735491752624512, "rewards/GeoVisalEntityMatch2ORM/std": 0.18951299786567688, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1212, "train_speed(iter/s)": 0.025655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.0, "completions/mean_length": 271.03125, "completions/min_length": 220.0, "epoch": 0.14533908459142103, "grad_norm": 1.5185947244923328, "kl": 0.8288183510303497, "learning_rate": 9.54629134117497e-07, "loss": 0.0008306751842610538, "memory(GiB)": 165.8, "reward": 2.577695369720459, "reward_std": 0.08435514569282532, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5776951313018799, "rewards/GeoVisalEntityMatch2ORM/std": 0.34644144773483276, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1213, "train_speed(iter/s)": 0.025662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.0, "completions/mean_length": 276.60418701171875, "completions/min_length": 174.0, "epoch": 0.14545890246824827, "grad_norm": 1.4563130694472206, "kl": 0.7640045285224915, "learning_rate": 9.545499658970939e-07, "loss": 0.0007661283016204834, "memory(GiB)": 165.8, "reward": 2.8469247817993164, "reward_std": 0.08408641815185547, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8469246625900269, "rewards/GeoVisalEntityMatch2ORM/std": 0.13834992051124573, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1214, "train_speed(iter/s)": 0.025669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.0, "completions/mean_length": 288.625, "completions/min_length": 245.0, "epoch": 0.1455787203450755, "grad_norm": 1.3113247667732413, "kl": 0.833207905292511, "learning_rate": 9.544707319546577e-07, "loss": 0.0008342142100445926, "memory(GiB)": 165.8, "reward": 2.376159191131592, "reward_std": 0.19893936812877655, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.25615236163139343, "rewards/GeoVisalEntityMatch2ORM/mean": 0.46782559156417847, "rewards/GeoVisalEntityMatch2ORM/std": 0.18973037600517273, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1215, "train_speed(iter/s)": 0.025678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.0, "completions/mean_length": 273.8020935058594, "completions/min_length": 228.0, "epoch": 0.1456985382219027, "grad_norm": 1.1942242436180037, "kl": 0.8689008057117462, "learning_rate": 9.543914323016451e-07, "loss": 0.0008714236319065094, "memory(GiB)": 165.8, "reward": 2.7493550777435303, "reward_std": 0.06224175542593002, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7493551969528198, "rewards/GeoVisalEntityMatch2ORM/std": 0.157471165060997, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1216, "train_speed(iter/s)": 0.025685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.0, "completions/mean_length": 289.2395935058594, "completions/min_length": 243.0, "epoch": 0.14581835609872992, "grad_norm": 1.3146036334635751, "kl": 0.8609098494052887, "learning_rate": 9.543120669495212e-07, "loss": 0.0008617466082796454, "memory(GiB)": 165.8, "reward": 2.5663938522338867, "reward_std": 0.0938727855682373, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5663938522338867, "rewards/GeoVisalEntityMatch2ORM/std": 0.20520202815532684, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1217, "train_speed(iter/s)": 0.025692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 330.0, "completions/mean_length": 283.9583435058594, "completions/min_length": 244.0, "epoch": 0.14593817397555714, "grad_norm": 1.351632617865316, "kl": 0.8505342304706573, "learning_rate": 9.542326359097617e-07, "loss": 0.0008537024259567261, "memory(GiB)": 165.8, "reward": 2.7087676525115967, "reward_std": 0.0590934157371521, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7087674140930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.12902790307998657, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1218, "train_speed(iter/s)": 0.025698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.0, "completions/mean_length": 296.1875, "completions/min_length": 257.0, "epoch": 0.14605799185238438, "grad_norm": 1.1869506317196319, "kl": 0.7949878573417664, "learning_rate": 9.541531391938512e-07, "loss": 0.0007966160774230957, "memory(GiB)": 165.8, "reward": 2.612847328186035, "reward_std": 0.0982494056224823, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6128472685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.19174452126026154, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1219, "train_speed(iter/s)": 0.0257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 357.0, "completions/mean_length": 299.76043701171875, "completions/min_length": 253.0, "epoch": 0.1461778097292116, "grad_norm": 1.3021093551539717, "kl": 0.7941355407238007, "learning_rate": 9.540735768132837e-07, "loss": 0.0007965515251271427, "memory(GiB)": 165.8, "reward": 2.5411458015441895, "reward_std": 0.1058453917503357, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7328125834465027, "rewards/GeoVisalEntityMatch2ORM/std": 0.16855180263519287, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1220, "train_speed(iter/s)": 0.025707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.0, "completions/mean_length": 296.84375, "completions/min_length": 238.0, "epoch": 0.14629762760603882, "grad_norm": 1.5294310754883682, "kl": 0.8386862277984619, "learning_rate": 9.539939487795631e-07, "loss": 0.0008401032537221909, "memory(GiB)": 165.8, "reward": 2.582688093185425, "reward_std": 0.10213878750801086, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5826878547668457, "rewards/GeoVisalEntityMatch2ORM/std": 0.24911925196647644, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1221, "train_speed(iter/s)": 0.025713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.0, "completions/mean_length": 300.1145935058594, "completions/min_length": 239.0, "epoch": 0.14641744548286603, "grad_norm": 1.2562658239111675, "kl": 0.8191156983375549, "learning_rate": 9.539142551042022e-07, "loss": 0.0008211458916775882, "memory(GiB)": 165.8, "reward": 2.483631134033203, "reward_std": 0.11989661306142807, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7336310148239136, "rewards/GeoVisalEntityMatch2ORM/std": 0.12551361322402954, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1222, "train_speed(iter/s)": 0.02572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 383.0, "completions/mean_length": 320.6458435058594, "completions/min_length": 267.0, "epoch": 0.14653726335969328, "grad_norm": 1.243434196447471, "kl": 0.7816189825534821, "learning_rate": 9.538344957987243e-07, "loss": 0.0007831330294720829, "memory(GiB)": 165.8, "reward": 2.464980363845825, "reward_std": 0.09563969820737839, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6649801731109619, "rewards/GeoVisalEntityMatch2ORM/std": 0.10517074167728424, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1223, "train_speed(iter/s)": 0.025727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 356.0, "completions/mean_length": 314.875, "completions/min_length": 274.0, "epoch": 0.1466570812365205, "grad_norm": 1.3205497579369592, "kl": 0.7518818378448486, "learning_rate": 9.53754670874661e-07, "loss": 0.0007541105151176453, "memory(GiB)": 165.8, "reward": 2.597172737121582, "reward_std": 0.14727871119976044, "rewards/GeoLocAccuracyV2ORM/mean": 0.8416666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3204164206981659, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7555060386657715, "rewards/GeoVisalEntityMatch2ORM/std": 0.09904970973730087, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1224, "train_speed(iter/s)": 0.025733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 382.0, "completions/mean_length": 318.60418701171875, "completions/min_length": 271.0, "epoch": 0.1467768991133477, "grad_norm": 1.2125636926748113, "kl": 0.8109531700611115, "learning_rate": 9.536747803435543e-07, "loss": 0.0008116389508359134, "memory(GiB)": 165.8, "reward": 2.784313201904297, "reward_std": 0.0925239697098732, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7843129634857178, "rewards/GeoVisalEntityMatch2ORM/std": 0.11632126569747925, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1225, "train_speed(iter/s)": 0.02574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.0, "completions/mean_length": 317.91668701171875, "completions/min_length": 276.0, "epoch": 0.14689671699017492, "grad_norm": 1.1230338322106306, "kl": 0.8271346390247345, "learning_rate": 9.535948242169551e-07, "loss": 0.0008279085159301758, "memory(GiB)": 165.8, "reward": 2.610482931137085, "reward_std": 0.17337274551391602, "rewards/GeoLocAccuracyV2ORM/mean": 0.7958333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.4010293781757355, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8146495223045349, "rewards/GeoVisalEntityMatch2ORM/std": 0.14173875749111176, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1226, "train_speed(iter/s)": 0.025746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 373.0, "completions/mean_length": 325.0625, "completions/min_length": 284.0, "epoch": 0.14701653486700217, "grad_norm": 0.926426602412924, "kl": 0.7885805666446686, "learning_rate": 9.535148025064242e-07, "loss": 0.001386106014251709, "memory(GiB)": 165.8, "reward": 2.2928977012634277, "reward_std": 0.050626277923583984, "rewards/GeoLocAccuracyV2ORM/mean": 0.5500000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.4579128921031952, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7428976893424988, "rewards/GeoVisalEntityMatch2ORM/std": 0.192275732755661, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1227, "train_speed(iter/s)": 0.025751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/mean_length": 329.5208435058594, "completions/min_length": 248.0, "epoch": 0.14713635274382939, "grad_norm": 1.3121519763416853, "kl": 0.7729979157447815, "learning_rate": 9.534347152235316e-07, "loss": 0.0007754862308502197, "memory(GiB)": 165.8, "reward": 2.765625, "reward_std": 0.1012328565120697, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.765625, "rewards/GeoVisalEntityMatch2ORM/std": 0.15249507129192352, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1228, "train_speed(iter/s)": 0.025755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 374.0, "completions/mean_length": 327.60418701171875, "completions/min_length": 278.0, "epoch": 0.1472561706206566, "grad_norm": 1.184090480881064, "kl": 0.8008185029029846, "learning_rate": 9.533545623798569e-07, "loss": 0.0008021928369998932, "memory(GiB)": 165.8, "reward": 2.680713415145874, "reward_std": 0.08097431063652039, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.680713415145874, "rewards/GeoVisalEntityMatch2ORM/std": 0.15587130188941956, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1229, "train_speed(iter/s)": 0.025762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 425.0, "completions/mean_length": 349.84375, "completions/min_length": 299.0, "epoch": 0.14737598849748382, "grad_norm": 1.0154699456677707, "kl": 0.842971533536911, "learning_rate": 9.532743439869891e-07, "loss": 0.0008414065232500434, "memory(GiB)": 165.8, "reward": 2.5840489864349365, "reward_std": 0.23632539808750153, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087528228759766, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6673821210861206, "rewards/GeoVisalEntityMatch2ORM/std": 0.21424837410449982, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 1230, "train_speed(iter/s)": 0.025761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.0, "completions/mean_length": 376.0520935058594, "completions/min_length": 331.0, "epoch": 0.14749580637431103, "grad_norm": 1.1841515369015396, "kl": 0.797236829996109, "learning_rate": 9.531940600565269e-07, "loss": 0.000798764347564429, "memory(GiB)": 165.8, "reward": 2.3485560417175293, "reward_std": 0.09425894170999527, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5985559225082397, "rewards/GeoVisalEntityMatch2ORM/std": 0.13344737887382507, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1231, "train_speed(iter/s)": 0.025768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/mean_length": 350.79168701171875, "completions/min_length": 309.0, "epoch": 0.14761562425113828, "grad_norm": 1.374763239681669, "kl": 0.7942949831485748, "learning_rate": 9.531137106000782e-07, "loss": 0.0007961615920066833, "memory(GiB)": 165.8, "reward": 2.6444506645202637, "reward_std": 0.07682989537715912, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6444505453109741, "rewards/GeoVisalEntityMatch2ORM/std": 0.1812092810869217, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1232, "train_speed(iter/s)": 0.025774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.0, "completions/mean_length": 363.0520935058594, "completions/min_length": 302.0, "epoch": 0.1477354421279655, "grad_norm": 1.1926538968998655, "kl": 0.7132971882820129, "learning_rate": 9.530332956292603e-07, "loss": 0.0007145119598135352, "memory(GiB)": 165.8, "reward": 2.3903770446777344, "reward_std": 0.15149971842765808, "rewards/GeoLocAccuracyV2ORM/mean": 0.9083333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.25615236163139343, "rewards/GeoVisalEntityMatch2ORM/mean": 0.48204368352890015, "rewards/GeoVisalEntityMatch2ORM/std": 0.3118570148944855, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1233, "train_speed(iter/s)": 0.025776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 381.65625, "completions/min_length": 309.0, "epoch": 0.1478552600047927, "grad_norm": 1.2447967578293198, "kl": 0.761481523513794, "learning_rate": 9.529528151557007e-07, "loss": 0.0007635504007339478, "memory(GiB)": 165.8, "reward": 2.504547119140625, "reward_std": 0.08602224290370941, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5045469999313354, "rewards/GeoVisalEntityMatch2ORM/std": 0.14309006929397583, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1234, "train_speed(iter/s)": 0.025781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.0, "completions/mean_length": 386.34375, "completions/min_length": 329.0, "epoch": 0.14797507788161993, "grad_norm": 1.1312155373297763, "kl": 0.7554765939712524, "learning_rate": 9.528722691910354e-07, "loss": 0.0007577016949653625, "memory(GiB)": 165.8, "reward": 2.7775917053222656, "reward_std": 0.11222139000892639, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7775918245315552, "rewards/GeoVisalEntityMatch2ORM/std": 0.16375482082366943, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1235, "train_speed(iter/s)": 0.025787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 384.07293701171875, "completions/min_length": 314.0, "epoch": 0.14809489575844717, "grad_norm": 1.0326351932541642, "kl": 0.6985898613929749, "learning_rate": 9.527916577469103e-07, "loss": 0.0006999386241659522, "memory(GiB)": 165.8, "reward": 2.4757275581359863, "reward_std": 0.11717559397220612, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.49239420890808105, "rewards/GeoVisalEntityMatch2ORM/std": 0.2076462060213089, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1236, "train_speed(iter/s)": 0.025794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 403.3333435058594, "completions/min_length": 342.0, "epoch": 0.1482147136352744, "grad_norm": 1.256526094378549, "kl": 0.752823144197464, "learning_rate": 9.52710980834981e-07, "loss": 0.0007527197594754398, "memory(GiB)": 165.8, "reward": 2.434483051300049, "reward_std": 0.1274515986442566, "rewards/GeoLocAccuracyV2ORM/mean": 0.7687500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.37312692403793335, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6657328605651855, "rewards/GeoVisalEntityMatch2ORM/std": 0.1385716199874878, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1237, "train_speed(iter/s)": 0.0258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 383.9375, "completions/min_length": 320.0, "epoch": 0.1483345315121016, "grad_norm": 1.1422701946775387, "kl": 0.7183757424354553, "learning_rate": 9.526302384669121e-07, "loss": 0.0007193511119112372, "memory(GiB)": 165.8, "reward": 2.827864646911621, "reward_std": 0.10270635038614273, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8278645873069763, "rewards/GeoVisalEntityMatch2ORM/std": 0.12510427832603455, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1238, "train_speed(iter/s)": 0.025806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 449.0, "completions/mean_length": 379.8645935058594, "completions/min_length": 332.0, "epoch": 0.14845434938892882, "grad_norm": 1.1815228791361345, "kl": 0.7629120349884033, "learning_rate": 9.525494306543781e-07, "loss": 0.0007627134909853339, "memory(GiB)": 165.8, "reward": 2.2601191997528076, "reward_std": 0.10680416971445084, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7601191401481628, "rewards/GeoVisalEntityMatch2ORM/std": 0.1551029533147812, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 1239, "train_speed(iter/s)": 0.025806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 409.7083435058594, "completions/min_length": 342.0, "epoch": 0.14857416726575606, "grad_norm": 1.1121661789571797, "kl": 0.7504504024982452, "learning_rate": 9.524685574090627e-07, "loss": 0.0007521311636082828, "memory(GiB)": 165.8, "reward": 2.6544270515441895, "reward_std": 0.0908767580986023, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6544271111488342, "rewards/GeoVisalEntityMatch2ORM/std": 0.13930672407150269, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1240, "train_speed(iter/s)": 0.025807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 453.0, "completions/mean_length": 402.6145935058594, "completions/min_length": 367.0, "epoch": 0.14869398514258328, "grad_norm": 1.1307745237729947, "kl": 0.7738660871982574, "learning_rate": 9.52387618742659e-07, "loss": 0.0007761667366139591, "memory(GiB)": 165.8, "reward": 2.1441800594329834, "reward_std": 0.09523767232894897, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6441799402236938, "rewards/GeoVisalEntityMatch2ORM/std": 0.16270586848258972, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 1241, "train_speed(iter/s)": 0.025807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 417.3333435058594, "completions/min_length": 335.0, "epoch": 0.1488138030194105, "grad_norm": 1.0340733961503363, "kl": 0.7609741687774658, "learning_rate": 9.523066146668699e-07, "loss": 0.0007612885674461722, "memory(GiB)": 165.8, "reward": 2.3748557567596436, "reward_std": 0.17706914246082306, "rewards/GeoLocAccuracyV2ORM/mean": 0.6229166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.46236902475357056, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7623557448387146, "rewards/GeoVisalEntityMatch2ORM/std": 0.22854559123516083, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1242, "train_speed(iter/s)": 0.025813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 505.0, "completions/mean_length": 419.3958435058594, "completions/min_length": 368.0, "epoch": 0.1489336208962377, "grad_norm": 1.1260328333600023, "kl": 0.7912556827068329, "learning_rate": 9.522255451934073e-07, "loss": 0.0007917782058939338, "memory(GiB)": 165.8, "reward": 1.8314236402511597, "reward_std": 0.1669616550207138, "rewards/GeoLocAccuracyV2ORM/mean": 0.5104166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.5025156140327454, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5605902671813965, "rewards/GeoVisalEntityMatch2ORM/std": 0.12491277605295181, "rewards/MathFormat/mean": 0.7604166865348816, "rewards/MathFormat/std": 0.42906978726387024, "step": 1243, "train_speed(iter/s)": 0.025812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 501.0, "completions/mean_length": 440.4270935058594, "completions/min_length": 368.0, "epoch": 0.14905343877306493, "grad_norm": 1.1150637190508559, "kl": 0.752193421125412, "learning_rate": 9.521444103339929e-07, "loss": 0.0007525645196437836, "memory(GiB)": 165.8, "reward": 2.3265254497528076, "reward_std": 0.11338093876838684, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5765253305435181, "rewards/GeoVisalEntityMatch2ORM/std": 0.27012038230895996, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1244, "train_speed(iter/s)": 0.02582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 496.0, "completions/mean_length": 432.90625, "completions/min_length": 395.0, "epoch": 0.14917325664989217, "grad_norm": 1.1167895878683947, "kl": 0.7987524569034576, "learning_rate": 9.520632101003579e-07, "loss": 0.0007992635364644229, "memory(GiB)": 165.8, "reward": 2.2519097328186035, "reward_std": 0.07196661829948425, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7519097328186035, "rewards/GeoVisalEntityMatch2ORM/std": 0.295144647359848, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 1245, "train_speed(iter/s)": 0.025821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 506.0, "completions/mean_length": 460.875, "completions/min_length": 377.0, "epoch": 0.1492930745267194, "grad_norm": 1.080093252355523, "kl": 0.8080162405967712, "learning_rate": 9.519819445042425e-07, "loss": 0.0008066743612289429, "memory(GiB)": 165.8, "reward": 2.4586806297302246, "reward_std": 0.5204894542694092, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.3892412781715393, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8170139193534851, "rewards/GeoVisalEntityMatch2ORM/std": 0.1299736499786377, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.37463435530662537, "step": 1246, "train_speed(iter/s)": 0.025826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 440.1770935058594, "completions/min_length": 355.0, "epoch": 0.1494128924035466, "grad_norm": 1.0810014936704808, "kl": 0.732917994260788, "learning_rate": 9.519006135573971e-07, "loss": 0.0007333904504776001, "memory(GiB)": 165.8, "reward": 2.8561015129089355, "reward_std": 0.14045484364032745, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8727679252624512, "rewards/GeoVisalEntityMatch2ORM/std": 0.19175100326538086, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1247, "train_speed(iter/s)": 0.025832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/mean_length": 440.4895935058594, "completions/min_length": 399.0, "epoch": 0.14953271028037382, "grad_norm": 1.2648891469347656, "kl": 0.7304287254810333, "learning_rate": 9.518192172715805e-07, "loss": 0.0007320530712604523, "memory(GiB)": 165.8, "reward": 2.6154515743255615, "reward_std": 0.11587590724229813, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.615451455116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.15407231450080872, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1248, "train_speed(iter/s)": 0.025838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 519.0, "completions/mean_length": 466.34375, "completions/min_length": 399.0, "epoch": 0.14965252815720106, "grad_norm": 1.1568015871988577, "kl": 0.7070353329181671, "learning_rate": 9.517377556585621e-07, "loss": 0.000707807659637183, "memory(GiB)": 165.8, "reward": 2.7247025966644287, "reward_std": 0.201774463057518, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7663690447807312, "rewards/GeoVisalEntityMatch2ORM/std": 0.12120743095874786, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 1249, "train_speed(iter/s)": 0.025843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 431.07293701171875, "completions/min_length": 377.0, "epoch": 0.14977234603402828, "grad_norm": 0.9428837830148283, "kl": 0.7652747333049774, "learning_rate": 9.516562287301197e-07, "loss": 0.000765625387430191, "memory(GiB)": 165.8, "reward": 2.6542744636535645, "reward_std": 0.12409977614879608, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6709408164024353, "rewards/GeoVisalEntityMatch2ORM/std": 0.125261589884758, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1250, "train_speed(iter/s)": 0.025849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.34375, "completions/max_length": 543.0, "completions/mean_length": 441.40625, "completions/min_length": 368.0, "epoch": 0.1498921639108555, "grad_norm": 1.1316375469440512, "kl": 1.0276089310646057, "learning_rate": 9.515746364980415e-07, "loss": 0.0010275791864842176, "memory(GiB)": 165.8, "reward": 2.077381134033203, "reward_std": 0.310393750667572, "rewards/GeoLocAccuracyV2ORM/mean": 0.65625, "rewards/GeoLocAccuracyV2ORM/std": 0.4774521291255951, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7648810148239136, "rewards/GeoVisalEntityMatch2ORM/std": 0.16592565178871155, "rewards/MathFormat/mean": 0.65625, "rewards/MathFormat/std": 0.4774521291255951, "step": 1251, "train_speed(iter/s)": 0.025848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 506.0, "completions/mean_length": 472.44793701171875, "completions/min_length": 426.0, "epoch": 0.1500119817876827, "grad_norm": 1.086149281828617, "kl": 0.7181700170040131, "learning_rate": 9.514929789741245e-07, "loss": 0.0007192331249825656, "memory(GiB)": 165.8, "reward": 2.54608154296875, "reward_std": 0.24067339301109314, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6085813641548157, "rewards/GeoVisalEntityMatch2ORM/std": 0.17207913100719452, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 1252, "train_speed(iter/s)": 0.025854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2708333333333333, "completions/max_length": 506.0, "completions/mean_length": 436.07293701171875, "completions/min_length": 368.0, "epoch": 0.15013179966450996, "grad_norm": 1.115341171806781, "kl": 0.7852159738540649, "learning_rate": 9.514112561701752e-07, "loss": 0.0007833726704120636, "memory(GiB)": 165.8, "reward": 2.1432292461395264, "reward_std": 0.1955263912677765, "rewards/GeoLocAccuracyV2ORM/mean": 0.7229167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.44614821672439575, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6807292699813843, "rewards/GeoVisalEntityMatch2ORM/std": 0.11789649724960327, "rewards/MathFormat/mean": 0.7395833730697632, "rewards/MathFormat/std": 0.4411657452583313, "step": 1253, "train_speed(iter/s)": 0.025853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 506.0, "completions/mean_length": 468.2395935058594, "completions/min_length": 404.0, "epoch": 0.15025161754133717, "grad_norm": 1.128334692791818, "kl": 0.8474015891551971, "learning_rate": 9.513294680980098e-07, "loss": 0.000844040303491056, "memory(GiB)": 165.8, "reward": 2.2821943759918213, "reward_std": 0.45462337136268616, "rewards/GeoLocAccuracyV2ORM/mean": 0.8895833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3096616864204407, "rewards/GeoVisalEntityMatch2ORM/mean": 0.48636099696159363, "rewards/GeoVisalEntityMatch2ORM/std": 0.23396971821784973, "rewards/MathFormat/mean": 0.90625, "rewards/MathFormat/std": 0.2930107116699219, "step": 1254, "train_speed(iter/s)": 0.025858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 597.0, "completions/mean_length": 462.63543701171875, "completions/min_length": 391.0, "epoch": 0.1503714354181644, "grad_norm": 1.0209974742702612, "kl": 0.7855178415775299, "learning_rate": 9.512476147694536e-07, "loss": 0.0007849385729059577, "memory(GiB)": 165.8, "reward": 2.7123398780822754, "reward_std": 0.1686519831418991, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7331730723381042, "rewards/GeoVisalEntityMatch2ORM/std": 0.16023235023021698, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1255, "train_speed(iter/s)": 0.025865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 552.0, "completions/mean_length": 483.57293701171875, "completions/min_length": 407.0, "epoch": 0.1504912532949916, "grad_norm": 44.871158973475964, "kl": 58.37869048118591, "learning_rate": 9.511656961963419e-07, "loss": 0.04777805507183075, "memory(GiB)": 165.8, "reward": 2.259185791015625, "reward_std": 0.506983757019043, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3632214665412903, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5508522987365723, "rewards/GeoVisalEntityMatch2ORM/std": 0.1899903118610382, "rewards/MathFormat/mean": 0.875, "rewards/MathFormat/std": 0.33245500922203064, "step": 1256, "train_speed(iter/s)": 0.025871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 552.0, "completions/mean_length": 471.8645935058594, "completions/min_length": 413.0, "epoch": 0.15061107117181882, "grad_norm": 343.36376495879006, "kl": 662.079060792923, "learning_rate": 9.510837123905189e-07, "loss": 0.6444333791732788, "memory(GiB)": 165.8, "reward": 1.8480722904205322, "reward_std": 0.47223782539367676, "rewards/GeoLocAccuracyV2ORM/mean": 0.5958333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4741566777229309, "rewards/GeoVisalEntityMatch2ORM/mean": 0.564738929271698, "rewards/GeoVisalEntityMatch2ORM/std": 0.1203228086233139, "rewards/MathFormat/mean": 0.6875, "rewards/MathFormat/std": 0.4659455716609955, "step": 1257, "train_speed(iter/s)": 0.025872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 531.0, "completions/mean_length": 469.26043701171875, "completions/min_length": 394.0, "epoch": 0.15073088904864607, "grad_norm": 0.7815566701505277, "kl": 0.7049993574619293, "learning_rate": 9.51001663363838e-07, "loss": 0.0007051043212413788, "memory(GiB)": 165.8, "reward": 2.7075068950653076, "reward_std": 0.048340022563934326, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7075066566467285, "rewards/GeoVisalEntityMatch2ORM/std": 0.2226007729768753, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1258, "train_speed(iter/s)": 0.025879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 530.0, "completions/mean_length": 471.5, "completions/min_length": 403.0, "epoch": 0.15085070692547328, "grad_norm": 1.1647663301953748, "kl": 0.8789043426513672, "learning_rate": 9.50919549128163e-07, "loss": 0.0008747465908527374, "memory(GiB)": 165.8, "reward": 2.2395291328430176, "reward_std": 0.2912787199020386, "rewards/GeoLocAccuracyV2ORM/mean": 0.6229166984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.44378259778022766, "rewards/GeoVisalEntityMatch2ORM/mean": 0.627029299736023, "rewards/GeoVisalEntityMatch2ORM/std": 0.2229771465063095, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1259, "train_speed(iter/s)": 0.025885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 538.0, "completions/mean_length": 470.1875, "completions/min_length": 398.0, "epoch": 0.1509705248023005, "grad_norm": 1.2272519739228491, "kl": 0.6789988577365875, "learning_rate": 9.508373696953663e-07, "loss": 0.0006800865521654487, "memory(GiB)": 165.8, "reward": 2.7330729961395264, "reward_std": 0.11002781987190247, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7330729365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.15553367137908936, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1260, "train_speed(iter/s)": 0.02589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.34375, "completions/max_length": 555.0, "completions/mean_length": 499.59375, "completions/min_length": 449.0, "epoch": 0.15109034267912771, "grad_norm": 1.1750994305941374, "kl": 1.7542149424552917, "learning_rate": 9.5075512507733e-07, "loss": 0.0017527639865875244, "memory(GiB)": 165.8, "reward": 2.0074777603149414, "reward_std": 0.5112979412078857, "rewards/GeoLocAccuracyV2ORM/mean": 0.5666667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.47741997241973877, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7741443514823914, "rewards/GeoVisalEntityMatch2ORM/std": 0.19878101348876953, "rewards/MathFormat/mean": 0.6666666865348816, "rewards/MathFormat/std": 0.4738790988922119, "step": 1261, "train_speed(iter/s)": 0.025895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 552.0, "completions/mean_length": 489.75, "completions/min_length": 435.0, "epoch": 0.15121016055595496, "grad_norm": 1.1250462802673076, "kl": 0.8079535663127899, "learning_rate": 9.506728152859455e-07, "loss": 0.0008050898904912174, "memory(GiB)": 165.8, "reward": 1.936309576034546, "reward_std": 0.6383876204490662, "rewards/GeoLocAccuracyV2ORM/mean": 0.6104166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.4850347936153412, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5446429252624512, "rewards/GeoVisalEntityMatch2ORM/std": 0.19868256151676178, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 1262, "train_speed(iter/s)": 0.0259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 544.0, "completions/mean_length": 476.4375, "completions/min_length": 405.0, "epoch": 0.15132997843278218, "grad_norm": 1.075721721371363, "kl": 0.9255732893943787, "learning_rate": 9.50590440333114e-07, "loss": 0.000916463672183454, "memory(GiB)": 165.8, "reward": 2.5873265266418457, "reward_std": 0.37176501750946045, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783626317977905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.753993034362793, "rewards/GeoVisalEntityMatch2ORM/std": 0.26151543855667114, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783626317977905, "step": 1263, "train_speed(iter/s)": 0.025905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 451.85418701171875, "completions/min_length": 390.0, "epoch": 0.1514497963096094, "grad_norm": 1.1414694484494752, "kl": 0.7697924077510834, "learning_rate": 9.505080002307455e-07, "loss": 0.0007696797838434577, "memory(GiB)": 165.8, "reward": 2.54296875, "reward_std": 0.2878471314907074, "rewards/GeoLocAccuracyV2ORM/mean": 0.8125, "rewards/GeoLocAccuracyV2ORM/std": 0.39236128330230713, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7513021230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.12690839171409607, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 1264, "train_speed(iter/s)": 0.025911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 529.0, "completions/mean_length": 467.66668701171875, "completions/min_length": 403.0, "epoch": 0.1515696141864366, "grad_norm": 0.9261492132530684, "kl": 0.7340285181999207, "learning_rate": 9.504254949907601e-07, "loss": 0.0007352903485298157, "memory(GiB)": 165.8, "reward": 2.5248754024505615, "reward_std": 0.061149969696998596, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7248753309249878, "rewards/GeoVisalEntityMatch2ORM/std": 0.15565745532512665, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1265, "train_speed(iter/s)": 0.025917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/mean_length": 453.3333435058594, "completions/min_length": 383.0, "epoch": 0.15168943206326385, "grad_norm": 1.1558116072554687, "kl": 0.721583217382431, "learning_rate": 9.503429246250867e-07, "loss": 0.0007224107976071537, "memory(GiB)": 165.8, "reward": 2.6021578311920166, "reward_std": 0.10719456523656845, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6021577715873718, "rewards/GeoVisalEntityMatch2ORM/std": 0.24352654814720154, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1266, "train_speed(iter/s)": 0.025922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2708333333333333, "completions/max_length": 506.0, "completions/mean_length": 426.5833435058594, "completions/min_length": 368.0, "epoch": 0.15180924994009107, "grad_norm": 1.1639642416944567, "kl": 0.7726037502288818, "learning_rate": 9.502602891456641e-07, "loss": 0.0007729406352154911, "memory(GiB)": 165.8, "reward": 2.062847137451172, "reward_std": 0.20133939385414124, "rewards/GeoLocAccuracyV2ORM/mean": 0.612500011920929, "rewards/GeoLocAccuracyV2ORM/std": 0.46504101157188416, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7211806178092957, "rewards/GeoVisalEntityMatch2ORM/std": 0.11708938330411911, "rewards/MathFormat/mean": 0.7291666865348816, "rewards/MathFormat/std": 0.44672298431396484, "step": 1267, "train_speed(iter/s)": 0.025921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 407.625, "completions/min_length": 340.0, "epoch": 0.15192906781691828, "grad_norm": 1.2593132516833554, "kl": 0.686778724193573, "learning_rate": 9.501775885644405e-07, "loss": 0.0006881207227706909, "memory(GiB)": 165.8, "reward": 2.7006077766418457, "reward_std": 0.08198797702789307, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7006076574325562, "rewards/GeoVisalEntityMatch2ORM/std": 0.22064505517482758, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1268, "train_speed(iter/s)": 0.025927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 400.2708435058594, "completions/min_length": 356.0, "epoch": 0.1520488856937455, "grad_norm": 1.1836158458415056, "kl": 0.7421871721744537, "learning_rate": 9.500948228933727e-07, "loss": 0.0007422765484079719, "memory(GiB)": 165.8, "reward": 2.582291603088379, "reward_std": 0.11862270534038544, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5822917222976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.26535382866859436, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1269, "train_speed(iter/s)": 0.025933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/mean_length": 383.71875, "completions/min_length": 321.0, "epoch": 0.15216870357057272, "grad_norm": 1.0703679669297683, "kl": 0.691929817199707, "learning_rate": 9.500119921444283e-07, "loss": 0.0006927127833478153, "memory(GiB)": 165.8, "reward": 2.676609754562378, "reward_std": 0.05509645864367485, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6766098737716675, "rewards/GeoVisalEntityMatch2ORM/std": 0.2349080890417099, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1270, "train_speed(iter/s)": 0.025939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 399.0625, "completions/min_length": 344.0, "epoch": 0.15228852144739996, "grad_norm": 1.1472650886740958, "kl": 0.7437763214111328, "learning_rate": 9.499290963295829e-07, "loss": 0.0007439616019837558, "memory(GiB)": 165.8, "reward": 2.3685765266418457, "reward_std": 0.09494030475616455, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906975746154785, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6081597805023193, "rewards/GeoVisalEntityMatch2ORM/std": 0.17130382359027863, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1271, "train_speed(iter/s)": 0.025944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 379.65625, "completions/min_length": 337.0, "epoch": 0.15240833932422718, "grad_norm": 1.164300601345331, "kl": 0.7812645137310028, "learning_rate": 9.498461354608227e-07, "loss": 0.0007833391427993774, "memory(GiB)": 165.8, "reward": 2.7836806774139404, "reward_std": 0.09040775895118713, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7836806178092957, "rewards/GeoVisalEntityMatch2ORM/std": 0.11407142132520676, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1272, "train_speed(iter/s)": 0.02595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.0, "completions/mean_length": 331.9375, "completions/min_length": 242.0, "epoch": 0.1525281572010544, "grad_norm": 1.3188758344890463, "kl": 0.8095161318778992, "learning_rate": 9.497631095501424e-07, "loss": 0.000810772180557251, "memory(GiB)": 165.8, "reward": 2.5647571086883545, "reward_std": 0.11014130711555481, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5647569894790649, "rewards/GeoVisalEntityMatch2ORM/std": 0.25812098383903503, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1273, "train_speed(iter/s)": 0.025956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 410.0, "completions/mean_length": 360.7395935058594, "completions/min_length": 307.0, "epoch": 0.1526479750778816, "grad_norm": 1.2645606090703576, "kl": 0.7315858602523804, "learning_rate": 9.496800186095465e-07, "loss": 0.000732913613319397, "memory(GiB)": 165.8, "reward": 2.586632251739502, "reward_std": 0.13123762607574463, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5866319537162781, "rewards/GeoVisalEntityMatch2ORM/std": 0.1701728254556656, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1274, "train_speed(iter/s)": 0.025965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 348.0625, "completions/min_length": 271.0, "epoch": 0.15276779295470885, "grad_norm": 1.2746264567808308, "kl": 0.780292421579361, "learning_rate": 9.495968626510491e-07, "loss": 0.0007821868057362735, "memory(GiB)": 165.8, "reward": 2.4880642890930176, "reward_std": 0.1824936866760254, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.24566414952278137, "rewards/GeoVisalEntityMatch2ORM/mean": 0.571397602558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.16629375517368317, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1275, "train_speed(iter/s)": 0.025968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.0, "completions/mean_length": 356.375, "completions/min_length": 317.0, "epoch": 0.15288761083153607, "grad_norm": 1.2397994680455777, "kl": 0.7750926911830902, "learning_rate": 9.495136416866732e-07, "loss": 0.0007758488645777106, "memory(GiB)": 165.8, "reward": 2.544907569885254, "reward_std": 0.15043818950653076, "rewards/GeoLocAccuracyV2ORM/mean": 0.9666666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.1607002168893814, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5782407522201538, "rewards/GeoVisalEntityMatch2ORM/std": 0.18708576261997223, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1276, "train_speed(iter/s)": 0.025974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 360.47918701171875, "completions/min_length": 314.0, "epoch": 0.15300742870836329, "grad_norm": 1.3374196059979395, "kl": 0.7590793967247009, "learning_rate": 9.494303557284518e-07, "loss": 0.000761141418479383, "memory(GiB)": 165.8, "reward": 2.3576390743255615, "reward_std": 0.26694434881210327, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3985444903373718, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5972222685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.17646904289722443, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1277, "train_speed(iter/s)": 0.025977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 351.59375, "completions/min_length": 309.0, "epoch": 0.1531272465851905, "grad_norm": 1.2691839497159725, "kl": 0.7641236186027527, "learning_rate": 9.493470047884266e-07, "loss": 0.0007641265983693302, "memory(GiB)": 165.8, "reward": 2.55439829826355, "reward_std": 0.18994566798210144, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.24566414952278137, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6377314925193787, "rewards/GeoVisalEntityMatch2ORM/std": 0.20724046230316162, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1278, "train_speed(iter/s)": 0.025983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 382.0, "completions/mean_length": 335.66668701171875, "completions/min_length": 294.0, "epoch": 0.15324706446201775, "grad_norm": 1.2356711050262819, "kl": 0.761489987373352, "learning_rate": 9.492635888786493e-07, "loss": 0.0007623285055160522, "memory(GiB)": 165.8, "reward": 2.6443867683410645, "reward_std": 0.09863436222076416, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6443866491317749, "rewards/GeoVisalEntityMatch2ORM/std": 0.1862279176712036, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1279, "train_speed(iter/s)": 0.025989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.0, "completions/mean_length": 318.54168701171875, "completions/min_length": 269.0, "epoch": 0.15336688233884496, "grad_norm": 1.2981801932174601, "kl": 0.8464152216911316, "learning_rate": 9.491801080111807e-07, "loss": 0.0008473446359857917, "memory(GiB)": 165.8, "reward": 2.5202584266662598, "reward_std": 0.07652986794710159, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.520258367061615, "rewards/GeoVisalEntityMatch2ORM/std": 0.21901503205299377, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1280, "train_speed(iter/s)": 0.025996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 356.0, "completions/mean_length": 309.53125, "completions/min_length": 231.0, "epoch": 0.15348670021567218, "grad_norm": 1.2110914641126154, "kl": 0.7923578917980194, "learning_rate": 9.490965621980911e-07, "loss": 0.0007924735546112061, "memory(GiB)": 165.8, "reward": 2.475893020629883, "reward_std": 0.07723333686590195, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7258929014205933, "rewards/GeoVisalEntityMatch2ORM/std": 0.2081560641527176, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1281, "train_speed(iter/s)": 0.026004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 356.0, "completions/mean_length": 303.3333435058594, "completions/min_length": 264.0, "epoch": 0.1536065180924994, "grad_norm": 1.0483964306145537, "kl": 0.753022164106369, "learning_rate": 9.490129514514602e-07, "loss": 0.000755536078941077, "memory(GiB)": 165.8, "reward": 2.8501157760620117, "reward_std": 0.08277349919080734, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8501157164573669, "rewards/GeoVisalEntityMatch2ORM/std": 0.15155243873596191, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1282, "train_speed(iter/s)": 0.02601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 314.8333435058594, "completions/min_length": 268.0, "epoch": 0.1537263359693266, "grad_norm": 1.2906924010802245, "kl": 0.8203109800815582, "learning_rate": 9.489292757833767e-07, "loss": 0.0008229563827626407, "memory(GiB)": 165.8, "reward": 2.5094246864318848, "reward_std": 0.09657374024391174, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.50942462682724, "rewards/GeoVisalEntityMatch2ORM/std": 0.14111477136611938, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1283, "train_speed(iter/s)": 0.02601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.0, "completions/mean_length": 322.07293701171875, "completions/min_length": 289.0, "epoch": 0.15384615384615385, "grad_norm": 1.40544077456083, "kl": 0.7864813804626465, "learning_rate": 9.488455352059394e-07, "loss": 0.0007865304942242801, "memory(GiB)": 165.8, "reward": 2.4600942134857178, "reward_std": 0.15243516862392426, "rewards/GeoLocAccuracyV2ORM/mean": 0.9250000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5350942611694336, "rewards/GeoVisalEntityMatch2ORM/std": 0.11298713833093643, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1284, "train_speed(iter/s)": 0.026009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 368.0, "completions/mean_length": 316.7708435058594, "completions/min_length": 210.0, "epoch": 0.15396597172298107, "grad_norm": 1.1584274011900297, "kl": 0.7866895496845245, "learning_rate": 9.487617297312559e-07, "loss": 0.0007868409156799316, "memory(GiB)": 165.8, "reward": 2.3888890743255615, "reward_std": 0.0831618458032608, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3888889253139496, "rewards/GeoVisalEntityMatch2ORM/std": 0.19744962453842163, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1285, "train_speed(iter/s)": 0.026009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.0, "completions/mean_length": 316.625, "completions/min_length": 259.0, "epoch": 0.1540857895998083, "grad_norm": 1.2563092637502975, "kl": 0.8382110297679901, "learning_rate": 9.486778593714434e-07, "loss": 0.0008392781019210815, "memory(GiB)": 165.8, "reward": 2.7026705741882324, "reward_std": 0.09921044111251831, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357589185237885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7235037088394165, "rewards/GeoVisalEntityMatch2ORM/std": 0.2661222815513611, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1286, "train_speed(iter/s)": 0.026015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.0, "completions/mean_length": 304.41668701171875, "completions/min_length": 233.0, "epoch": 0.1542056074766355, "grad_norm": 1.3425079651156748, "kl": 0.8344587683677673, "learning_rate": 9.485939241386285e-07, "loss": 0.0008359799976460636, "memory(GiB)": 165.8, "reward": 2.5986690521240234, "reward_std": 0.0835503488779068, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5986690521240234, "rewards/GeoVisalEntityMatch2ORM/std": 0.15849201381206512, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1287, "train_speed(iter/s)": 0.026017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.0, "completions/mean_length": 315.28125, "completions/min_length": 211.0, "epoch": 0.15432542535346275, "grad_norm": 1.0474617554545693, "kl": 0.8523800373077393, "learning_rate": 9.485099240449473e-07, "loss": 0.0014495756477117538, "memory(GiB)": 165.8, "reward": 2.6190104484558105, "reward_std": 0.06864562630653381, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6190104484558105, "rewards/GeoVisalEntityMatch2ORM/std": 0.273045152425766, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1288, "train_speed(iter/s)": 0.026026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.0, "completions/mean_length": 308.5833435058594, "completions/min_length": 207.0, "epoch": 0.15444524323028996, "grad_norm": 1.3813948746671478, "kl": 0.7929113805294037, "learning_rate": 9.484258591025447e-07, "loss": 0.0007937501068226993, "memory(GiB)": 165.8, "reward": 2.711197853088379, "reward_std": 0.19273719191551208, "rewards/GeoLocAccuracyV2ORM/mean": 0.8916666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.2751713991165161, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8195313215255737, "rewards/GeoVisalEntityMatch2ORM/std": 0.12477364391088486, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1289, "train_speed(iter/s)": 0.026032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/mean_length": 311.1770935058594, "completions/min_length": 187.0, "epoch": 0.15456506110711718, "grad_norm": 1.0193896935797113, "kl": 0.747714102268219, "learning_rate": 9.483417293235759e-07, "loss": 0.000750565086491406, "memory(GiB)": 165.8, "reward": 2.610416889190674, "reward_std": 0.0384981706738472, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6104167103767395, "rewards/GeoVisalEntityMatch2ORM/std": 0.10774807631969452, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1290, "train_speed(iter/s)": 0.026037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.0, "completions/mean_length": 316.2708435058594, "completions/min_length": 257.0, "epoch": 0.1546848789839444, "grad_norm": 1.1608405827790298, "kl": 0.7892743945121765, "learning_rate": 9.482575347202047e-07, "loss": 0.0007900918717496097, "memory(GiB)": 165.8, "reward": 2.5024306774139404, "reward_std": 0.09435286372900009, "rewards/GeoLocAccuracyV2ORM/mean": 0.7833333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.38021230697631836, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7190972566604614, "rewards/GeoVisalEntityMatch2ORM/std": 0.15907220542430878, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1291, "train_speed(iter/s)": 0.026044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 366.0, "completions/mean_length": 314.88543701171875, "completions/min_length": 264.0, "epoch": 0.15480469686077164, "grad_norm": 1.0856485573101846, "kl": 0.8016420900821686, "learning_rate": 9.481732753046044e-07, "loss": 0.0008014893974177539, "memory(GiB)": 165.8, "reward": 2.6652777194976807, "reward_std": 0.14151589572429657, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7319445013999939, "rewards/GeoVisalEntityMatch2ORM/std": 0.09299595654010773, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1292, "train_speed(iter/s)": 0.02605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.0, "completions/mean_length": 311.8645935058594, "completions/min_length": 275.0, "epoch": 0.15492451473759886, "grad_norm": 1.1964407593289523, "kl": 0.7763842940330505, "learning_rate": 9.480889510889583e-07, "loss": 0.0007778058643452823, "memory(GiB)": 165.8, "reward": 2.6611111164093018, "reward_std": 0.1501200646162033, "rewards/GeoLocAccuracyV2ORM/mean": 0.8833333849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.28383341431617737, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7777778506278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.21838794648647308, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1293, "train_speed(iter/s)": 0.026052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.0, "completions/mean_length": 311.8645935058594, "completions/min_length": 221.0, "epoch": 0.15504433261442607, "grad_norm": 1.3905620514042374, "kl": 0.8015052378177643, "learning_rate": 9.48004562085458e-07, "loss": 0.000802457332611084, "memory(GiB)": 165.8, "reward": 2.3209176063537598, "reward_std": 0.11297957599163055, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.570917546749115, "rewards/GeoVisalEntityMatch2ORM/std": 0.22688300907611847, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1294, "train_speed(iter/s)": 0.026057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.0, "completions/mean_length": 297.71875, "completions/min_length": 220.0, "epoch": 0.1551641504912533, "grad_norm": 1.4317721150846585, "kl": 0.7582740783691406, "learning_rate": 9.479201083063055e-07, "loss": 0.0007581189274787903, "memory(GiB)": 165.8, "reward": 2.829662799835205, "reward_std": 0.07668986916542053, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8296627402305603, "rewards/GeoVisalEntityMatch2ORM/std": 0.18043123185634613, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1295, "train_speed(iter/s)": 0.02605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 311.8125, "completions/min_length": 225.0, "epoch": 0.1552839683680805, "grad_norm": 1.400151021861737, "kl": 0.8676963746547699, "learning_rate": 9.478355897637117e-07, "loss": 0.0008674512500874698, "memory(GiB)": 165.8, "reward": 2.6966147422790527, "reward_std": 0.10067495703697205, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6966146230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.18709293007850647, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1296, "train_speed(iter/s)": 0.026056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.0, "completions/mean_length": 317.71875, "completions/min_length": 250.0, "epoch": 0.15540378624490775, "grad_norm": 1.350755772029964, "kl": 0.797987699508667, "learning_rate": 9.477510064698966e-07, "loss": 0.000799087225459516, "memory(GiB)": 165.8, "reward": 2.5097737312316895, "reward_std": 0.20678219199180603, "rewards/GeoLocAccuracyV2ORM/mean": 0.9145833849906921, "rewards/GeoLocAccuracyV2ORM/std": 0.26871198415756226, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5951904654502869, "rewards/GeoVisalEntityMatch2ORM/std": 0.17402754724025726, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1297, "train_speed(iter/s)": 0.026062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.0, "completions/mean_length": 328.2083435058594, "completions/min_length": 269.0, "epoch": 0.15552360412173497, "grad_norm": 1.1227755281773366, "kl": 0.7617897987365723, "learning_rate": 9.476663584370901e-07, "loss": 0.0007640471449121833, "memory(GiB)": 165.8, "reward": 2.6794395446777344, "reward_std": 0.043346185237169266, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6794394850730896, "rewards/GeoVisalEntityMatch2ORM/std": 0.16688352823257446, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1298, "train_speed(iter/s)": 0.026068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 346.0, "completions/mean_length": 309.6145935058594, "completions/min_length": 243.0, "epoch": 0.15564342199856218, "grad_norm": 1.1396948195927281, "kl": 0.8256427943706512, "learning_rate": 9.475816456775312e-07, "loss": 0.0014232173562049866, "memory(GiB)": 165.8, "reward": 2.6627604961395264, "reward_std": 0.06004940718412399, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6627604365348816, "rewards/GeoVisalEntityMatch2ORM/std": 0.14442753791809082, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1299, "train_speed(iter/s)": 0.026073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.0, "completions/mean_length": 341.10418701171875, "completions/min_length": 292.0, "epoch": 0.1557632398753894, "grad_norm": 1.0878037376759138, "kl": 0.804194450378418, "learning_rate": 9.474968682034681e-07, "loss": 0.000803070783149451, "memory(GiB)": 165.8, "reward": 2.847916603088379, "reward_std": 0.11332286149263382, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.29970744252204895, "rewards/GeoVisalEntityMatch2ORM/mean": 0.981249988079071, "rewards/GeoVisalEntityMatch2ORM/std": 0.04734162241220474, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1300, "train_speed(iter/s)": 0.026078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.0, "completions/mean_length": 331.3645935058594, "completions/min_length": 273.0, "epoch": 0.15588305775221664, "grad_norm": 1.381257856017839, "kl": 0.8022509217262268, "learning_rate": 9.474120260271586e-07, "loss": 0.000802667229436338, "memory(GiB)": 165.8, "reward": 2.4777462482452393, "reward_std": 0.07115887850522995, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.47774624824523926, "rewards/GeoVisalEntityMatch2ORM/std": 0.2095130980014801, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1301, "train_speed(iter/s)": 0.026081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.0, "completions/mean_length": 319.04168701171875, "completions/min_length": 246.0, "epoch": 0.15600287562904386, "grad_norm": 1.3049235031929796, "kl": 0.8221968412399292, "learning_rate": 9.473271191608697e-07, "loss": 0.000825225084554404, "memory(GiB)": 165.8, "reward": 2.6983799934387207, "reward_std": 0.11416123807430267, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7067129611968994, "rewards/GeoVisalEntityMatch2ORM/std": 0.1888405680656433, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1302, "train_speed(iter/s)": 0.026083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.0, "completions/mean_length": 328.1145935058594, "completions/min_length": 268.0, "epoch": 0.15612269350587107, "grad_norm": 1.1823595306422057, "kl": 0.8078324198722839, "learning_rate": 9.47242147616878e-07, "loss": 0.0008087555761449039, "memory(GiB)": 165.8, "reward": 2.6860861778259277, "reward_std": 0.17377084493637085, "rewards/GeoLocAccuracyV2ORM/mean": 0.8854166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3201904296875, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8006696701049805, "rewards/GeoVisalEntityMatch2ORM/std": 0.14594805240631104, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1303, "train_speed(iter/s)": 0.026088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.0, "completions/mean_length": 344.8020935058594, "completions/min_length": 268.0, "epoch": 0.1562425113826983, "grad_norm": 1.3191619187914412, "kl": 0.80416339635849, "learning_rate": 9.471571114074692e-07, "loss": 0.0008057877421379089, "memory(GiB)": 165.8, "reward": 2.460214138031006, "reward_std": 0.09551917016506195, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7102141380310059, "rewards/GeoVisalEntityMatch2ORM/std": 0.1065797507762909, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1304, "train_speed(iter/s)": 0.026087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 344.19793701171875, "completions/min_length": 291.0, "epoch": 0.15636232925952553, "grad_norm": 1.0169610230817754, "kl": 0.8335814476013184, "learning_rate": 9.470720105449385e-07, "loss": 0.0008241484756581485, "memory(GiB)": 165.8, "reward": 2.6459574699401855, "reward_std": 0.13807684183120728, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6667906641960144, "rewards/GeoVisalEntityMatch2ORM/std": 0.10518959909677505, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1305, "train_speed(iter/s)": 0.026093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.0, "completions/mean_length": 344.4375, "completions/min_length": 296.0, "epoch": 0.15648214713635275, "grad_norm": 1.1866928479660765, "kl": 0.7822552919387817, "learning_rate": 9.469868450415903e-07, "loss": 0.0007847423548810184, "memory(GiB)": 165.8, "reward": 2.6394832134246826, "reward_std": 0.08341789245605469, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6394832134246826, "rewards/GeoVisalEntityMatch2ORM/std": 0.13940885663032532, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1306, "train_speed(iter/s)": 0.026094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 388.0, "completions/mean_length": 338.90625, "completions/min_length": 297.0, "epoch": 0.15660196501317997, "grad_norm": 1.3316541974798426, "kl": 0.7723104953765869, "learning_rate": 9.469016149097384e-07, "loss": 0.0007726351614110172, "memory(GiB)": 165.8, "reward": 2.6956019401550293, "reward_std": 0.08813001215457916, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6956018805503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.18219240009784698, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1307, "train_speed(iter/s)": 0.026096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/mean_length": 345.5208435058594, "completions/min_length": 309.0, "epoch": 0.15672178289000718, "grad_norm": 1.3772781637586038, "kl": 0.7822574079036713, "learning_rate": 9.468163201617061e-07, "loss": 0.0007832745905034244, "memory(GiB)": 165.8, "reward": 2.6510913372039795, "reward_std": 0.17210108041763306, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.2919985353946686, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7760912775993347, "rewards/GeoVisalEntityMatch2ORM/std": 0.1869485080242157, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1308, "train_speed(iter/s)": 0.026092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 398.0, "completions/mean_length": 343.44793701171875, "completions/min_length": 278.0, "epoch": 0.1568416007668344, "grad_norm": 1.2422718044762993, "kl": 0.7822161614894867, "learning_rate": 9.467309608098257e-07, "loss": 0.0007831156253814697, "memory(GiB)": 165.8, "reward": 2.4454572200775146, "reward_std": 0.0942734107375145, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6454572081565857, "rewards/GeoVisalEntityMatch2ORM/std": 0.12595710158348083, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1309, "train_speed(iter/s)": 0.02609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.0, "completions/mean_length": 340.2708435058594, "completions/min_length": 307.0, "epoch": 0.15696141864366164, "grad_norm": 1.3805435134121524, "kl": 0.8032982647418976, "learning_rate": 9.466455368664394e-07, "loss": 0.0008049681782722473, "memory(GiB)": 165.8, "reward": 2.521763563156128, "reward_std": 0.1628425121307373, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.24566414952278137, "rewards/GeoVisalEntityMatch2ORM/mean": 0.605096697807312, "rewards/GeoVisalEntityMatch2ORM/std": 0.2619187533855438, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1310, "train_speed(iter/s)": 0.02609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 375.0, "completions/mean_length": 331.57293701171875, "completions/min_length": 283.0, "epoch": 0.15708123652048886, "grad_norm": 1.3464770075557597, "kl": 0.8243263363838196, "learning_rate": 9.46560048343898e-07, "loss": 0.0008250350947491825, "memory(GiB)": 165.8, "reward": 2.3958334922790527, "reward_std": 0.23183131217956543, "rewards/GeoLocAccuracyV2ORM/mean": 0.7666666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.41918885707855225, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6291667222976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.17302411794662476, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1311, "train_speed(iter/s)": 0.026095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 353.19793701171875, "completions/min_length": 286.0, "epoch": 0.15720105439731608, "grad_norm": 1.2218330116310352, "kl": 0.7427279353141785, "learning_rate": 9.464744952545624e-07, "loss": 0.0007450804114341736, "memory(GiB)": 165.8, "reward": 2.7559523582458496, "reward_std": 0.09253446757793427, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7559523582458496, "rewards/GeoVisalEntityMatch2ORM/std": 0.12649628520011902, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1312, "train_speed(iter/s)": 0.026099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 384.0, "completions/mean_length": 334.16668701171875, "completions/min_length": 263.0, "epoch": 0.1573208722741433, "grad_norm": 1.3268588984320406, "kl": 0.812477320432663, "learning_rate": 9.463888776108021e-07, "loss": 0.0008149693603627384, "memory(GiB)": 165.8, "reward": 2.5811965465545654, "reward_std": 0.08828005194664001, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5811966061592102, "rewards/GeoVisalEntityMatch2ORM/std": 0.28034913539886475, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1313, "train_speed(iter/s)": 0.026097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/mean_length": 340.8333435058594, "completions/min_length": 291.0, "epoch": 0.15744069015097054, "grad_norm": 1.0513586209192327, "kl": 0.8747760355472565, "learning_rate": 9.463031954249966e-07, "loss": 0.0014710029354318976, "memory(GiB)": 165.8, "reward": 2.6369545459747314, "reward_std": 0.06408561021089554, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6369543671607971, "rewards/GeoVisalEntityMatch2ORM/std": 0.14972582459449768, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1314, "train_speed(iter/s)": 0.026097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 397.0, "completions/mean_length": 341.29168701171875, "completions/min_length": 282.0, "epoch": 0.15756050802779775, "grad_norm": 1.1741422603141871, "kl": 0.7096894383430481, "learning_rate": 9.462174487095342e-07, "loss": 0.0007107804412953556, "memory(GiB)": 165.8, "reward": 2.519097328186035, "reward_std": 0.16617891192436218, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5503472685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.37860915064811707, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1315, "train_speed(iter/s)": 0.026097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/mean_length": 360.625, "completions/min_length": 307.0, "epoch": 0.15768032590462497, "grad_norm": 1.3091946604280305, "kl": 0.8255113363265991, "learning_rate": 9.461316374768131e-07, "loss": 0.0008273273706436157, "memory(GiB)": 165.8, "reward": 2.5438899993896484, "reward_std": 0.15977992117404938, "rewards/GeoLocAccuracyV2ORM/mean": 0.9250000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.23440855741500854, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6188898086547852, "rewards/GeoVisalEntityMatch2ORM/std": 0.15115886926651, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1316, "train_speed(iter/s)": 0.026101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/mean_length": 342.32293701171875, "completions/min_length": 278.0, "epoch": 0.15780014378145218, "grad_norm": 1.1610016727807024, "kl": 0.7841954827308655, "learning_rate": 9.460457617392401e-07, "loss": 0.0007853669812902808, "memory(GiB)": 165.8, "reward": 2.2190310955047607, "reward_std": 0.17191344499588013, "rewards/GeoLocAccuracyV2ORM/mean": 0.5979167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.48946353793144226, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6211144328117371, "rewards/GeoVisalEntityMatch2ORM/std": 0.12208753079175949, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1317, "train_speed(iter/s)": 0.026107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.0, "completions/mean_length": 349.9895935058594, "completions/min_length": 304.0, "epoch": 0.15791996165827943, "grad_norm": 1.2771892280670836, "kl": 0.7884626984596252, "learning_rate": 9.459598215092319e-07, "loss": 0.0007900446653366089, "memory(GiB)": 165.8, "reward": 2.6009557247161865, "reward_std": 0.10135547816753387, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.600955605506897, "rewards/GeoVisalEntityMatch2ORM/std": 0.1694459766149521, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1318, "train_speed(iter/s)": 0.02611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 414.0, "completions/mean_length": 364.25, "completions/min_length": 240.0, "epoch": 0.15803977953510664, "grad_norm": 1.293516170523878, "kl": 0.7711897194385529, "learning_rate": 9.458738167992146e-07, "loss": 0.00077010941458866, "memory(GiB)": 165.8, "reward": 2.6866815090179443, "reward_std": 0.09097251296043396, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6866816282272339, "rewards/GeoVisalEntityMatch2ORM/std": 0.19114843010902405, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1319, "train_speed(iter/s)": 0.026114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 379.0, "completions/mean_length": 335.8333435058594, "completions/min_length": 285.0, "epoch": 0.15815959741193386, "grad_norm": 1.2702381774103841, "kl": 0.7964518964290619, "learning_rate": 9.457877476216227e-07, "loss": 0.000798078894149512, "memory(GiB)": 165.8, "reward": 2.6848959922790527, "reward_std": 0.08776093274354935, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6848958730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.13104009628295898, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1320, "train_speed(iter/s)": 0.026114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/mean_length": 360.54168701171875, "completions/min_length": 314.0, "epoch": 0.15827941528876108, "grad_norm": 1.2411124242907314, "kl": 0.805934339761734, "learning_rate": 9.457016139889014e-07, "loss": 0.0008073846693150699, "memory(GiB)": 165.8, "reward": 2.5528831481933594, "reward_std": 0.1145838052034378, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5695496797561646, "rewards/GeoVisalEntityMatch2ORM/std": 0.15646612644195557, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1321, "train_speed(iter/s)": 0.026118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/mean_length": 361.0, "completions/min_length": 312.0, "epoch": 0.1583992331655883, "grad_norm": 1.2534601424606822, "kl": 0.7835288643836975, "learning_rate": 9.456154159135041e-07, "loss": 0.0007856860756874084, "memory(GiB)": 165.8, "reward": 2.6549460887908936, "reward_std": 0.10499825328588486, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6632794737815857, "rewards/GeoVisalEntityMatch2ORM/std": 0.1477617770433426, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1322, "train_speed(iter/s)": 0.026121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.0, "completions/mean_length": 352.1145935058594, "completions/min_length": 316.0, "epoch": 0.15851905104241554, "grad_norm": 1.2218528857608681, "kl": 0.7828995287418365, "learning_rate": 9.45529153407894e-07, "loss": 0.000783819705247879, "memory(GiB)": 165.8, "reward": 2.546998977661133, "reward_std": 0.15835978090763092, "rewards/GeoLocAccuracyV2ORM/mean": 0.887499988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.3013128936290741, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6594990491867065, "rewards/GeoVisalEntityMatch2ORM/std": 0.14964860677719116, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1323, "train_speed(iter/s)": 0.026122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/mean_length": 357.66668701171875, "completions/min_length": 309.0, "epoch": 0.15863886891924275, "grad_norm": 1.21759328192713, "kl": 0.7715765237808228, "learning_rate": 9.454428264845435e-07, "loss": 0.0007721005822531879, "memory(GiB)": 165.8, "reward": 2.6600446701049805, "reward_std": 0.08251605182886124, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6600446701049805, "rewards/GeoVisalEntityMatch2ORM/std": 0.1453220695257187, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1324, "train_speed(iter/s)": 0.026125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 391.0, "completions/mean_length": 344.875, "completions/min_length": 237.0, "epoch": 0.15875868679606997, "grad_norm": 1.2750820361480937, "kl": 0.7945329248905182, "learning_rate": 9.453564351559347e-07, "loss": 0.0007951284642331302, "memory(GiB)": 165.8, "reward": 2.471445083618164, "reward_std": 0.10222423076629639, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6631118059158325, "rewards/GeoVisalEntityMatch2ORM/std": 0.12221013009548187, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1325, "train_speed(iter/s)": 0.026128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 399.0, "completions/mean_length": 332.76043701171875, "completions/min_length": 271.0, "epoch": 0.1588785046728972, "grad_norm": 1.246431769746915, "kl": 0.7899272441864014, "learning_rate": 9.452699794345581e-07, "loss": 0.0007906965911388397, "memory(GiB)": 165.8, "reward": 2.357060194015503, "reward_std": 0.11023619771003723, "rewards/GeoLocAccuracyV2ORM/mean": 0.7708333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.42250296473503113, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5862268805503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.23801617324352264, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1326, "train_speed(iter/s)": 0.026133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 416.0, "completions/mean_length": 362.94793701171875, "completions/min_length": 314.0, "epoch": 0.15899832254972443, "grad_norm": 1.2229692592829404, "kl": 0.7817171514034271, "learning_rate": 9.451834593329144e-07, "loss": 0.0007852911949157715, "memory(GiB)": 165.8, "reward": 2.683995008468628, "reward_std": 0.0901971310377121, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6839947700500488, "rewards/GeoVisalEntityMatch2ORM/std": 0.17986594140529633, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1327, "train_speed(iter/s)": 0.026139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/mean_length": 353.0, "completions/min_length": 320.0, "epoch": 0.15911814042655165, "grad_norm": 1.280076909259232, "kl": 0.7873581647872925, "learning_rate": 9.450968748635133e-07, "loss": 0.0007896858151070774, "memory(GiB)": 165.8, "reward": 2.408906936645508, "reward_std": 0.08967653661966324, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4089067876338959, "rewards/GeoVisalEntityMatch2ORM/std": 0.15874868631362915, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1328, "train_speed(iter/s)": 0.026134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/mean_length": 357.72918701171875, "completions/min_length": 306.0, "epoch": 0.15923795830337886, "grad_norm": 1.2341195382893766, "kl": 0.8271487653255463, "learning_rate": 9.450102260388738e-07, "loss": 0.000826741277705878, "memory(GiB)": 165.8, "reward": 2.546261787414551, "reward_std": 0.1422196626663208, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6879283785820007, "rewards/GeoVisalEntityMatch2ORM/std": 0.11577494442462921, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1329, "train_speed(iter/s)": 0.026135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 412.0, "completions/mean_length": 371.6770935058594, "completions/min_length": 330.0, "epoch": 0.15935777618020608, "grad_norm": 1.2270650697509564, "kl": 0.7675699889659882, "learning_rate": 9.449235128715241e-07, "loss": 0.0007692873477935791, "memory(GiB)": 165.8, "reward": 2.513396978378296, "reward_std": 0.09431232511997223, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7133970260620117, "rewards/GeoVisalEntityMatch2ORM/std": 0.17967192828655243, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1330, "train_speed(iter/s)": 0.026132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/mean_length": 368.8333435058594, "completions/min_length": 322.0, "epoch": 0.15947759405703332, "grad_norm": 1.128089072391474, "kl": 0.7501138150691986, "learning_rate": 9.448367353740018e-07, "loss": 0.0007502834778279066, "memory(GiB)": 165.8, "reward": 2.82066011428833, "reward_std": 0.06578251719474792, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8206596374511719, "rewards/GeoVisalEntityMatch2ORM/std": 0.09158926457166672, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1331, "train_speed(iter/s)": 0.026138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 414.0, "completions/mean_length": 357.72918701171875, "completions/min_length": 318.0, "epoch": 0.15959741193386054, "grad_norm": 1.2178894771108322, "kl": 0.8169600665569305, "learning_rate": 9.447498935588538e-07, "loss": 0.0008185779443010688, "memory(GiB)": 165.8, "reward": 2.6709325313568115, "reward_std": 0.06568174064159393, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6709325313568115, "rewards/GeoVisalEntityMatch2ORM/std": 0.2833852469921112, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1332, "train_speed(iter/s)": 0.02614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 368.57293701171875, "completions/min_length": 314.0, "epoch": 0.15971722981068776, "grad_norm": 1.1298755494701582, "kl": 0.7895027101039886, "learning_rate": 9.446629874386365e-07, "loss": 0.0007901992648839951, "memory(GiB)": 165.8, "reward": 2.769345283508301, "reward_std": 0.07154103368520737, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7693452835083008, "rewards/GeoVisalEntityMatch2ORM/std": 0.21357755362987518, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1333, "train_speed(iter/s)": 0.026143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 392.79168701171875, "completions/min_length": 335.0, "epoch": 0.15983704768751497, "grad_norm": 1.227926039139029, "kl": 0.7546357214450836, "learning_rate": 9.44576017025915e-07, "loss": 0.0007573738694190979, "memory(GiB)": 165.8, "reward": 2.3857638835906982, "reward_std": 0.17027361690998077, "rewards/GeoLocAccuracyV2ORM/mean": 0.8041667342185974, "rewards/GeoLocAccuracyV2ORM/std": 0.35181236267089844, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5815972685813904, "rewards/GeoVisalEntityMatch2ORM/std": 0.10580091923475266, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1334, "train_speed(iter/s)": 0.026146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 435.0, "completions/mean_length": 393.15625, "completions/min_length": 301.0, "epoch": 0.1599568655643422, "grad_norm": 1.2362862670396735, "kl": 0.8648509979248047, "learning_rate": 9.444889823332644e-07, "loss": 0.0008611828088760376, "memory(GiB)": 165.8, "reward": 2.2294981479644775, "reward_std": 0.22604936361312866, "rewards/GeoLocAccuracyV2ORM/mean": 0.7687500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.37312692403793335, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4919981360435486, "rewards/GeoVisalEntityMatch2ORM/std": 0.27684450149536133, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 1335, "train_speed(iter/s)": 0.026147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 383.8125, "completions/min_length": 344.0, "epoch": 0.16007668344116943, "grad_norm": 1.2661684473804309, "kl": 0.7623538374900818, "learning_rate": 9.444018833732689e-07, "loss": 0.0007634535431861877, "memory(GiB)": 165.8, "reward": 2.827120542526245, "reward_std": 0.10358832776546478, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8271204829216003, "rewards/GeoVisalEntityMatch2ORM/std": 0.1374596655368805, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1336, "train_speed(iter/s)": 0.026151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/mean_length": 392.88543701171875, "completions/min_length": 360.0, "epoch": 0.16019650131799665, "grad_norm": 1.189408733701442, "kl": 0.7637222707271576, "learning_rate": 9.443147201585215e-07, "loss": 0.0007641564006917179, "memory(GiB)": 165.8, "reward": 2.5516369342803955, "reward_std": 0.10279186069965363, "rewards/GeoLocAccuracyV2ORM/mean": 0.8166667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.33800238370895386, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7349702715873718, "rewards/GeoVisalEntityMatch2ORM/std": 0.16818271577358246, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1337, "train_speed(iter/s)": 0.026155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 411.9895935058594, "completions/min_length": 364.0, "epoch": 0.16031631919482386, "grad_norm": 1.0152059115374676, "kl": 0.7412706911563873, "learning_rate": 9.442274927016251e-07, "loss": 0.000741447031032294, "memory(GiB)": 165.8, "reward": 2.574963092803955, "reward_std": 0.17703962326049805, "rewards/GeoLocAccuracyV2ORM/mean": 0.8854166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3201904594898224, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6895461082458496, "rewards/GeoVisalEntityMatch2ORM/std": 0.13338850438594818, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1338, "train_speed(iter/s)": 0.026158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/mean_length": 386.79168701171875, "completions/min_length": 336.0, "epoch": 0.16043613707165108, "grad_norm": 1.2451886698885914, "kl": 0.7467002868652344, "learning_rate": 9.441402010151918e-07, "loss": 0.0007488764822483063, "memory(GiB)": 165.8, "reward": 2.323975086212158, "reward_std": 0.09216552972793579, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5739749073982239, "rewards/GeoVisalEntityMatch2ORM/std": 0.1751280277967453, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1339, "train_speed(iter/s)": 0.026162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.0, "completions/mean_length": 375.4375, "completions/min_length": 339.0, "epoch": 0.16055595494847832, "grad_norm": 1.1857302264397924, "kl": 0.81202033162117, "learning_rate": 9.440528451118425e-07, "loss": 0.0008143037557601929, "memory(GiB)": 165.8, "reward": 2.5734786987304688, "reward_std": 0.1030985414981842, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7838954925537109, "rewards/GeoVisalEntityMatch2ORM/std": 0.13120833039283752, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1340, "train_speed(iter/s)": 0.026165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 433.0, "completions/mean_length": 385.625, "completions/min_length": 321.0, "epoch": 0.16067577282530554, "grad_norm": 1.4875654188514702, "kl": 1.1471343338489532, "learning_rate": 9.43965425004208e-07, "loss": 0.001132787554524839, "memory(GiB)": 165.8, "reward": 2.1212780475616455, "reward_std": 0.21803975105285645, "rewards/GeoLocAccuracyV2ORM/mean": 0.5833333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4603583812713623, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6004445552825928, "rewards/GeoVisalEntityMatch2ORM/std": 0.13295184075832367, "rewards/MathFormat/mean": 0.9375, "rewards/MathFormat/std": 0.2433321326971054, "step": 1341, "train_speed(iter/s)": 0.026163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 436.0, "completions/mean_length": 386.53125, "completions/min_length": 310.0, "epoch": 0.16079559070213276, "grad_norm": 1.094592778525136, "kl": 0.7485767304897308, "learning_rate": 9.43877940704928e-07, "loss": 0.000752632855437696, "memory(GiB)": 165.8, "reward": 2.7201390266418457, "reward_std": 0.03856302797794342, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7201389074325562, "rewards/GeoVisalEntityMatch2ORM/std": 0.08164667338132858, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1342, "train_speed(iter/s)": 0.026164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 410.78125, "completions/min_length": 375.0, "epoch": 0.16091540857895997, "grad_norm": 1.0598741044125082, "kl": 0.7757947444915771, "learning_rate": 9.437903922266515e-07, "loss": 0.0007767988136038184, "memory(GiB)": 165.8, "reward": 2.598454236984253, "reward_std": 0.11667903512716293, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6151207685470581, "rewards/GeoVisalEntityMatch2ORM/std": 0.19210654497146606, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1343, "train_speed(iter/s)": 0.026166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 404.6458435058594, "completions/min_length": 327.0, "epoch": 0.16103522645578722, "grad_norm": 1.088121212410132, "kl": 0.7138622999191284, "learning_rate": 9.437027795820372e-07, "loss": 0.0007146894931793213, "memory(GiB)": 165.8, "reward": 2.6662325859069824, "reward_std": 0.09087037295103073, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6662326455116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.20155596733093262, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1344, "train_speed(iter/s)": 0.02617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 396.91668701171875, "completions/min_length": 357.0, "epoch": 0.16115504433261443, "grad_norm": 1.0312082824329964, "kl": 0.7311885356903076, "learning_rate": 9.436151027837523e-07, "loss": 0.0007308998028747737, "memory(GiB)": 165.8, "reward": 2.4655017852783203, "reward_std": 0.17910322546958923, "rewards/GeoLocAccuracyV2ORM/mean": 0.875, "rewards/GeoLocAccuracyV2ORM/std": 0.33245497941970825, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5905019640922546, "rewards/GeoVisalEntityMatch2ORM/std": 0.26113855838775635, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1345, "train_speed(iter/s)": 0.026177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 492.0, "completions/mean_length": 423.88543701171875, "completions/min_length": 364.0, "epoch": 0.16127486220944165, "grad_norm": 10.413128509611994, "kl": 14.7004976272583, "learning_rate": 9.435273618444742e-07, "loss": 0.014548420906066895, "memory(GiB)": 165.8, "reward": 2.0798611640930176, "reward_std": 0.2055887132883072, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4652777910232544, "rewards/GeoVisalEntityMatch2ORM/std": 0.21685776114463806, "rewards/MathFormat/mean": 0.8645833730697632, "rewards/MathFormat/std": 0.34396424889564514, "step": 1346, "train_speed(iter/s)": 0.026171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 390.0208435058594, "completions/min_length": 341.0, "epoch": 0.16139468008626887, "grad_norm": 0.8038030227492677, "kl": 0.6886113286018372, "learning_rate": 9.434395567768887e-07, "loss": 0.0006884559988975525, "memory(GiB)": 165.8, "reward": 2.820138931274414, "reward_std": 0.07049521803855896, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8284723162651062, "rewards/GeoVisalEntityMatch2ORM/std": 0.17875325679779053, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1347, "train_speed(iter/s)": 0.026172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/mean_length": 373.07293701171875, "completions/min_length": 319.0, "epoch": 0.16151449796309608, "grad_norm": 1.2389952450510022, "kl": 0.7533118426799774, "learning_rate": 9.433516875936916e-07, "loss": 0.000755173503421247, "memory(GiB)": 165.8, "reward": 2.628803014755249, "reward_std": 0.11881648749113083, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6288029551506042, "rewards/GeoVisalEntityMatch2ORM/std": 0.21655882894992828, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1348, "train_speed(iter/s)": 0.026177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 449.0, "completions/mean_length": 382.72918701171875, "completions/min_length": 328.0, "epoch": 0.16163431583992333, "grad_norm": 1.2005121754404466, "kl": 1.0129437148571014, "learning_rate": 9.432637543075875e-07, "loss": 0.0010108302813023329, "memory(GiB)": 165.8, "reward": 2.3994503021240234, "reward_std": 0.32968321442604065, "rewards/GeoLocAccuracyV2ORM/mean": 0.8645833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.34396424889564514, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6702835559844971, "rewards/GeoVisalEntityMatch2ORM/std": 0.13961179554462433, "rewards/MathFormat/mean": 0.8645833730697632, "rewards/MathFormat/std": 0.34396424889564514, "step": 1349, "train_speed(iter/s)": 0.026172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 403.3020935058594, "completions/min_length": 346.0, "epoch": 0.16175413371675054, "grad_norm": 1.2633186113532482, "kl": 0.7475851774215698, "learning_rate": 9.431757569312901e-07, "loss": 0.000749399303458631, "memory(GiB)": 165.8, "reward": 2.5783278942108154, "reward_std": 0.17154467105865479, "rewards/GeoLocAccuracyV2ORM/mean": 0.9729167222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.15250825881958008, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6054112911224365, "rewards/GeoVisalEntityMatch2ORM/std": 0.1329297423362732, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1350, "train_speed(iter/s)": 0.026176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.0, "completions/mean_length": 394.69793701171875, "completions/min_length": 361.0, "epoch": 0.16187395159357776, "grad_norm": 1.8523773711186793, "kl": 0.7858497202396393, "learning_rate": 9.430876954775234e-07, "loss": 0.0007866075029596686, "memory(GiB)": 165.8, "reward": 2.734722137451172, "reward_std": 0.10552352666854858, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7347222566604614, "rewards/GeoVisalEntityMatch2ORM/std": 0.18781080842018127, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1351, "train_speed(iter/s)": 0.026177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/mean_length": 393.875, "completions/min_length": 322.0, "epoch": 0.16199376947040497, "grad_norm": 1.2245008566142173, "kl": 0.7172812223434448, "learning_rate": 9.429995699590193e-07, "loss": 0.0007202749839052558, "memory(GiB)": 165.8, "reward": 2.5714287757873535, "reward_std": 0.10881449282169342, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5714285373687744, "rewards/GeoVisalEntityMatch2ORM/std": 0.2070196568965912, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1352, "train_speed(iter/s)": 0.026174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 403.96875, "completions/min_length": 363.0, "epoch": 0.16211358734723222, "grad_norm": 1.1048092079932055, "kl": 0.7219367921352386, "learning_rate": 9.429113803885198e-07, "loss": 0.0007228826871141791, "memory(GiB)": 165.8, "reward": 2.6610491275787354, "reward_std": 0.08200560510158539, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6693824529647827, "rewards/GeoVisalEntityMatch2ORM/std": 0.2052929848432541, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1353, "train_speed(iter/s)": 0.026175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/mean_length": 399.75, "completions/min_length": 344.0, "epoch": 0.16223340522405943, "grad_norm": 1.1416244789783019, "kl": 0.6929376721382141, "learning_rate": 9.428231267787762e-07, "loss": 0.0006944413180463016, "memory(GiB)": 165.8, "reward": 2.7135169506073, "reward_std": 0.13290607929229736, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7218502759933472, "rewards/GeoVisalEntityMatch2ORM/std": 0.1326684057712555, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1354, "train_speed(iter/s)": 0.026179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 415.3645935058594, "completions/min_length": 360.0, "epoch": 0.16235322310088665, "grad_norm": 1.0677206843586264, "kl": 0.7717045843601227, "learning_rate": 9.427348091425485e-07, "loss": 0.0007732287049293518, "memory(GiB)": 165.8, "reward": 2.2911460399627686, "reward_std": 0.18058675527572632, "rewards/GeoLocAccuracyV2ORM/mean": 0.7833333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4120977520942688, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5078125, "rewards/GeoVisalEntityMatch2ORM/std": 0.09792712330818176, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1355, "train_speed(iter/s)": 0.026177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 399.9270935058594, "completions/min_length": 351.0, "epoch": 0.16247304097771387, "grad_norm": 1.3260645060881378, "kl": 0.7597281634807587, "learning_rate": 9.426464274926065e-07, "loss": 0.0007606310537084937, "memory(GiB)": 165.8, "reward": 2.7868926525115967, "reward_std": 0.10610974580049515, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7868924140930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.2080146223306656, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1356, "train_speed(iter/s)": 0.026182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 412.76043701171875, "completions/min_length": 368.0, "epoch": 0.1625928588545411, "grad_norm": 1.0593969377536483, "kl": 0.7211466729640961, "learning_rate": 9.42557981841729e-07, "loss": 0.0007220730185508728, "memory(GiB)": 165.8, "reward": 2.758333206176758, "reward_std": 0.11843128502368927, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7583333253860474, "rewards/GeoVisalEntityMatch2ORM/std": 0.21385399997234344, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1357, "train_speed(iter/s)": 0.02618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 401.59375, "completions/min_length": 348.0, "epoch": 0.16271267673136833, "grad_norm": 1.1419063994934207, "kl": 0.7149665951728821, "learning_rate": 9.42469472202704e-07, "loss": 0.0007162243127822876, "memory(GiB)": 165.8, "reward": 2.77705454826355, "reward_std": 0.10858145356178284, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.777054488658905, "rewards/GeoVisalEntityMatch2ORM/std": 0.1771775782108307, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1358, "train_speed(iter/s)": 0.02618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.0, "completions/mean_length": 385.9895935058594, "completions/min_length": 333.0, "epoch": 0.16283249460819554, "grad_norm": 1.186383784847548, "kl": 0.7288549542427063, "learning_rate": 9.423808985883288e-07, "loss": 0.0007301768055185676, "memory(GiB)": 165.8, "reward": 2.136012077331543, "reward_std": 0.2223067283630371, "rewards/GeoLocAccuracyV2ORM/mean": 0.3895833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.4508422911167145, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7464286684989929, "rewards/GeoVisalEntityMatch2ORM/std": 0.11323404312133789, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1359, "train_speed(iter/s)": 0.026166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 499.0, "completions/mean_length": 432.63543701171875, "completions/min_length": 382.0, "epoch": 0.16295231248502276, "grad_norm": 1.1986790127659097, "kl": 0.7476867139339447, "learning_rate": 9.422922610114102e-07, "loss": 0.0007491509313695133, "memory(GiB)": 165.8, "reward": 2.7289187908172607, "reward_std": 0.08757306635379791, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.728918731212616, "rewards/GeoVisalEntityMatch2ORM/std": 0.09608457237482071, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1360, "train_speed(iter/s)": 0.026159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 419.4270935058594, "completions/min_length": 387.0, "epoch": 0.16307213036184998, "grad_norm": 1.086848537315108, "kl": 0.736312747001648, "learning_rate": 9.422035594847639e-07, "loss": 0.0007378235459327698, "memory(GiB)": 165.8, "reward": 2.366152763366699, "reward_std": 0.2557799816131592, "rewards/GeoLocAccuracyV2ORM/mean": 0.7562500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.40543997287750244, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6099026203155518, "rewards/GeoVisalEntityMatch2ORM/std": 0.17777182161808014, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1361, "train_speed(iter/s)": 0.026163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/mean_length": 402.2395935058594, "completions/min_length": 345.0, "epoch": 0.16319194823867722, "grad_norm": 1.178092038959228, "kl": 0.7626036405563354, "learning_rate": 9.421147940212151e-07, "loss": 0.0007640390540473163, "memory(GiB)": 165.8, "reward": 2.5527429580688477, "reward_std": 0.09284564852714539, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5527428388595581, "rewards/GeoVisalEntityMatch2ORM/std": 0.19075754284858704, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1362, "train_speed(iter/s)": 0.026163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 393.85418701171875, "completions/min_length": 357.0, "epoch": 0.16331176611550444, "grad_norm": 1.0029010670301073, "kl": 0.7678036689758301, "learning_rate": 9.420259646335982e-07, "loss": 0.000769004225730896, "memory(GiB)": 165.8, "reward": 2.690972089767456, "reward_std": 0.07293331623077393, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6909722089767456, "rewards/GeoVisalEntityMatch2ORM/std": 0.16397681832313538, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1363, "train_speed(iter/s)": 0.026152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 404.53125, "completions/min_length": 348.0, "epoch": 0.16343158399233165, "grad_norm": 1.1591454746100442, "kl": 0.7212720513343811, "learning_rate": 9.419370713347566e-07, "loss": 0.0007226541638374329, "memory(GiB)": 165.8, "reward": 2.6309194564819336, "reward_std": 0.13279548287391663, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6559193134307861, "rewards/GeoVisalEntityMatch2ORM/std": 0.2319478690624237, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1364, "train_speed(iter/s)": 0.02614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/mean_length": 408.9895935058594, "completions/min_length": 362.0, "epoch": 0.16355140186915887, "grad_norm": 1.2359549784269288, "kl": 0.6961467266082764, "learning_rate": 9.41848114137543e-07, "loss": 0.0006979095633141696, "memory(GiB)": 165.8, "reward": 2.685657024383545, "reward_std": 0.07779533416032791, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6856571435928345, "rewards/GeoVisalEntityMatch2ORM/std": 0.195921391248703, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1365, "train_speed(iter/s)": 0.026141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 473.0, "completions/mean_length": 417.5208435058594, "completions/min_length": 365.0, "epoch": 0.1636712197459861, "grad_norm": 1.0717915034921681, "kl": 0.7926663160324097, "learning_rate": 9.417590930548196e-07, "loss": 0.0007952215964905918, "memory(GiB)": 165.8, "reward": 2.5504300594329834, "reward_std": 0.06191056966781616, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5504299402236938, "rewards/GeoVisalEntityMatch2ORM/std": 0.1455593705177307, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1366, "train_speed(iter/s)": 0.02614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 414.8020935058594, "completions/min_length": 369.0, "epoch": 0.16379103762281333, "grad_norm": 1.0921110328063397, "kl": 0.6853801906108856, "learning_rate": 9.416700080994578e-07, "loss": 0.0006860122084617615, "memory(GiB)": 165.8, "reward": 2.578831911087036, "reward_std": 0.21187517046928406, "rewards/GeoLocAccuracyV2ORM/mean": 0.9333333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.2222689986228943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6454985737800598, "rewards/GeoVisalEntityMatch2ORM/std": 0.200053870677948, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1367, "train_speed(iter/s)": 0.026143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 518.0, "completions/mean_length": 429.6770935058594, "completions/min_length": 374.0, "epoch": 0.16391085549964055, "grad_norm": 1.1400345741377593, "kl": 0.7153047621250153, "learning_rate": 9.415808592843382e-07, "loss": 0.0007166763534769416, "memory(GiB)": 165.8, "reward": 2.3480353355407715, "reward_std": 0.1679326742887497, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4897017478942871, "rewards/GeoVisalEntityMatch2ORM/std": 0.15916846692562103, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1368, "train_speed(iter/s)": 0.026148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 414.4895935058594, "completions/min_length": 374.0, "epoch": 0.16403067337646776, "grad_norm": 1.1502511329391065, "kl": 0.7258696854114532, "learning_rate": 9.414916466223502e-07, "loss": 0.0007264725863933563, "memory(GiB)": 165.8, "reward": 2.6496529579162598, "reward_std": 0.10514454543590546, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6579861044883728, "rewards/GeoVisalEntityMatch2ORM/std": 0.18808792531490326, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1369, "train_speed(iter/s)": 0.026147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 428.75, "completions/min_length": 353.0, "epoch": 0.16415049125329498, "grad_norm": 1.1665303747150415, "kl": 0.7328847646713257, "learning_rate": 9.414023701263931e-07, "loss": 0.0007347557693719864, "memory(GiB)": 165.8, "reward": 2.745089292526245, "reward_std": 0.10277415812015533, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7450892925262451, "rewards/GeoVisalEntityMatch2ORM/std": 0.17056165635585785, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1370, "train_speed(iter/s)": 0.026144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/mean_length": 436.47918701171875, "completions/min_length": 353.0, "epoch": 0.16427030913012222, "grad_norm": 1.1549511658438238, "kl": 0.7207675576210022, "learning_rate": 9.413130298093751e-07, "loss": 0.0007235109806060791, "memory(GiB)": 165.8, "reward": 2.6651580333709717, "reward_std": 0.06391631811857224, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6651579141616821, "rewards/GeoVisalEntityMatch2ORM/std": 0.08293378353118896, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1371, "train_speed(iter/s)": 0.026146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 495.0, "completions/mean_length": 451.0833435058594, "completions/min_length": 410.0, "epoch": 0.16439012700694944, "grad_norm": 1.1250326148829346, "kl": 0.7357475757598877, "learning_rate": 9.412236256842134e-07, "loss": 0.0007367432117462158, "memory(GiB)": 165.8, "reward": 2.5090527534484863, "reward_std": 0.10202206671237946, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5090526342391968, "rewards/GeoVisalEntityMatch2ORM/std": 0.11860325187444687, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1372, "train_speed(iter/s)": 0.02615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 499.0, "completions/mean_length": 446.125, "completions/min_length": 413.0, "epoch": 0.16450994488377665, "grad_norm": 1.1658031429799685, "kl": 0.7906830310821533, "learning_rate": 9.41134157763835e-07, "loss": 0.0007911573047749698, "memory(GiB)": 165.8, "reward": 1.9990156888961792, "reward_std": 0.16418613493442535, "rewards/GeoLocAccuracyV2ORM/mean": 0.6916666030883789, "rewards/GeoLocAccuracyV2ORM/std": 0.4513527452945709, "rewards/GeoVisalEntityMatch2ORM/mean": 0.557348906993866, "rewards/GeoVisalEntityMatch2ORM/std": 0.17503677308559418, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 1373, "train_speed(iter/s)": 0.026148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 528.0, "completions/mean_length": 452.09375, "completions/min_length": 381.0, "epoch": 0.16462976276060387, "grad_norm": 1.2415396998659682, "kl": 0.7187368273735046, "learning_rate": 9.410446260611758e-07, "loss": 0.0007207182934507728, "memory(GiB)": 165.8, "reward": 2.63826060295105, "reward_std": 0.08912717550992966, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6382606029510498, "rewards/GeoVisalEntityMatch2ORM/std": 0.2302771657705307, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1374, "train_speed(iter/s)": 0.026152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 543.0, "completions/mean_length": 461.72918701171875, "completions/min_length": 405.0, "epoch": 0.16474958063743111, "grad_norm": 1.0487313179799522, "kl": 0.703742504119873, "learning_rate": 9.409550305891806e-07, "loss": 0.0007055047899484634, "memory(GiB)": 165.8, "reward": 2.688310384750366, "reward_std": 0.08957754075527191, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6883102655410767, "rewards/GeoVisalEntityMatch2ORM/std": 0.21122108399868011, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1375, "train_speed(iter/s)": 0.026147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 461.5520935058594, "completions/min_length": 386.0, "epoch": 0.16486939851425833, "grad_norm": 1.1078415129139183, "kl": 0.709080696105957, "learning_rate": 9.408653713608039e-07, "loss": 0.0007093201274983585, "memory(GiB)": 165.8, "reward": 2.287367820739746, "reward_std": 0.20472078025341034, "rewards/GeoLocAccuracyV2ORM/mean": 0.7791666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3653165102005005, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5290343761444092, "rewards/GeoVisalEntityMatch2ORM/std": 0.11935253441333771, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 1376, "train_speed(iter/s)": 0.026151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 531.0, "completions/mean_length": 479.19793701171875, "completions/min_length": 421.0, "epoch": 0.16498921639108555, "grad_norm": 1.0871556804985785, "kl": 0.7273484170436859, "learning_rate": 9.407756483890094e-07, "loss": 0.0007273306837305427, "memory(GiB)": 165.8, "reward": 2.6924076080322266, "reward_std": 0.07141745090484619, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6924077868461609, "rewards/GeoVisalEntityMatch2ORM/std": 0.11179278045892715, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1377, "train_speed(iter/s)": 0.026158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 506.0, "completions/mean_length": 471.32293701171875, "completions/min_length": 416.0, "epoch": 0.16510903426791276, "grad_norm": 1.1309709862443458, "kl": 0.7618379592895508, "learning_rate": 9.406858616867697e-07, "loss": 0.0007624551653862, "memory(GiB)": 165.8, "reward": 2.1998512744903564, "reward_std": 0.5048033595085144, "rewards/GeoLocAccuracyV2ORM/mean": 0.7645833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.4225185811519623, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6540179252624512, "rewards/GeoVisalEntityMatch2ORM/std": 0.18695782124996185, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 1378, "train_speed(iter/s)": 0.026152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2708333333333333, "completions/max_length": 551.0, "completions/mean_length": 493.63543701171875, "completions/min_length": 434.0, "epoch": 0.16522885214474, "grad_norm": 1.0996463268217123, "kl": 0.7841748297214508, "learning_rate": 9.40596011267067e-07, "loss": 0.0007842531194910407, "memory(GiB)": 165.8, "reward": 1.9841580390930176, "reward_std": 0.5102988481521606, "rewards/GeoLocAccuracyV2ORM/mean": 0.6875, "rewards/GeoLocAccuracyV2ORM/std": 0.4659455716609955, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5570746660232544, "rewards/GeoVisalEntityMatch2ORM/std": 0.21613839268684387, "rewards/MathFormat/mean": 0.7395833730697632, "rewards/MathFormat/std": 0.4411657154560089, "step": 1379, "train_speed(iter/s)": 0.026154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 552.0, "completions/mean_length": 491.4270935058594, "completions/min_length": 421.0, "epoch": 0.16534867002156722, "grad_norm": 1.1371016969668952, "kl": 0.8139247000217438, "learning_rate": 9.405060971428922e-07, "loss": 0.0008137226104736328, "memory(GiB)": 165.8, "reward": 2.2936675548553467, "reward_std": 0.5053422451019287, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3547917604446411, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5853340029716492, "rewards/GeoVisalEntityMatch2ORM/std": 0.1741451621055603, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 1380, "train_speed(iter/s)": 0.026158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.22916666666666666, "completions/max_length": 552.0, "completions/mean_length": 478.1458435058594, "completions/min_length": 395.0, "epoch": 0.16546848789839444, "grad_norm": 0.9513396228592435, "kl": 0.7347809076309204, "learning_rate": 9.404161193272459e-07, "loss": 0.0007337319548241794, "memory(GiB)": 165.8, "reward": 2.3016371726989746, "reward_std": 0.2882976830005646, "rewards/GeoLocAccuracyV2ORM/mean": 0.7395833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4315161108970642, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7808036208152771, "rewards/GeoVisalEntityMatch2ORM/std": 0.18529346585273743, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 1381, "train_speed(iter/s)": 0.02616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.40625, "completions/max_length": 507.0, "completions/mean_length": 446.3333435058594, "completions/min_length": 358.0, "epoch": 0.16558830577522166, "grad_norm": 1.142952415085755, "kl": 0.824568510055542, "learning_rate": 9.403260778331379e-07, "loss": 0.0008246427169069648, "memory(GiB)": 165.8, "reward": 1.7543401718139648, "reward_std": 0.3365335762500763, "rewards/GeoLocAccuracyV2ORM/mean": 0.6041666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4915960431098938, "rewards/GeoVisalEntityMatch2ORM/mean": 0.546006977558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.15944448113441467, "rewards/MathFormat/mean": 0.6041666865348816, "rewards/MathFormat/std": 0.4915960431098938, "step": 1382, "train_speed(iter/s)": 0.026155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 456.0, "completions/mean_length": 401.34375, "completions/min_length": 368.0, "epoch": 0.16570812365204887, "grad_norm": 1.1795594053983247, "kl": 0.6969175338745117, "learning_rate": 9.402359726735867e-07, "loss": 0.0006973544950596988, "memory(GiB)": 165.8, "reward": 1.1229968070983887, "reward_std": 0.1959012746810913, "rewards/GeoLocAccuracyV2ORM/mean": 0.1979166716337204, "rewards/GeoLocAccuracyV2ORM/std": 0.4005205035209656, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6750801801681519, "rewards/GeoVisalEntityMatch2ORM/std": 0.16146698594093323, "rewards/MathFormat/mean": 0.25, "rewards/MathFormat/std": 0.435285747051239, "step": 1383, "train_speed(iter/s)": 0.026151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 528.0, "completions/mean_length": 453.46875, "completions/min_length": 372.0, "epoch": 0.16582794152887612, "grad_norm": 1.1519701965067028, "kl": 0.7550861239433289, "learning_rate": 9.401458038616205e-07, "loss": 0.000754786073230207, "memory(GiB)": 165.8, "reward": 2.142759323120117, "reward_std": 0.19134016335010529, "rewards/GeoLocAccuracyV2ORM/mean": 0.6208333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.4067630171775818, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5427592396736145, "rewards/GeoVisalEntityMatch2ORM/std": 0.2019631713628769, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357587695121765, "step": 1384, "train_speed(iter/s)": 0.026155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 506.0, "completions/mean_length": 440.2083435058594, "completions/min_length": 387.0, "epoch": 0.16594775940570333, "grad_norm": 1.102524812576912, "kl": 0.7597009241580963, "learning_rate": 9.400555714102765e-07, "loss": 0.0007603230769746006, "memory(GiB)": 165.8, "reward": 2.5256946086883545, "reward_std": 0.2572603225708008, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490600049495697, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5881944894790649, "rewards/GeoVisalEntityMatch2ORM/std": 0.19705308973789215, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 1385, "train_speed(iter/s)": 0.026158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 434.69793701171875, "completions/min_length": 381.0, "epoch": 0.16606757728253055, "grad_norm": 1.1495395328981046, "kl": 0.7683009207248688, "learning_rate": 9.399652753326013e-07, "loss": 0.0007699380512349308, "memory(GiB)": 165.8, "reward": 2.6565394401550293, "reward_std": 0.1231820210814476, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6732060313224792, "rewards/GeoVisalEntityMatch2ORM/std": 0.13320837914943695, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1386, "train_speed(iter/s)": 0.026162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 401.76043701171875, "completions/min_length": 345.0, "epoch": 0.16618739515935776, "grad_norm": 1.2076143445787293, "kl": 0.7496321499347687, "learning_rate": 9.398749156416503e-07, "loss": 0.0007506857509724796, "memory(GiB)": 165.8, "reward": 2.803703784942627, "reward_std": 0.10998125374317169, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8120369911193848, "rewards/GeoVisalEntityMatch2ORM/std": 0.17710153758525848, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1387, "train_speed(iter/s)": 0.026161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.23958333333333334, "completions/max_length": 459.0, "completions/mean_length": 400.13543701171875, "completions/min_length": 363.0, "epoch": 0.166307213036185, "grad_norm": 1.1401894920641018, "kl": 0.8928702175617218, "learning_rate": 9.397844923504884e-07, "loss": 0.000894357799552381, "memory(GiB)": 165.8, "reward": 1.9963910579681396, "reward_std": 0.21834835410118103, "rewards/GeoLocAccuracyV2ORM/mean": 0.6375000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.4814998507499695, "rewards/GeoVisalEntityMatch2ORM/mean": 0.598474383354187, "rewards/GeoVisalEntityMatch2ORM/std": 0.16866645216941833, "rewards/MathFormat/mean": 0.7604166865348816, "rewards/MathFormat/std": 0.42906978726387024, "step": 1388, "train_speed(iter/s)": 0.026157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 443.0, "completions/mean_length": 382.1458435058594, "completions/min_length": 343.0, "epoch": 0.16642703091301222, "grad_norm": 1.3017628254721019, "kl": 0.7819384336471558, "learning_rate": 9.396940054721898e-07, "loss": 0.0007836396689526737, "memory(GiB)": 165.8, "reward": 2.551732063293457, "reward_std": 0.18241217732429504, "rewards/GeoLocAccuracyV2ORM/mean": 0.8229166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3837431073188782, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7288154363632202, "rewards/GeoVisalEntityMatch2ORM/std": 0.18130861222743988, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1389, "train_speed(iter/s)": 0.02615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/mean_length": 373.3020935058594, "completions/min_length": 334.0, "epoch": 0.16654684878983944, "grad_norm": 1.3009982119940535, "kl": 0.7773031294345856, "learning_rate": 9.396034550198375e-07, "loss": 0.000778714835178107, "memory(GiB)": 165.8, "reward": 2.6352059841156006, "reward_std": 0.09615375101566315, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6435391902923584, "rewards/GeoVisalEntityMatch2ORM/std": 0.1534617394208908, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1390, "train_speed(iter/s)": 0.02615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.0, "completions/mean_length": 357.78125, "completions/min_length": 305.0, "epoch": 0.16666666666666666, "grad_norm": 1.1465492547500955, "kl": 0.7771748900413513, "learning_rate": 9.39512841006524e-07, "loss": 0.0007809202070347965, "memory(GiB)": 165.8, "reward": 2.6626157760620117, "reward_std": 0.062326282262802124, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6626157760620117, "rewards/GeoVisalEntityMatch2ORM/std": 0.24756577610969543, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1391, "train_speed(iter/s)": 0.026154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 403.0, "completions/mean_length": 365.3333435058594, "completions/min_length": 331.0, "epoch": 0.1667864845434939, "grad_norm": 1.3091460114378186, "kl": 0.8267244696617126, "learning_rate": 9.394221634453512e-07, "loss": 0.0008284723153337836, "memory(GiB)": 165.8, "reward": 2.3720240592956543, "reward_std": 0.1920015662908554, "rewards/GeoLocAccuracyV2ORM/mean": 0.6145833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4892484247684479, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7574405074119568, "rewards/GeoVisalEntityMatch2ORM/std": 0.2660508453845978, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1392, "train_speed(iter/s)": 0.026144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 381.46875, "completions/min_length": 312.0, "epoch": 0.16690630242032112, "grad_norm": 1.3758745027357466, "kl": 0.7840325832366943, "learning_rate": 9.393314223494296e-07, "loss": 0.0007858959725126624, "memory(GiB)": 165.8, "reward": 2.352480411529541, "reward_std": 0.12064060568809509, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5441468954086304, "rewards/GeoVisalEntityMatch2ORM/std": 0.24076446890830994, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1393, "train_speed(iter/s)": 0.026142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.0, "completions/mean_length": 386.5833435058594, "completions/min_length": 336.0, "epoch": 0.16702612029714833, "grad_norm": 1.1955960225942557, "kl": 0.7573138773441315, "learning_rate": 9.39240617731879e-07, "loss": 0.0007576098432764411, "memory(GiB)": 165.8, "reward": 2.7174277305603027, "reward_std": 0.07662047445774078, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7174272537231445, "rewards/GeoVisalEntityMatch2ORM/std": 0.10991556197404861, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1394, "train_speed(iter/s)": 0.026145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 367.84375, "completions/min_length": 303.0, "epoch": 0.16714593817397555, "grad_norm": 1.115093560178457, "kl": 0.792602926492691, "learning_rate": 9.39149749605829e-07, "loss": 0.0007960250368341804, "memory(GiB)": 165.8, "reward": 2.7731873989105225, "reward_std": 0.22338469326496124, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357589185237885, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8044372797012329, "rewards/GeoVisalEntityMatch2ORM/std": 0.19757574796676636, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1395, "train_speed(iter/s)": 0.026141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 598.0, "completions/mean_length": 372.5208435058594, "completions/min_length": 297.0, "epoch": 0.16726575605080277, "grad_norm": 1.1128011503569093, "kl": 0.7836426794528961, "learning_rate": 9.390588179844177e-07, "loss": 0.000786795630119741, "memory(GiB)": 165.8, "reward": 2.6078124046325684, "reward_std": 0.17892417311668396, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6286458373069763, "rewards/GeoVisalEntityMatch2ORM/std": 0.11684703826904297, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1396, "train_speed(iter/s)": 0.026147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/mean_length": 388.41668701171875, "completions/min_length": 342.0, "epoch": 0.16738557392763, "grad_norm": 1.6767759581757242, "kl": 0.7261988520622253, "learning_rate": 9.389678228807927e-07, "loss": 0.0007261162390932441, "memory(GiB)": 165.8, "reward": 2.551909923553467, "reward_std": 0.1323896199464798, "rewards/GeoLocAccuracyV2ORM/mean": 0.8166667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.33800238370895386, "rewards/GeoVisalEntityMatch2ORM/mean": 0.735243022441864, "rewards/GeoVisalEntityMatch2ORM/std": 0.14709405601024628, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1397, "train_speed(iter/s)": 0.026148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 394.3125, "completions/min_length": 343.0, "epoch": 0.16750539180445723, "grad_norm": 1.2674706109022682, "kl": 0.778654009103775, "learning_rate": 9.388767643081108e-07, "loss": 0.0007799069280736148, "memory(GiB)": 165.8, "reward": 2.7427220344543457, "reward_std": 0.09185310453176498, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7427219152450562, "rewards/GeoVisalEntityMatch2ORM/std": 0.1768483966588974, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1398, "train_speed(iter/s)": 0.026142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 377.19793701171875, "completions/min_length": 326.0, "epoch": 0.16762520968128444, "grad_norm": 1.2672621030586482, "kl": 0.7562194764614105, "learning_rate": 9.387856422795377e-07, "loss": 0.0007586678257212043, "memory(GiB)": 165.8, "reward": 2.7860865592956543, "reward_std": 0.08038797229528427, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7860863208770752, "rewards/GeoVisalEntityMatch2ORM/std": 0.24367333948612213, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1399, "train_speed(iter/s)": 0.026135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 392.04168701171875, "completions/min_length": 343.0, "epoch": 0.16774502755811166, "grad_norm": 1.1604371489527834, "kl": 0.7788024544715881, "learning_rate": 9.386944568082486e-07, "loss": 0.0007806842913851142, "memory(GiB)": 165.8, "reward": 2.6219329833984375, "reward_std": 0.24048718810081482, "rewards/GeoLocAccuracyV2ORM/mean": 0.9312499761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.23001717031002045, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7010995745658875, "rewards/GeoVisalEntityMatch2ORM/std": 0.16353316605091095, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1400, "train_speed(iter/s)": 0.026138 }, { "epoch": 0.16774502755811166, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.004836309523809525, "eval_completions/max_length": 436.20238095238096, "eval_completions/mean_length": 385.62066868373324, "eval_completions/min_length": 343.7202380952381, "eval_kl": 33.04617444780611, "eval_loss": 0.03448853641748428, "eval_reward": 2.5881814793461846, "eval_reward_std": 0.13411900928864876, "eval_rewards/GeoLocAccuracyV2ORM/mean": 0.9246527854175794, "eval_rewards/GeoLocAccuracyV2ORM/std": 0.11127587336869467, "eval_rewards/GeoVisalEntityMatch2ORM/mean": 0.668240939222631, "eval_rewards/GeoVisalEntityMatch2ORM/std": 0.14108061519939274, "eval_rewards/MathFormat/mean": 0.995287700068383, "eval_rewards/MathFormat/std": 0.01680773434539636, "eval_runtime": 1728.6688, "eval_samples_per_second": 0.195, "eval_steps_per_second": 0.005, "step": 1400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/mean_length": 385.3958435058594, "completions/min_length": 337.0, "epoch": 0.1678648454349389, "grad_norm": 1.1619122098165897, "kl": 0.8099586963653564, "learning_rate": 9.386032079074275e-07, "loss": 0.0008105983724817634, "memory(GiB)": 165.8, "reward": 2.6381077766418457, "reward_std": 0.12448830157518387, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.648524284362793, "rewards/GeoVisalEntityMatch2ORM/std": 0.2758255898952484, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1401, "train_speed(iter/s)": 0.025305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 394.7708435058594, "completions/min_length": 340.0, "epoch": 0.16798466331176612, "grad_norm": 1.1952784440265047, "kl": 0.749343991279602, "learning_rate": 9.385118955902682e-07, "loss": 0.0007514606113545597, "memory(GiB)": 165.8, "reward": 2.502042770385742, "reward_std": 0.09854434430599213, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7020427584648132, "rewards/GeoVisalEntityMatch2ORM/std": 0.15308009088039398, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1402, "train_speed(iter/s)": 0.02531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 392.19793701171875, "completions/min_length": 361.0, "epoch": 0.16810448118859334, "grad_norm": 1.1174998027948135, "kl": 0.7454709708690643, "learning_rate": 9.38420519869973e-07, "loss": 0.0007466028328053653, "memory(GiB)": 165.8, "reward": 2.4619295597076416, "reward_std": 0.056006286293268204, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7119295597076416, "rewards/GeoVisalEntityMatch2ORM/std": 0.20449861884117126, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1403, "train_speed(iter/s)": 0.025316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 386.07293701171875, "completions/min_length": 344.0, "epoch": 0.16822429906542055, "grad_norm": 1.205922295405913, "kl": 0.7033374607563019, "learning_rate": 9.383290807597539e-07, "loss": 0.0007045170059427619, "memory(GiB)": 165.8, "reward": 2.649925708770752, "reward_std": 0.0664404034614563, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6499255895614624, "rewards/GeoVisalEntityMatch2ORM/std": 0.10138861835002899, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1404, "train_speed(iter/s)": 0.025323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.0, "completions/mean_length": 394.0, "completions/min_length": 352.0, "epoch": 0.1683441169422478, "grad_norm": 1.0519561321082074, "kl": 0.7929458916187286, "learning_rate": 9.382375782728316e-07, "loss": 0.0007938941707834601, "memory(GiB)": 165.8, "reward": 2.309722423553467, "reward_std": 0.0744323581457138, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5597223043441772, "rewards/GeoVisalEntityMatch2ORM/std": 0.1745559573173523, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1405, "train_speed(iter/s)": 0.025328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 389.25, "completions/min_length": 324.0, "epoch": 0.168463934819075, "grad_norm": 1.0158354401960856, "kl": 0.7636973559856415, "learning_rate": 9.381460124224361e-07, "loss": 0.0007647673483006656, "memory(GiB)": 165.8, "reward": 2.6652777194976807, "reward_std": 0.06799277663230896, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6652778387069702, "rewards/GeoVisalEntityMatch2ORM/std": 0.1714688241481781, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1406, "train_speed(iter/s)": 0.025332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 438.0, "completions/mean_length": 393.60418701171875, "completions/min_length": 358.0, "epoch": 0.16858375269590223, "grad_norm": 1.132339108347377, "kl": 0.898727297782898, "learning_rate": 9.380543832218068e-07, "loss": 0.0008947986061684787, "memory(GiB)": 165.8, "reward": 2.629601001739502, "reward_std": 0.28524237871170044, "rewards/GeoLocAccuracyV2ORM/mean": 0.9416667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.22834262251853943, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7296007871627808, "rewards/GeoVisalEntityMatch2ORM/std": 0.18505913019180298, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 1407, "train_speed(iter/s)": 0.025333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 412.69793701171875, "completions/min_length": 363.0, "epoch": 0.16870357057272944, "grad_norm": 1.1338564708565173, "kl": 0.7273289263248444, "learning_rate": 9.37962690684192e-07, "loss": 0.000728962360881269, "memory(GiB)": 165.8, "reward": 2.72445011138916, "reward_std": 0.10280806571245193, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7244502902030945, "rewards/GeoVisalEntityMatch2ORM/std": 0.1142563596367836, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1408, "train_speed(iter/s)": 0.025341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 407.22918701171875, "completions/min_length": 351.0, "epoch": 0.16882338844955666, "grad_norm": 1.1945568026328615, "kl": 0.728316456079483, "learning_rate": 9.378709348228493e-07, "loss": 0.0007291188230738044, "memory(GiB)": 165.8, "reward": 2.4882688522338867, "reward_std": 0.2214944213628769, "rewards/GeoLocAccuracyV2ORM/mean": 0.8395833373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.33794400095939636, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6486855745315552, "rewards/GeoVisalEntityMatch2ORM/std": 0.20884199440479279, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1409, "train_speed(iter/s)": 0.025346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 394.4270935058594, "completions/min_length": 330.0, "epoch": 0.1689432063263839, "grad_norm": 0.8990804173254007, "kl": 0.7490922510623932, "learning_rate": 9.377791156510454e-07, "loss": 0.0007497755577787757, "memory(GiB)": 165.8, "reward": 2.559375286102295, "reward_std": 0.04354201629757881, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7593750357627869, "rewards/GeoVisalEntityMatch2ORM/std": 0.1704322248697281, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1410, "train_speed(iter/s)": 0.025347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/mean_length": 402.8645935058594, "completions/min_length": 361.0, "epoch": 0.16906302420321112, "grad_norm": 1.2724078112471917, "kl": 0.8000740110874176, "learning_rate": 9.376872331820562e-07, "loss": 0.000801893591415137, "memory(GiB)": 165.8, "reward": 2.430034637451172, "reward_std": 0.0915377289056778, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6300348043441772, "rewards/GeoVisalEntityMatch2ORM/std": 0.18963831663131714, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1411, "train_speed(iter/s)": 0.025353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 851.0, "completions/mean_length": 404.66668701171875, "completions/min_length": 358.0, "epoch": 0.16918284208003834, "grad_norm": 1.1147634057898828, "kl": 0.7497604787349701, "learning_rate": 9.375952874291666e-07, "loss": 0.0007548233261331916, "memory(GiB)": 165.8, "reward": 2.47291898727417, "reward_std": 0.22926323115825653, "rewards/GeoLocAccuracyV2ORM/mean": 0.814583420753479, "rewards/GeoLocAccuracyV2ORM/std": 0.3424306809902191, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6687523126602173, "rewards/GeoVisalEntityMatch2ORM/std": 0.11224294453859329, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1412, "train_speed(iter/s)": 0.02536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 459.0, "completions/mean_length": 398.0833435058594, "completions/min_length": 367.0, "epoch": 0.16930265995686555, "grad_norm": 1.1318807374939415, "kl": 0.8003992736339569, "learning_rate": 9.375032784056709e-07, "loss": 0.0008010516758076847, "memory(GiB)": 165.8, "reward": 2.3043651580810547, "reward_std": 0.06781783699989319, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8043650388717651, "rewards/GeoVisalEntityMatch2ORM/std": 0.2292894572019577, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 1413, "train_speed(iter/s)": 0.02536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/mean_length": 419.29168701171875, "completions/min_length": 364.0, "epoch": 0.1694224778336928, "grad_norm": 1.141604422711023, "kl": 0.7551960945129395, "learning_rate": 9.374112061248724e-07, "loss": 0.0007580084493383765, "memory(GiB)": 165.8, "reward": 2.548499584197998, "reward_std": 0.09623967856168747, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5484995245933533, "rewards/GeoVisalEntityMatch2ORM/std": 0.18547573685646057, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1414, "train_speed(iter/s)": 0.025365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 416.53125, "completions/min_length": 360.0, "epoch": 0.16954229571052, "grad_norm": 1.1954744105041821, "kl": 0.766292929649353, "learning_rate": 9.373190706000834e-07, "loss": 0.000767642050050199, "memory(GiB)": 165.8, "reward": 2.5758514404296875, "reward_std": 0.16813430190086365, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.27783626317977905, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6591848731040955, "rewards/GeoVisalEntityMatch2ORM/std": 0.24588608741760254, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1415, "train_speed(iter/s)": 0.025369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/mean_length": 409.6458435058594, "completions/min_length": 369.0, "epoch": 0.16966211358734723, "grad_norm": 1.1647269427401086, "kl": 0.7889576852321625, "learning_rate": 9.372268718446256e-07, "loss": 0.0007894039154052734, "memory(GiB)": 165.8, "reward": 2.5001420974731445, "reward_std": 0.1130092442035675, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5168087482452393, "rewards/GeoVisalEntityMatch2ORM/std": 0.2506677806377411, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1416, "train_speed(iter/s)": 0.025374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10416666666666667, "completions/max_length": 487.0, "completions/mean_length": 415.88543701171875, "completions/min_length": 361.0, "epoch": 0.16978193146417445, "grad_norm": 1.4437348618782264, "kl": 2.0452443957328796, "learning_rate": 9.371346098718299e-07, "loss": 0.0020199716091156006, "memory(GiB)": 165.8, "reward": 2.6258931159973145, "reward_std": 0.313543438911438, "rewards/GeoLocAccuracyV2ORM/mean": 0.8958333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3070802092552185, "rewards/GeoVisalEntityMatch2ORM/mean": 0.834226131439209, "rewards/GeoVisalEntityMatch2ORM/std": 0.11449424922466278, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070802092552185, "step": 1417, "train_speed(iter/s)": 0.025375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/mean_length": 418.75, "completions/min_length": 364.0, "epoch": 0.1699017493410017, "grad_norm": 1.2748612267517903, "kl": 0.7346176207065582, "learning_rate": 9.370422846950361e-07, "loss": 0.0007353152032010257, "memory(GiB)": 165.8, "reward": 2.582998514175415, "reward_std": 0.12110867351293564, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7746652364730835, "rewards/GeoVisalEntityMatch2ORM/std": 0.14229218661785126, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1418, "train_speed(iter/s)": 0.02538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 484.0, "completions/mean_length": 424.1458435058594, "completions/min_length": 376.0, "epoch": 0.1700215672178289, "grad_norm": 0.8036761620918358, "kl": 0.7100772857666016, "learning_rate": 9.36949896327593e-07, "loss": 0.0013077458133921027, "memory(GiB)": 165.8, "reward": 2.840277671813965, "reward_std": 0.034831248223781586, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8402778506278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.11935770511627197, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1419, "train_speed(iter/s)": 0.025385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 465.0, "completions/mean_length": 409.0, "completions/min_length": 337.0, "epoch": 0.17014138509465612, "grad_norm": 1.1816237673262877, "kl": 0.7639248073101044, "learning_rate": 9.36857444782859e-07, "loss": 0.000764794647693634, "memory(GiB)": 165.8, "reward": 2.5247397422790527, "reward_std": 0.09349251538515091, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5330729484558105, "rewards/GeoVisalEntityMatch2ORM/std": 0.282248854637146, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1420, "train_speed(iter/s)": 0.025391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/mean_length": 431.94793701171875, "completions/min_length": 366.0, "epoch": 0.17026120297148334, "grad_norm": 1.1042483720465845, "kl": 0.7662049233913422, "learning_rate": 9.367649300742013e-07, "loss": 0.0007670993800275028, "memory(GiB)": 165.8, "reward": 2.6124134063720703, "reward_std": 0.11759600043296814, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.612413227558136, "rewards/GeoVisalEntityMatch2ORM/std": 0.18928147852420807, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1421, "train_speed(iter/s)": 0.025396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14583333333333334, "completions/max_length": 497.0, "completions/mean_length": 434.2395935058594, "completions/min_length": 385.0, "epoch": 0.17038102084831055, "grad_norm": 1.661518465371148, "kl": 2.3837312757968903, "learning_rate": 9.366723522149963e-07, "loss": 0.0023729330860078335, "memory(GiB)": 165.8, "reward": 2.38222074508667, "reward_std": 0.2985042333602905, "rewards/GeoLocAccuracyV2ORM/mean": 0.8541666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3547917604446411, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6738873720169067, "rewards/GeoVisalEntityMatch2ORM/std": 0.1150471568107605, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 1422, "train_speed(iter/s)": 0.025398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 471.0, "completions/mean_length": 408.3020935058594, "completions/min_length": 367.0, "epoch": 0.1705008387251378, "grad_norm": 1.2244546369378069, "kl": 0.7639439702033997, "learning_rate": 9.365797112186296e-07, "loss": 0.0007644295692443848, "memory(GiB)": 165.8, "reward": 2.2903409004211426, "reward_std": 0.07099588215351105, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7903409004211426, "rewards/GeoVisalEntityMatch2ORM/std": 0.19543200731277466, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 1423, "train_speed(iter/s)": 0.025398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 497.0, "completions/mean_length": 442.35418701171875, "completions/min_length": 369.0, "epoch": 0.17062065660196502, "grad_norm": 1.1732261541385443, "kl": 0.7507740557193756, "learning_rate": 9.36487007098496e-07, "loss": 0.0007523000240325928, "memory(GiB)": 165.8, "reward": 2.524409055709839, "reward_std": 0.19143164157867432, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003034889698029, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5535756349563599, "rewards/GeoVisalEntityMatch2ORM/std": 0.14479230344295502, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1424, "train_speed(iter/s)": 0.025402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 472.0, "completions/mean_length": 424.7708435058594, "completions/min_length": 382.0, "epoch": 0.17074047447879223, "grad_norm": 1.1699590892488247, "kl": 0.8316620886325836, "learning_rate": 9.363942398679991e-07, "loss": 0.000830387114547193, "memory(GiB)": 165.8, "reward": 2.4343068599700928, "reward_std": 0.3973207175731659, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.36696958541870117, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7093066573143005, "rewards/GeoVisalEntityMatch2ORM/std": 0.16383948922157288, "rewards/MathFormat/mean": 0.9166666865348816, "rewards/MathFormat/std": 0.27783623337745667, "step": 1425, "train_speed(iter/s)": 0.025405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 443.8958435058594, "completions/min_length": 385.0, "epoch": 0.17086029235561945, "grad_norm": 1.1783655489419547, "kl": 0.7543862462043762, "learning_rate": 9.363014095405521e-07, "loss": 0.000755076645873487, "memory(GiB)": 165.8, "reward": 2.4846696853637695, "reward_std": 0.2319163680076599, "rewards/GeoLocAccuracyV2ORM/mean": 0.9395833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.21739082038402557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5555030107498169, "rewards/GeoVisalEntityMatch2ORM/std": 0.1390177309513092, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1426, "train_speed(iter/s)": 0.025409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 486.0, "completions/mean_length": 431.10418701171875, "completions/min_length": 390.0, "epoch": 0.1709801102324467, "grad_norm": 1.2170635462768236, "kl": 0.79318767786026, "learning_rate": 9.362085161295768e-07, "loss": 0.0007931118598207831, "memory(GiB)": 165.8, "reward": 2.672593832015991, "reward_std": 0.07154090702533722, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6725935935974121, "rewards/GeoVisalEntityMatch2ORM/std": 0.13953375816345215, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1427, "train_speed(iter/s)": 0.025415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 540.0, "completions/mean_length": 440.1458435058594, "completions/min_length": 385.0, "epoch": 0.1710999281092739, "grad_norm": 1.1232100185077845, "kl": 0.7161392569541931, "learning_rate": 9.361155596485045e-07, "loss": 0.0007176945800893009, "memory(GiB)": 165.8, "reward": 2.730208396911621, "reward_std": 0.1686803102493286, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7510417699813843, "rewards/GeoVisalEntityMatch2ORM/std": 0.21584577858448029, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1428, "train_speed(iter/s)": 0.025419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 493.0, "completions/mean_length": 420.66668701171875, "completions/min_length": 368.0, "epoch": 0.17121974598610112, "grad_norm": 1.3247929427658767, "kl": 0.7633887827396393, "learning_rate": 9.360225401107755e-07, "loss": 0.0007647375459782779, "memory(GiB)": 165.8, "reward": 2.183854103088379, "reward_std": 0.10860833525657654, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6838542222976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.16740979254245758, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 1429, "train_speed(iter/s)": 0.025419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/mean_length": 437.21875, "completions/min_length": 384.0, "epoch": 0.17133956386292834, "grad_norm": 1.2129356885725904, "kl": 0.6960363388061523, "learning_rate": 9.359294575298394e-07, "loss": 0.0006974290008656681, "memory(GiB)": 165.8, "reward": 2.757606029510498, "reward_std": 0.09142500907182693, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7576059103012085, "rewards/GeoVisalEntityMatch2ORM/std": 0.10732242465019226, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1430, "train_speed(iter/s)": 0.025424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 424.91668701171875, "completions/min_length": 364.0, "epoch": 0.17145938173975558, "grad_norm": 1.1718014176076426, "kl": 0.7660209536552429, "learning_rate": 9.358363119191543e-07, "loss": 0.0007663121214136481, "memory(GiB)": 165.8, "reward": 2.755927801132202, "reward_std": 0.16300229728221893, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7767609357833862, "rewards/GeoVisalEntityMatch2ORM/std": 0.22124071419239044, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1431, "train_speed(iter/s)": 0.025429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 441.0833435058594, "completions/min_length": 381.0, "epoch": 0.1715791996165828, "grad_norm": 1.0922135989127695, "kl": 0.6990582346916199, "learning_rate": 9.357431032921881e-07, "loss": 0.0007000919431447983, "memory(GiB)": 165.8, "reward": 2.78511905670166, "reward_std": 0.1699783056974411, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8059524297714233, "rewards/GeoVisalEntityMatch2ORM/std": 0.11474376171827316, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1432, "train_speed(iter/s)": 0.025434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 506.0, "completions/mean_length": 432.41668701171875, "completions/min_length": 377.0, "epoch": 0.17169901749341002, "grad_norm": 0.7928155961709257, "kl": 0.7081745862960815, "learning_rate": 9.356498316624177e-07, "loss": 0.0013035163283348083, "memory(GiB)": 165.8, "reward": 2.653571605682373, "reward_std": 0.20447321236133575, "rewards/GeoLocAccuracyV2ORM/mean": 0.9541666507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.19783920049667358, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7202380895614624, "rewards/GeoVisalEntityMatch2ORM/std": 0.31900832056999207, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 1433, "train_speed(iter/s)": 0.025439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 426.5833435058594, "completions/min_length": 378.0, "epoch": 0.17181883537023723, "grad_norm": 1.173866741741328, "kl": 0.783248096704483, "learning_rate": 9.355564970433287e-07, "loss": 0.0007853160495869815, "memory(GiB)": 165.8, "reward": 2.6245040893554688, "reward_std": 0.14117205142974854, "rewards/GeoLocAccuracyV2ORM/mean": 0.8333333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.3265986144542694, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7911707162857056, "rewards/GeoVisalEntityMatch2ORM/std": 0.15548986196517944, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1434, "train_speed(iter/s)": 0.025444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 494.0, "completions/mean_length": 426.91668701171875, "completions/min_length": 369.0, "epoch": 0.17193865324706445, "grad_norm": 1.178477405913868, "kl": 0.8239186406135559, "learning_rate": 9.354630994484162e-07, "loss": 0.0008248090744018555, "memory(GiB)": 165.8, "reward": 2.254216194152832, "reward_std": 0.2976590394973755, "rewards/GeoLocAccuracyV2ORM/mean": 0.7833333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.4120977520942688, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6792162656784058, "rewards/GeoVisalEntityMatch2ORM/std": 0.18044839799404144, "rewards/MathFormat/mean": 0.7916666865348816, "rewards/MathFormat/std": 0.40824827551841736, "step": 1435, "train_speed(iter/s)": 0.025447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 508.0, "completions/mean_length": 453.7395935058594, "completions/min_length": 375.0, "epoch": 0.1720584711238917, "grad_norm": 0.9205400880070791, "kl": 0.7086577713489532, "learning_rate": 9.353696388911845e-07, "loss": 0.0007088457932695746, "memory(GiB)": 165.8, "reward": 2.6006076335906982, "reward_std": 0.21533964574337006, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6631076335906982, "rewards/GeoVisalEntityMatch2ORM/std": 0.20698244869709015, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 1436, "train_speed(iter/s)": 0.025451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 406.5833435058594, "completions/min_length": 353.0, "epoch": 0.1721782890007189, "grad_norm": 1.1573690097594977, "kl": 0.7539483606815338, "learning_rate": 9.352761153851466e-07, "loss": 0.0007559533114545047, "memory(GiB)": 165.8, "reward": 2.7150301933288574, "reward_std": 0.06682602316141129, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7150297164916992, "rewards/GeoVisalEntityMatch2ORM/std": 0.22581154108047485, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1437, "train_speed(iter/s)": 0.025457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/mean_length": 418.8020935058594, "completions/min_length": 347.0, "epoch": 0.17229810687754613, "grad_norm": 0.9826332511724436, "kl": 0.7099397778511047, "learning_rate": 9.351825289438245e-07, "loss": 0.0007108151912689209, "memory(GiB)": 165.8, "reward": 2.295684576034546, "reward_std": 0.1915547400712967, "rewards/GeoLocAccuracyV2ORM/mean": 0.7104166746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.4153575897216797, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5852679014205933, "rewards/GeoVisalEntityMatch2ORM/std": 0.24970117211341858, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1438, "train_speed(iter/s)": 0.025461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3020833333333333, "completions/max_length": 487.0, "completions/mean_length": 422.5520935058594, "completions/min_length": 365.0, "epoch": 0.17241792475437334, "grad_norm": 0.9893955705791185, "kl": 0.7783386707305908, "learning_rate": 9.350888795807499e-07, "loss": 0.0007776891579851508, "memory(GiB)": 165.8, "reward": 2.1744790077209473, "reward_std": 0.38504695892333984, "rewards/GeoLocAccuracyV2ORM/mean": 0.6979166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.46157148480415344, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7786458730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.23086987435817719, "rewards/MathFormat/mean": 0.6979166865348816, "rewards/MathFormat/std": 0.46157148480415344, "step": 1439, "train_speed(iter/s)": 0.025462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 480.0, "completions/mean_length": 414.4270935058594, "completions/min_length": 366.0, "epoch": 0.17253774263120059, "grad_norm": 1.193396323389615, "kl": 0.7596202790737152, "learning_rate": 9.349951673094632e-07, "loss": 0.0007594352355226874, "memory(GiB)": 165.8, "reward": 2.3873512744903564, "reward_std": 0.3169859051704407, "rewards/GeoLocAccuracyV2ORM/mean": 0.8854166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3201904296875, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6165179014205933, "rewards/GeoVisalEntityMatch2ORM/std": 0.1896384358406067, "rewards/MathFormat/mean": 0.8854166865348816, "rewards/MathFormat/std": 0.3201904296875, "step": 1440, "train_speed(iter/s)": 0.025464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 461.0, "completions/mean_length": 411.51043701171875, "completions/min_length": 365.0, "epoch": 0.1726575605080278, "grad_norm": 1.1946914452321378, "kl": 0.799110472202301, "learning_rate": 9.349013921435141e-07, "loss": 0.0008013894548639655, "memory(GiB)": 165.8, "reward": 2.3000993728637695, "reward_std": 0.21172422170639038, "rewards/GeoLocAccuracyV2ORM/mean": 0.7749999761581421, "rewards/GeoLocAccuracyV2ORM/std": 0.3949683606624603, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6500992774963379, "rewards/GeoVisalEntityMatch2ORM/std": 0.10956843942403793, "rewards/MathFormat/mean": 0.875, "rewards/MathFormat/std": 0.33245500922203064, "step": 1441, "train_speed(iter/s)": 0.025465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 409.90625, "completions/min_length": 364.0, "epoch": 0.17277737838485502, "grad_norm": 1.0464536837841205, "kl": 0.6934177279472351, "learning_rate": 9.348075540964611e-07, "loss": 0.0006938924780115485, "memory(GiB)": 165.8, "reward": 2.5549604892730713, "reward_std": 0.26270782947540283, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6966270208358765, "rewards/GeoVisalEntityMatch2ORM/std": 0.191894993185997, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1442, "train_speed(iter/s)": 0.02547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 407.32293701171875, "completions/min_length": 338.0, "epoch": 0.17289719626168223, "grad_norm": 1.2865234495321223, "kl": 0.7912116050720215, "learning_rate": 9.347136531818721e-07, "loss": 0.0007925530662760139, "memory(GiB)": 165.8, "reward": 2.49415922164917, "reward_std": 0.11125215142965317, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6858259439468384, "rewards/GeoVisalEntityMatch2ORM/std": 0.12961609661579132, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1443, "train_speed(iter/s)": 0.025475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/mean_length": 404.2395935058594, "completions/min_length": 368.0, "epoch": 0.17301701413850948, "grad_norm": 1.1886458165692149, "kl": 0.7325757741928101, "learning_rate": 9.346196894133238e-07, "loss": 0.0007344335317611694, "memory(GiB)": 165.8, "reward": 2.570241689682007, "reward_std": 0.16573981940746307, "rewards/GeoLocAccuracyV2ORM/mean": 0.9250000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6452414989471436, "rewards/GeoVisalEntityMatch2ORM/std": 0.3167569041252136, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1444, "train_speed(iter/s)": 0.025482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/mean_length": 394.28125, "completions/min_length": 348.0, "epoch": 0.1731368320153367, "grad_norm": 1.193706154554277, "kl": 0.7312528789043427, "learning_rate": 9.345256628044023e-07, "loss": 0.0007326118648052216, "memory(GiB)": 165.8, "reward": 2.569345235824585, "reward_std": 0.1358075588941574, "rewards/GeoLocAccuracyV2ORM/mean": 0.8250000476837158, "rewards/GeoLocAccuracyV2ORM/std": 0.33245500922203064, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7443453073501587, "rewards/GeoVisalEntityMatch2ORM/std": 0.12831833958625793, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1445, "train_speed(iter/s)": 0.025483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/mean_length": 395.19793701171875, "completions/min_length": 344.0, "epoch": 0.1732566498921639, "grad_norm": 1.006590289392037, "kl": 0.7514280378818512, "learning_rate": 9.344315733687027e-07, "loss": 0.0007530562579631805, "memory(GiB)": 165.8, "reward": 2.698474884033203, "reward_std": 0.059925101697444916, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6984747648239136, "rewards/GeoVisalEntityMatch2ORM/std": 0.27861613035202026, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1446, "train_speed(iter/s)": 0.02549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 402.09375, "completions/min_length": 351.0, "epoch": 0.17337646776899113, "grad_norm": 1.083777885057448, "kl": 0.7416415810585022, "learning_rate": 9.343374211198289e-07, "loss": 0.0007430246914736927, "memory(GiB)": 165.8, "reward": 2.6145834922790527, "reward_std": 0.06569784134626389, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6145833730697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.1062254011631012, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1447, "train_speed(iter/s)": 0.025494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.0, "completions/mean_length": 402.32293701171875, "completions/min_length": 340.0, "epoch": 0.17349628564581834, "grad_norm": 1.1048964491517625, "kl": 0.749098151922226, "learning_rate": 9.342432060713941e-07, "loss": 0.0007519039209000766, "memory(GiB)": 165.8, "reward": 2.698489189147949, "reward_std": 0.07958817481994629, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6984890699386597, "rewards/GeoVisalEntityMatch2ORM/std": 0.15996117889881134, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1448, "train_speed(iter/s)": 0.025499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/mean_length": 406.3645935058594, "completions/min_length": 355.0, "epoch": 0.1736161035226456, "grad_norm": 0.7960642928619655, "kl": 0.745891660451889, "learning_rate": 9.341489282370207e-07, "loss": 0.0007468561525456607, "memory(GiB)": 165.8, "reward": 2.809194564819336, "reward_std": 0.029252339154481888, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8091947436332703, "rewards/GeoVisalEntityMatch2ORM/std": 0.18529027700424194, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1449, "train_speed(iter/s)": 0.025504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 402.09375, "completions/min_length": 352.0, "epoch": 0.1737359213994728, "grad_norm": 0.9825242601612401, "kl": 0.7397292852401733, "learning_rate": 9.340545876303401e-07, "loss": 0.0007407466764561832, "memory(GiB)": 165.8, "reward": 2.797309398651123, "reward_std": 0.0663740336894989, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7973091006278992, "rewards/GeoVisalEntityMatch2ORM/std": 0.1336372196674347, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1450, "train_speed(iter/s)": 0.02551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 417.7708435058594, "completions/min_length": 372.0, "epoch": 0.17385573927630002, "grad_norm": 1.083684126926535, "kl": 0.7611185312271118, "learning_rate": 9.339601842649924e-07, "loss": 0.0007621223921887577, "memory(GiB)": 165.8, "reward": 2.6880455017089844, "reward_std": 0.26905274391174316, "rewards/GeoLocAccuracyV2ORM/mean": 0.9229166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.24169538915157318, "rewards/GeoVisalEntityMatch2ORM/mean": 0.775545597076416, "rewards/GeoVisalEntityMatch2ORM/std": 0.1946517527103424, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1451, "train_speed(iter/s)": 0.025514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 412.54168701171875, "completions/min_length": 369.0, "epoch": 0.17397555715312724, "grad_norm": 1.2327553860732468, "kl": 0.7486860454082489, "learning_rate": 9.338657181546277e-07, "loss": 0.0007495532627217472, "memory(GiB)": 165.8, "reward": 2.5083088874816895, "reward_std": 0.11558254063129425, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5083085298538208, "rewards/GeoVisalEntityMatch2ORM/std": 0.15339303016662598, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1452, "train_speed(iter/s)": 0.025519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 413.53125, "completions/min_length": 360.0, "epoch": 0.17409537502995448, "grad_norm": 1.119080422743739, "kl": 0.7679162323474884, "learning_rate": 9.337711893129039e-07, "loss": 0.0007691408391110599, "memory(GiB)": 165.8, "reward": 2.369295835494995, "reward_std": 0.07904098927974701, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6192957162857056, "rewards/GeoVisalEntityMatch2ORM/std": 0.30417656898498535, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1453, "train_speed(iter/s)": 0.025524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 403.76043701171875, "completions/min_length": 356.0, "epoch": 0.1742151929067817, "grad_norm": 1.2713579091418783, "kl": 0.7472270727157593, "learning_rate": 9.33676597753489e-07, "loss": 0.0007475254824385047, "memory(GiB)": 165.8, "reward": 2.70491099357605, "reward_std": 0.09298691153526306, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7049107551574707, "rewards/GeoVisalEntityMatch2ORM/std": 0.18788297474384308, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1454, "train_speed(iter/s)": 0.025531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 427.78125, "completions/min_length": 381.0, "epoch": 0.1743350107836089, "grad_norm": 1.2104119104564373, "kl": 0.7251640558242798, "learning_rate": 9.335819434900598e-07, "loss": 0.0007255077362060547, "memory(GiB)": 165.8, "reward": 2.6987266540527344, "reward_std": 0.08544151484966278, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6987268328666687, "rewards/GeoVisalEntityMatch2ORM/std": 0.12569618225097656, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1455, "train_speed(iter/s)": 0.025536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 421.85418701171875, "completions/min_length": 380.0, "epoch": 0.17445482866043613, "grad_norm": 0.9301856954264438, "kl": 0.7065258026123047, "learning_rate": 9.334872265363018e-07, "loss": 0.0007061784854158759, "memory(GiB)": 165.8, "reward": 2.672917127609253, "reward_std": 0.08281442523002625, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6812499761581421, "rewards/GeoVisalEntityMatch2ORM/std": 0.18835385143756866, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1456, "train_speed(iter/s)": 0.025543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 486.0, "completions/mean_length": 418.76043701171875, "completions/min_length": 369.0, "epoch": 0.17457464653726337, "grad_norm": 1.1836899712502176, "kl": 0.7592629194259644, "learning_rate": 9.333924469059103e-07, "loss": 0.0007614145870320499, "memory(GiB)": 165.8, "reward": 2.5845553874969482, "reward_std": 0.07825113087892532, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5845553874969482, "rewards/GeoVisalEntityMatch2ORM/std": 0.12712745368480682, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1457, "train_speed(iter/s)": 0.025548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/mean_length": 398.34375, "completions/min_length": 339.0, "epoch": 0.1746944644140906, "grad_norm": 0.782546916657475, "kl": 0.6937177181243896, "learning_rate": 9.332976046125885e-07, "loss": 0.0006945996428839862, "memory(GiB)": 165.8, "reward": 2.6755123138427734, "reward_std": 0.030887287110090256, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6755125522613525, "rewards/GeoVisalEntityMatch2ORM/std": 0.22254134714603424, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1458, "train_speed(iter/s)": 0.025549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 410.96875, "completions/min_length": 356.0, "epoch": 0.1748142822909178, "grad_norm": 1.1602042061722582, "kl": 0.7669640183448792, "learning_rate": 9.3320269967005e-07, "loss": 0.0007681946153752506, "memory(GiB)": 165.8, "reward": 2.6776175498962402, "reward_std": 0.08681400120258331, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6776175498962402, "rewards/GeoVisalEntityMatch2ORM/std": 0.1373373419046402, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1459, "train_speed(iter/s)": 0.025554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/mean_length": 400.75, "completions/min_length": 345.0, "epoch": 0.17493410016774502, "grad_norm": 0.9810307964476694, "kl": 0.7405024468898773, "learning_rate": 9.331077320920166e-07, "loss": 0.001337103545665741, "memory(GiB)": 165.8, "reward": 2.7310519218444824, "reward_std": 0.06047177314758301, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7310516238212585, "rewards/GeoVisalEntityMatch2ORM/std": 0.12139730900526047, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1460, "train_speed(iter/s)": 0.025559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 471.0, "completions/mean_length": 403.7708435058594, "completions/min_length": 347.0, "epoch": 0.17505391804457224, "grad_norm": 1.1667172572498057, "kl": 0.7704333961009979, "learning_rate": 9.330127018922193e-07, "loss": 0.000771448016166687, "memory(GiB)": 165.8, "reward": 2.065769672393799, "reward_std": 0.3011453151702881, "rewards/GeoLocAccuracyV2ORM/mean": 0.6145833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.45351937413215637, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6699363589286804, "rewards/GeoVisalEntityMatch2ORM/std": 0.2092616707086563, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687689781189, "step": 1461, "train_speed(iter/s)": 0.02556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 407.01043701171875, "completions/min_length": 356.0, "epoch": 0.17517373592139948, "grad_norm": 1.149702262185882, "kl": 0.7277829647064209, "learning_rate": 9.329176090843983e-07, "loss": 0.0007298340788111091, "memory(GiB)": 165.8, "reward": 2.390625, "reward_std": 0.08460237085819244, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.640625, "rewards/GeoVisalEntityMatch2ORM/std": 0.20823770761489868, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1462, "train_speed(iter/s)": 0.025565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 405.59375, "completions/min_length": 361.0, "epoch": 0.1752935537982267, "grad_norm": 1.2581331012764998, "kl": 0.7309393882751465, "learning_rate": 9.328224536823028e-07, "loss": 0.0007311254739761353, "memory(GiB)": 165.8, "reward": 2.1813244819641113, "reward_std": 0.07857296615839005, "rewards/GeoLocAccuracyV2ORM/mean": 0.550000011920929, "rewards/GeoLocAccuracyV2ORM/std": 0.4579128921031952, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6313244104385376, "rewards/GeoVisalEntityMatch2ORM/std": 0.12778092920780182, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1463, "train_speed(iter/s)": 0.025572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 400.0208435058594, "completions/min_length": 346.0, "epoch": 0.17541337167505391, "grad_norm": 0.93809130034312, "kl": 0.7619942128658295, "learning_rate": 9.327272356996911e-07, "loss": 0.0007624874706380069, "memory(GiB)": 165.8, "reward": 2.5211806297302246, "reward_std": 0.18159298598766327, "rewards/GeoLocAccuracyV2ORM/mean": 0.9395833015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.21739082038402557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5920138955116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.3008151948451996, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1464, "train_speed(iter/s)": 0.025578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 406.6458435058594, "completions/min_length": 363.0, "epoch": 0.17553318955188113, "grad_norm": 1.120298602943407, "kl": 0.7579326033592224, "learning_rate": 9.326319551503303e-07, "loss": 0.0007585212588310242, "memory(GiB)": 165.8, "reward": 2.475334882736206, "reward_std": 0.1091923788189888, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4753348231315613, "rewards/GeoVisalEntityMatch2ORM/std": 0.13819953799247742, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1465, "train_speed(iter/s)": 0.025583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 476.0, "completions/mean_length": 417.16668701171875, "completions/min_length": 377.0, "epoch": 0.17565300742870837, "grad_norm": 1.1781384060794848, "kl": 0.7180923819541931, "learning_rate": 9.325366120479969e-07, "loss": 0.0007197409868240356, "memory(GiB)": 165.8, "reward": 2.39787220954895, "reward_std": 0.06774424016475677, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3978721797466278, "rewards/GeoVisalEntityMatch2ORM/std": 0.07568991929292679, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1466, "train_speed(iter/s)": 0.025588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 414.0, "completions/min_length": 366.0, "epoch": 0.1757728253055356, "grad_norm": 1.1247432201029823, "kl": 0.7747445106506348, "learning_rate": 9.324412064064763e-07, "loss": 0.00077781081199646, "memory(GiB)": 165.8, "reward": 2.7513890266418457, "reward_std": 0.11450827121734619, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.761805534362793, "rewards/GeoVisalEntityMatch2ORM/std": 0.1635885387659073, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1467, "train_speed(iter/s)": 0.025595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 430.0, "completions/mean_length": 382.8020935058594, "completions/min_length": 333.0, "epoch": 0.1758926431823628, "grad_norm": 1.1545462142090401, "kl": 1.0454140305519104, "learning_rate": 9.323457382395628e-07, "loss": 0.0010432973504066467, "memory(GiB)": 165.8, "reward": 2.093120574951172, "reward_std": 0.40285080671310425, "rewards/GeoLocAccuracyV2ORM/mean": 0.6812500357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.43680959939956665, "rewards/GeoVisalEntityMatch2ORM/mean": 0.578537106513977, "rewards/GeoVisalEntityMatch2ORM/std": 0.16023968160152435, "rewards/MathFormat/mean": 0.8333333730697632, "rewards/MathFormat/std": 0.374634325504303, "step": 1468, "train_speed(iter/s)": 0.025595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 410.5833435058594, "completions/min_length": 360.0, "epoch": 0.17601246105919002, "grad_norm": 1.1059300697366614, "kl": 0.7266902923583984, "learning_rate": 9.322502075610599e-07, "loss": 0.0007270897622220218, "memory(GiB)": 165.8, "reward": 2.6242189407348633, "reward_std": 0.12023280560970306, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.624218761920929, "rewards/GeoVisalEntityMatch2ORM/std": 0.15687966346740723, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1469, "train_speed(iter/s)": 0.025598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 430.0, "completions/mean_length": 377.28125, "completions/min_length": 341.0, "epoch": 0.17613227893601727, "grad_norm": 1.2086586852421322, "kl": 0.7325466275215149, "learning_rate": 9.3215461438478e-07, "loss": 0.0007323188474401832, "memory(GiB)": 165.8, "reward": 2.4187211990356445, "reward_std": 0.2827132046222687, "rewards/GeoLocAccuracyV2ORM/mean": 0.7958333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.35656747221946716, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6645544171333313, "rewards/GeoVisalEntityMatch2ORM/std": 0.10932611674070358, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087526738643646, "step": 1470, "train_speed(iter/s)": 0.025599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/mean_length": 391.57293701171875, "completions/min_length": 345.0, "epoch": 0.17625209681284448, "grad_norm": 0.8223745484861045, "kl": 0.6939631402492523, "learning_rate": 9.320589587245449e-07, "loss": 0.0006964417989365757, "memory(GiB)": 165.8, "reward": 2.8687169551849365, "reward_std": 0.037450458854436874, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8687168955802917, "rewards/GeoVisalEntityMatch2ORM/std": 0.15145358443260193, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1471, "train_speed(iter/s)": 0.025604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/mean_length": 377.57293701171875, "completions/min_length": 329.0, "epoch": 0.1763719146896717, "grad_norm": 1.2109896311551316, "kl": 0.7790753841400146, "learning_rate": 9.319632405941849e-07, "loss": 0.0007800410385243595, "memory(GiB)": 165.8, "reward": 2.6866989135742188, "reward_std": 0.08956015110015869, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6866987943649292, "rewards/GeoVisalEntityMatch2ORM/std": 0.14145515859127045, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1472, "train_speed(iter/s)": 0.025609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 477.0, "completions/mean_length": 398.4583435058594, "completions/min_length": 341.0, "epoch": 0.17649173256649892, "grad_norm": 1.1228279397793766, "kl": 0.7672972083091736, "learning_rate": 9.318674600075398e-07, "loss": 0.0007688999176025391, "memory(GiB)": 165.8, "reward": 2.5546298027038574, "reward_std": 0.10661262273788452, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5712963342666626, "rewards/GeoVisalEntityMatch2ORM/std": 0.14676646888256073, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1473, "train_speed(iter/s)": 0.025615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 436.0, "completions/mean_length": 379.0833435058594, "completions/min_length": 334.0, "epoch": 0.17661155044332613, "grad_norm": 1.293820274537545, "kl": 0.7379516661167145, "learning_rate": 9.317716169784582e-07, "loss": 0.0007390528917312622, "memory(GiB)": 165.8, "reward": 2.4375, "reward_std": 0.08699092268943787, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6875, "rewards/GeoVisalEntityMatch2ORM/std": 0.1681947559118271, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1474, "train_speed(iter/s)": 0.025618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 443.0, "completions/mean_length": 396.07293701171875, "completions/min_length": 353.0, "epoch": 0.17673136832015338, "grad_norm": 1.1101873234154227, "kl": 0.7422748506069183, "learning_rate": 9.316757115207975e-07, "loss": 0.0007430911064147949, "memory(GiB)": 165.8, "reward": 2.5737271308898926, "reward_std": 0.0779281035065651, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5737268924713135, "rewards/GeoVisalEntityMatch2ORM/std": 0.11529120057821274, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1475, "train_speed(iter/s)": 0.025623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 370.4270935058594, "completions/min_length": 323.0, "epoch": 0.1768511861969806, "grad_norm": 1.2928081991540499, "kl": 0.7637367844581604, "learning_rate": 9.315797436484247e-07, "loss": 0.0007659420371055603, "memory(GiB)": 165.8, "reward": 2.704166889190674, "reward_std": 0.0941455215215683, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7125000357627869, "rewards/GeoVisalEntityMatch2ORM/std": 0.0977225974202156, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1476, "train_speed(iter/s)": 0.025624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3958333333333333, "completions/max_length": 392.0, "completions/mean_length": 365.32293701171875, "completions/min_length": 331.0, "epoch": 0.1769710040738078, "grad_norm": 1.1110178214626931, "kl": 1.120056837797165, "learning_rate": 9.314837133752154e-07, "loss": 0.0011194547405466437, "memory(GiB)": 165.8, "reward": 1.9670140743255615, "reward_std": 0.4992724061012268, "rewards/GeoLocAccuracyV2ORM/mean": 0.581250011920929, "rewards/GeoLocAccuracyV2ORM/std": 0.4891049861907959, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7711805701255798, "rewards/GeoVisalEntityMatch2ORM/std": 0.15326809883117676, "rewards/MathFormat/mean": 0.6145833730697632, "rewards/MathFormat/std": 0.48924845457077026, "step": 1477, "train_speed(iter/s)": 0.025624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/mean_length": 392.54168701171875, "completions/min_length": 355.0, "epoch": 0.17709082195063502, "grad_norm": 1.1264174763827508, "kl": 0.7663790881633759, "learning_rate": 9.313876207150542e-07, "loss": 0.0007679661503061652, "memory(GiB)": 165.8, "reward": 2.578145742416382, "reward_std": 0.07615865021944046, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5781456828117371, "rewards/GeoVisalEntityMatch2ORM/std": 0.26435232162475586, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1478, "train_speed(iter/s)": 0.025629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 384.09375, "completions/min_length": 346.0, "epoch": 0.17721063982746227, "grad_norm": 1.0045999198238647, "kl": 0.7454087436199188, "learning_rate": 9.312914656818351e-07, "loss": 0.0007474621525034308, "memory(GiB)": 165.8, "reward": 2.810908794403076, "reward_std": 0.046702153980731964, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8109085559844971, "rewards/GeoVisalEntityMatch2ORM/std": 0.15231014788150787, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1479, "train_speed(iter/s)": 0.025634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/mean_length": 379.34375, "completions/min_length": 345.0, "epoch": 0.17733045770428948, "grad_norm": 1.1540033745565759, "kl": 0.7531791031360626, "learning_rate": 9.311952482894607e-07, "loss": 0.0007550939917564392, "memory(GiB)": 165.8, "reward": 2.619365692138672, "reward_std": 0.19009771943092346, "rewards/GeoLocAccuracyV2ORM/mean": 0.9249999523162842, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6943655014038086, "rewards/GeoVisalEntityMatch2ORM/std": 0.20741266012191772, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1480, "train_speed(iter/s)": 0.025639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 445.0, "completions/mean_length": 391.78125, "completions/min_length": 343.0, "epoch": 0.1774502755811167, "grad_norm": 0.9181619284740898, "kl": 0.7053283751010895, "learning_rate": 9.310989685518428e-07, "loss": 0.0007052223081700504, "memory(GiB)": 165.8, "reward": 2.5343503952026367, "reward_std": 0.06287337094545364, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5343502163887024, "rewards/GeoVisalEntityMatch2ORM/std": 0.3283066749572754, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1481, "train_speed(iter/s)": 0.025644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 423.0, "completions/mean_length": 385.13543701171875, "completions/min_length": 334.0, "epoch": 0.17757009345794392, "grad_norm": 1.1495585458772706, "kl": 0.7658005654811859, "learning_rate": 9.310026264829025e-07, "loss": 0.0007668038597330451, "memory(GiB)": 165.8, "reward": 2.554811477661133, "reward_std": 0.08777008205652237, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5548115372657776, "rewards/GeoVisalEntityMatch2ORM/std": 0.14815247058868408, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1482, "train_speed(iter/s)": 0.025649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 443.0, "completions/mean_length": 396.38543701171875, "completions/min_length": 357.0, "epoch": 0.17768991133477116, "grad_norm": 1.1134119868093464, "kl": 0.8112735450267792, "learning_rate": 9.309062220965691e-07, "loss": 0.0008125007152557373, "memory(GiB)": 165.8, "reward": 2.2196223735809326, "reward_std": 0.14786286652088165, "rewards/GeoLocAccuracyV2ORM/mean": 0.7166666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.4454841613769531, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7529555559158325, "rewards/GeoVisalEntityMatch2ORM/std": 0.14496609568595886, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 1483, "train_speed(iter/s)": 0.025648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 459.0, "completions/mean_length": 402.32293701171875, "completions/min_length": 351.0, "epoch": 0.17780972921159838, "grad_norm": 1.1252624063423238, "kl": 0.8356106579303741, "learning_rate": 9.308097554067817e-07, "loss": 0.0008335299789905548, "memory(GiB)": 165.8, "reward": 2.65023136138916, "reward_std": 0.2665526270866394, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7127315402030945, "rewards/GeoVisalEntityMatch2ORM/std": 0.17684248089790344, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490598559379578, "step": 1484, "train_speed(iter/s)": 0.025649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 397.69793701171875, "completions/min_length": 356.0, "epoch": 0.1779295470884256, "grad_norm": 1.0083006477231162, "kl": 0.788876473903656, "learning_rate": 9.307132264274882e-07, "loss": 0.0007914276793599129, "memory(GiB)": 165.8, "reward": 2.782407522201538, "reward_std": 0.05025249719619751, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7824074625968933, "rewards/GeoVisalEntityMatch2ORM/std": 0.11406287550926208, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1485, "train_speed(iter/s)": 0.025654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 397.6770935058594, "completions/min_length": 353.0, "epoch": 0.1780493649652528, "grad_norm": 0.979846413981384, "kl": 0.7690758407115936, "learning_rate": 9.306166351726452e-07, "loss": 0.0007700473070144653, "memory(GiB)": 165.8, "reward": 2.5655879974365234, "reward_std": 0.1243671327829361, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.590587854385376, "rewards/GeoVisalEntityMatch2ORM/std": 0.15259522199630737, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1486, "train_speed(iter/s)": 0.025655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/mean_length": 386.7083435058594, "completions/min_length": 320.0, "epoch": 0.17816918284208003, "grad_norm": 1.1199280739029573, "kl": 0.7711856365203857, "learning_rate": 9.305199816562189e-07, "loss": 0.0007726385956630111, "memory(GiB)": 165.8, "reward": 2.796086072921753, "reward_std": 0.08349750936031342, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7960858345031738, "rewards/GeoVisalEntityMatch2ORM/std": 0.1700616478919983, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1487, "train_speed(iter/s)": 0.025659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 389.34375, "completions/min_length": 345.0, "epoch": 0.17828900071890727, "grad_norm": 1.0184121267988218, "kl": 0.7922171354293823, "learning_rate": 9.304232658921837e-07, "loss": 0.0007928311824798584, "memory(GiB)": 165.8, "reward": 2.671354293823242, "reward_std": 0.06571009755134583, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6713541746139526, "rewards/GeoVisalEntityMatch2ORM/std": 0.2512856125831604, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1488, "train_speed(iter/s)": 0.025664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.23958333333333334, "completions/max_length": 450.0, "completions/mean_length": 396.3125, "completions/min_length": 355.0, "epoch": 0.1784088185957345, "grad_norm": 1.1905968116447578, "kl": 0.8419474959373474, "learning_rate": 9.303264878945239e-07, "loss": 0.0008426979184150696, "memory(GiB)": 165.8, "reward": 2.193634510040283, "reward_std": 0.14468948543071747, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906978726387024, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6728009581565857, "rewards/GeoVisalEntityMatch2ORM/std": 0.10459078103303909, "rewards/MathFormat/mean": 0.7604166865348816, "rewards/MathFormat/std": 0.42906978726387024, "step": 1489, "train_speed(iter/s)": 0.025664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 459.0, "completions/mean_length": 398.40625, "completions/min_length": 353.0, "epoch": 0.1785286364725617, "grad_norm": 1.1635405978311473, "kl": 0.7437747120857239, "learning_rate": 9.302296476772321e-07, "loss": 0.000743697106372565, "memory(GiB)": 165.8, "reward": 2.120192527770996, "reward_std": 0.11068667471408844, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.435285747051239, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6201923489570618, "rewards/GeoVisalEntityMatch2ORM/std": 0.1674775779247284, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.435285747051239, "step": 1490, "train_speed(iter/s)": 0.025664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 395.8020935058594, "completions/min_length": 328.0, "epoch": 0.17864845434938892, "grad_norm": 1.1350929855637362, "kl": 0.7429419457912445, "learning_rate": 9.301327452543102e-07, "loss": 0.0007434263825416565, "memory(GiB)": 165.8, "reward": 2.722201347351074, "reward_std": 0.06672251969575882, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7222012281417847, "rewards/GeoVisalEntityMatch2ORM/std": 0.20508719980716705, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1491, "train_speed(iter/s)": 0.025668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/mean_length": 400.15625, "completions/min_length": 337.0, "epoch": 0.17876827222621616, "grad_norm": 1.1470621989868495, "kl": 0.7786243259906769, "learning_rate": 9.300357806397689e-07, "loss": 0.0007800832390785217, "memory(GiB)": 165.8, "reward": 2.4869792461395264, "reward_std": 0.18705317378044128, "rewards/GeoLocAccuracyV2ORM/mean": 0.7750000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.36157551407814026, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7119791507720947, "rewards/GeoVisalEntityMatch2ORM/std": 0.11211516708135605, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1492, "train_speed(iter/s)": 0.025674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 404.29168701171875, "completions/min_length": 370.0, "epoch": 0.17888809010304338, "grad_norm": 0.9979929472555419, "kl": 0.7502643764019012, "learning_rate": 9.299387538476283e-07, "loss": 0.0007515450706705451, "memory(GiB)": 165.8, "reward": 2.85486102104187, "reward_std": 0.0576217919588089, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8548611402511597, "rewards/GeoVisalEntityMatch2ORM/std": 0.19106630980968475, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1493, "train_speed(iter/s)": 0.025678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 387.8958435058594, "completions/min_length": 350.0, "epoch": 0.1790079079798706, "grad_norm": 0.8499116005693098, "kl": 0.7773120403289795, "learning_rate": 9.298416648919167e-07, "loss": 0.0007777487626299262, "memory(GiB)": 165.8, "reward": 2.7689733505249023, "reward_std": 0.05911140888929367, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7689732909202576, "rewards/GeoVisalEntityMatch2ORM/std": 0.2579536736011505, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1494, "train_speed(iter/s)": 0.02568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 389.90625, "completions/min_length": 333.0, "epoch": 0.1791277258566978, "grad_norm": 0.9451085396509177, "kl": 0.7142197787761688, "learning_rate": 9.297445137866725e-07, "loss": 0.0007154023041948676, "memory(GiB)": 165.8, "reward": 2.6341147422790527, "reward_std": 0.06871434301137924, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6341146230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.19066746532917023, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1495, "train_speed(iter/s)": 0.025686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 410.5520935058594, "completions/min_length": 366.0, "epoch": 0.17924754373352506, "grad_norm": 1.1310440520100487, "kl": 0.8169895112514496, "learning_rate": 9.296473005459422e-07, "loss": 0.0008186350460164249, "memory(GiB)": 165.8, "reward": 2.648611068725586, "reward_std": 0.13145802915096283, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6652778387069702, "rewards/GeoVisalEntityMatch2ORM/std": 0.12047548592090607, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1496, "train_speed(iter/s)": 0.025691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 436.0, "completions/mean_length": 391.8645935058594, "completions/min_length": 346.0, "epoch": 0.17936736161035227, "grad_norm": 1.1330601814774728, "kl": 0.7255091667175293, "learning_rate": 9.295500251837816e-07, "loss": 0.0007259771227836609, "memory(GiB)": 165.8, "reward": 2.7184524536132812, "reward_std": 0.17570756375789642, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490598559379578, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7497023940086365, "rewards/GeoVisalEntityMatch2ORM/std": 0.15972894430160522, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1497, "train_speed(iter/s)": 0.025696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/mean_length": 425.34375, "completions/min_length": 386.0, "epoch": 0.1794871794871795, "grad_norm": 1.059562738524262, "kl": 0.7470121085643768, "learning_rate": 9.294526877142555e-07, "loss": 0.0007473429432138801, "memory(GiB)": 165.8, "reward": 2.5500290393829346, "reward_std": 0.1669853776693344, "rewards/GeoLocAccuracyV2ORM/mean": 0.9250000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.23440854251384735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6250289678573608, "rewards/GeoVisalEntityMatch2ORM/std": 0.17543341219425201, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1498, "train_speed(iter/s)": 0.025701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/mean_length": 400.10418701171875, "completions/min_length": 360.0, "epoch": 0.1796069973640067, "grad_norm": 1.0013394427161122, "kl": 0.7798360586166382, "learning_rate": 9.293552881514377e-07, "loss": 0.0007814317941665649, "memory(GiB)": 165.8, "reward": 2.6916255950927734, "reward_std": 0.15273499488830566, "rewards/GeoLocAccuracyV2ORM/mean": 0.9416666030883789, "rewards/GeoLocAccuracyV2ORM/std": 0.20909158885478973, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7499586939811707, "rewards/GeoVisalEntityMatch2ORM/std": 0.23387852311134338, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1499, "train_speed(iter/s)": 0.025706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 403.82293701171875, "completions/min_length": 367.0, "epoch": 0.17972681524083392, "grad_norm": 1.129725912539559, "kl": 0.7442188858985901, "learning_rate": 9.292578265094107e-07, "loss": 0.0007460688939318061, "memory(GiB)": 165.8, "reward": 2.652660846710205, "reward_std": 0.12489677965641022, "rewards/GeoLocAccuracyV2ORM/mean": 0.981249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.13003036379814148, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6714105606079102, "rewards/GeoVisalEntityMatch2ORM/std": 0.17271505296230316, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1500, "train_speed(iter/s)": 0.025713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 396.90625, "completions/min_length": 348.0, "epoch": 0.17984663311766116, "grad_norm": 1.1601022966354533, "kl": 0.7617154121398926, "learning_rate": 9.291603028022663e-07, "loss": 0.0007604460115544498, "memory(GiB)": 165.8, "reward": 2.7974536418914795, "reward_std": 0.08655793964862823, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.797453761100769, "rewards/GeoVisalEntityMatch2ORM/std": 0.14950190484523773, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1501, "train_speed(iter/s)": 0.025718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/mean_length": 411.9375, "completions/min_length": 370.0, "epoch": 0.17996645099448838, "grad_norm": 1.1632131396448893, "kl": 0.7750996947288513, "learning_rate": 9.290627170441053e-07, "loss": 0.0007758935680612922, "memory(GiB)": 165.8, "reward": 2.6382155418395996, "reward_std": 0.15541991591453552, "rewards/GeoLocAccuracyV2ORM/mean": 0.8666666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.29970744252204895, "rewards/GeoVisalEntityMatch2ORM/mean": 0.771548867225647, "rewards/GeoVisalEntityMatch2ORM/std": 0.2276672124862671, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1502, "train_speed(iter/s)": 0.025723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/mean_length": 426.19793701171875, "completions/min_length": 366.0, "epoch": 0.1800862688713156, "grad_norm": 1.125529803061741, "kl": 0.7411282360553741, "learning_rate": 9.289650692490372e-07, "loss": 0.0007420927286148071, "memory(GiB)": 165.8, "reward": 2.592245578765869, "reward_std": 0.17890338599681854, "rewards/GeoLocAccuracyV2ORM/mean": 0.9041666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.28429660201072693, "rewards/GeoVisalEntityMatch2ORM/mean": 0.688078761100769, "rewards/GeoVisalEntityMatch2ORM/std": 0.12796001136302948, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1503, "train_speed(iter/s)": 0.025728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 483.0, "completions/mean_length": 417.82293701171875, "completions/min_length": 337.0, "epoch": 0.1802060867481428, "grad_norm": 1.0878889601441217, "kl": 0.7405883073806763, "learning_rate": 9.288673594311805e-07, "loss": 0.0007408261299133301, "memory(GiB)": 165.8, "reward": 2.572801113128662, "reward_std": 0.2482019066810608, "rewards/GeoLocAccuracyV2ORM/mean": 0.8083333373069763, "rewards/GeoLocAccuracyV2ORM/std": 0.34325581789016724, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7644675970077515, "rewards/GeoVisalEntityMatch2ORM/std": 0.13290007412433624, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1504, "train_speed(iter/s)": 0.025729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 507.0, "completions/mean_length": 440.90625, "completions/min_length": 394.0, "epoch": 0.18032590462497006, "grad_norm": 1.0786273424724657, "kl": 0.7719080746173859, "learning_rate": 9.287695876046631e-07, "loss": 0.0007717448170296848, "memory(GiB)": 165.8, "reward": 2.2650465965270996, "reward_std": 0.19346441328525543, "rewards/GeoLocAccuracyV2ORM/mean": 0.6979166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.3925568461418152, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5775463581085205, "rewards/GeoVisalEntityMatch2ORM/std": 0.1825243979692459, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1505, "train_speed(iter/s)": 0.025734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 427.85418701171875, "completions/min_length": 385.0, "epoch": 0.18044572250179727, "grad_norm": 0.9488533690278497, "kl": 0.7432443797588348, "learning_rate": 9.28671753783621e-07, "loss": 0.0007437914609909058, "memory(GiB)": 165.8, "reward": 2.7349729537963867, "reward_std": 0.10509376227855682, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7599730491638184, "rewards/GeoVisalEntityMatch2ORM/std": 0.23830562829971313, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1506, "train_speed(iter/s)": 0.025738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/mean_length": 423.9583435058594, "completions/min_length": 366.0, "epoch": 0.1805655403786245, "grad_norm": 1.1357055568249383, "kl": 0.7693997621536255, "learning_rate": 9.285738579822004e-07, "loss": 0.0007705986499786377, "memory(GiB)": 165.8, "reward": 2.425694465637207, "reward_std": 0.13794633746147156, "rewards/GeoLocAccuracyV2ORM/mean": 0.7833333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.35737836360931396, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6423611044883728, "rewards/GeoVisalEntityMatch2ORM/std": 0.24064579606056213, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1507, "train_speed(iter/s)": 0.025743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 448.41668701171875, "completions/min_length": 387.0, "epoch": 0.1806853582554517, "grad_norm": 0.9120711015551806, "kl": 0.734334796667099, "learning_rate": 9.284759002145552e-07, "loss": 0.0013313170056790113, "memory(GiB)": 165.8, "reward": 2.6484220027923584, "reward_std": 0.09662193059921265, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.665088415145874, "rewards/GeoVisalEntityMatch2ORM/std": 0.16562236845493317, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1508, "train_speed(iter/s)": 0.025748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 495.0, "completions/mean_length": 425.8645935058594, "completions/min_length": 377.0, "epoch": 0.18080517613227895, "grad_norm": 1.1034338890226236, "kl": 0.7407543063163757, "learning_rate": 9.28377880494849e-07, "loss": 0.0007424578070640564, "memory(GiB)": 165.8, "reward": 2.584672689437866, "reward_std": 0.12564370036125183, "rewards/GeoLocAccuracyV2ORM/mean": 0.9833333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.11486070603132248, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6013393402099609, "rewards/GeoVisalEntityMatch2ORM/std": 0.14632010459899902, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1509, "train_speed(iter/s)": 0.025753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 433.6458435058594, "completions/min_length": 384.0, "epoch": 0.18092499400910617, "grad_norm": 1.1218443504622604, "kl": 0.7516365349292755, "learning_rate": 9.282797988372541e-07, "loss": 0.0007534921169281006, "memory(GiB)": 165.8, "reward": 2.516493320465088, "reward_std": 0.1345297247171402, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.537326455116272, "rewards/GeoVisalEntityMatch2ORM/std": 0.14235888421535492, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1510, "train_speed(iter/s)": 0.025758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 480.0, "completions/mean_length": 403.04168701171875, "completions/min_length": 338.0, "epoch": 0.18104481188593338, "grad_norm": 1.161783972880219, "kl": 0.7829611599445343, "learning_rate": 9.281816552559523e-07, "loss": 0.0007831367547623813, "memory(GiB)": 165.8, "reward": 2.785764217376709, "reward_std": 0.10005956143140793, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7857639193534851, "rewards/GeoVisalEntityMatch2ORM/std": 0.1608271449804306, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1511, "train_speed(iter/s)": 0.025763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 514.0, "completions/mean_length": 429.22918701171875, "completions/min_length": 373.0, "epoch": 0.1811646297627606, "grad_norm": 1.0089455423987725, "kl": 0.7173976302146912, "learning_rate": 9.280834497651332e-07, "loss": 0.0007192915072664618, "memory(GiB)": 165.8, "reward": 2.6053977012634277, "reward_std": 0.13475042581558228, "rewards/GeoLocAccuracyV2ORM/mean": 0.8416666984558105, "rewards/GeoLocAccuracyV2ORM/std": 0.3204164206981659, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7637310028076172, "rewards/GeoVisalEntityMatch2ORM/std": 0.11445218324661255, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1512, "train_speed(iter/s)": 0.025768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 462.0, "completions/mean_length": 418.88543701171875, "completions/min_length": 370.0, "epoch": 0.18128444763958781, "grad_norm": 1.3857512500637226, "kl": 1.9082509875297546, "learning_rate": 9.279851823789965e-07, "loss": 0.0018599405884742737, "memory(GiB)": 165.8, "reward": 2.552372694015503, "reward_std": 0.34890303015708923, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087526738643646, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6357060670852661, "rewards/GeoVisalEntityMatch2ORM/std": 0.1758609265089035, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087526738643646, "step": 1513, "train_speed(iter/s)": 0.025769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 552.0, "completions/mean_length": 416.1770935058594, "completions/min_length": 383.0, "epoch": 0.18140426551641506, "grad_norm": 1.2063764801218566, "kl": 1.5452472269535065, "learning_rate": 9.278868531117502e-07, "loss": 0.0013921757927164435, "memory(GiB)": 165.8, "reward": 2.7169642448425293, "reward_std": 0.18787896633148193, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7377976179122925, "rewards/GeoVisalEntityMatch2ORM/std": 0.23292817175388336, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1514, "train_speed(iter/s)": 0.025774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 470.0, "completions/mean_length": 424.5625, "completions/min_length": 392.0, "epoch": 0.18152408339324227, "grad_norm": 1.169978699192504, "kl": 0.7292346954345703, "learning_rate": 9.277884619776115e-07, "loss": 0.0007298588752746582, "memory(GiB)": 165.8, "reward": 2.6240291595458984, "reward_std": 0.16785484552383423, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.649029016494751, "rewards/GeoVisalEntityMatch2ORM/std": 0.19557702541351318, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1515, "train_speed(iter/s)": 0.025779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2604166666666667, "completions/max_length": 506.0, "completions/mean_length": 417.63543701171875, "completions/min_length": 368.0, "epoch": 0.1816439012700695, "grad_norm": 1.1175222496327022, "kl": 0.7243479490280151, "learning_rate": 9.276900089908066e-07, "loss": 0.0007259560516104102, "memory(GiB)": 165.8, "reward": 2.279935598373413, "reward_std": 0.21812216937541962, "rewards/GeoLocAccuracyV2ORM/mean": 0.731249988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.4437430500984192, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8091021776199341, "rewards/GeoVisalEntityMatch2ORM/std": 0.2145698368549347, "rewards/MathFormat/mean": 0.7395833730697632, "rewards/MathFormat/std": 0.4411657452583313, "step": 1516, "train_speed(iter/s)": 0.025778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 477.0, "completions/mean_length": 430.5625, "completions/min_length": 387.0, "epoch": 0.1817637191468967, "grad_norm": 1.0272750186420418, "kl": 0.7117988169193268, "learning_rate": 9.275914941655702e-07, "loss": 0.000713179528247565, "memory(GiB)": 165.8, "reward": 2.78125, "reward_std": 0.09412064403295517, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.78125, "rewards/GeoVisalEntityMatch2ORM/std": 0.10206206887960434, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1517, "train_speed(iter/s)": 0.025783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/mean_length": 419.1145935058594, "completions/min_length": 364.0, "epoch": 0.18188353702372395, "grad_norm": 1.1406352382786258, "kl": 0.7454806864261627, "learning_rate": 9.274929175161467e-07, "loss": 0.0007462998619303107, "memory(GiB)": 165.8, "reward": 2.7057912349700928, "reward_std": 0.09611745178699493, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.705791175365448, "rewards/GeoVisalEntityMatch2ORM/std": 0.15167231857776642, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1518, "train_speed(iter/s)": 0.025789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/mean_length": 434.5, "completions/min_length": 383.0, "epoch": 0.18200335490055117, "grad_norm": 1.094270475721453, "kl": 0.7364680767059326, "learning_rate": 9.273942790567886e-07, "loss": 0.0007387201185338199, "memory(GiB)": 165.8, "reward": 2.4529762268066406, "reward_std": 0.1133325845003128, "rewards/GeoLocAccuracyV2ORM/mean": 0.7979166507720947, "rewards/GeoLocAccuracyV2ORM/std": 0.36418014764785767, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6550595164299011, "rewards/GeoVisalEntityMatch2ORM/std": 0.10507996380329132, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1519, "train_speed(iter/s)": 0.025794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 484.0, "completions/mean_length": 416.2083435058594, "completions/min_length": 368.0, "epoch": 0.18212317277737838, "grad_norm": 1.1479014713565105, "kl": 0.752326250076294, "learning_rate": 9.27295578801758e-07, "loss": 0.0007541676750406623, "memory(GiB)": 165.8, "reward": 2.2335317134857178, "reward_std": 0.16391479969024658, "rewards/GeoLocAccuracyV2ORM/mean": 0.5833333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4955946207046509, "rewards/GeoVisalEntityMatch2ORM/mean": 0.9001984000205994, "rewards/GeoVisalEntityMatch2ORM/std": 0.12636445462703705, "rewards/MathFormat/mean": 0.75, "rewards/MathFormat/std": 0.4352857768535614, "step": 1520, "train_speed(iter/s)": 0.025794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/mean_length": 427.5625, "completions/min_length": 381.0, "epoch": 0.1822429906542056, "grad_norm": 1.141353633259654, "kl": 0.7488620281219482, "learning_rate": 9.271968167653256e-07, "loss": 0.0007510880823247135, "memory(GiB)": 165.8, "reward": 2.6785266399383545, "reward_std": 0.08663728833198547, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6785264015197754, "rewards/GeoVisalEntityMatch2ORM/std": 0.17004260420799255, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1521, "train_speed(iter/s)": 0.025799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 439.97918701171875, "completions/min_length": 389.0, "epoch": 0.18236280853103284, "grad_norm": 1.5477815451122245, "kl": 0.705186516046524, "learning_rate": 9.270979929617711e-07, "loss": 0.000707050203345716, "memory(GiB)": 165.8, "reward": 2.7730655670166016, "reward_std": 0.06165711209177971, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7730655074119568, "rewards/GeoVisalEntityMatch2ORM/std": 0.1036907285451889, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1522, "train_speed(iter/s)": 0.025803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 461.0, "completions/mean_length": 420.57293701171875, "completions/min_length": 376.0, "epoch": 0.18248262640786006, "grad_norm": 1.1472395864560567, "kl": 0.7903362214565277, "learning_rate": 9.269991074053831e-07, "loss": 0.0007909586420282722, "memory(GiB)": 165.8, "reward": 2.3544769287109375, "reward_std": 0.09277661144733429, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6044766902923584, "rewards/GeoVisalEntityMatch2ORM/std": 0.15108872950077057, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1523, "train_speed(iter/s)": 0.025807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.0, "completions/mean_length": 416.60418701171875, "completions/min_length": 353.0, "epoch": 0.18260244428468728, "grad_norm": 0.9364373007531658, "kl": 0.7001054286956787, "learning_rate": 9.269001601104593e-07, "loss": 0.0007014473667368293, "memory(GiB)": 165.8, "reward": 2.6872520446777344, "reward_std": 0.07328447699546814, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6872519850730896, "rewards/GeoVisalEntityMatch2ORM/std": 0.21773524582386017, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1524, "train_speed(iter/s)": 0.025812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.22916666666666666, "completions/max_length": 462.0, "completions/mean_length": 415.66668701171875, "completions/min_length": 382.0, "epoch": 0.1827222621615145, "grad_norm": 0.9499279284337165, "kl": 0.9257128536701202, "learning_rate": 9.26801151091306e-07, "loss": 0.0009260302176699042, "memory(GiB)": 165.8, "reward": 2.3085317611694336, "reward_std": 0.2461453378200531, "rewards/GeoLocAccuracyV2ORM/mean": 0.7708333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4225029945373535, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7564484477043152, "rewards/GeoVisalEntityMatch2ORM/std": 0.1958051323890686, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687689781189, "step": 1525, "train_speed(iter/s)": 0.025813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 484.0, "completions/mean_length": 418.4583435058594, "completions/min_length": 375.0, "epoch": 0.1828420800383417, "grad_norm": 1.1430284613291237, "kl": 0.8097476363182068, "learning_rate": 9.267020803622388e-07, "loss": 0.0008102121646516025, "memory(GiB)": 165.8, "reward": 2.4136576652526855, "reward_std": 0.2368025779724121, "rewards/GeoLocAccuracyV2ORM/mean": 0.8395833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.3620058596134186, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5740741491317749, "rewards/GeoVisalEntityMatch2ORM/std": 0.13731776177883148, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1526, "train_speed(iter/s)": 0.025819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.0, "completions/mean_length": 418.35418701171875, "completions/min_length": 365.0, "epoch": 0.18296189791516895, "grad_norm": 1.1731647227630722, "kl": 0.7325335741043091, "learning_rate": 9.266029479375821e-07, "loss": 0.0007334774127230048, "memory(GiB)": 165.8, "reward": 2.390104293823242, "reward_std": 0.0806976854801178, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6401041746139526, "rewards/GeoVisalEntityMatch2ORM/std": 0.1912297159433365, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1527, "train_speed(iter/s)": 0.025824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 450.0, "completions/mean_length": 409.1770935058594, "completions/min_length": 359.0, "epoch": 0.18308171579199617, "grad_norm": 1.0946714182388309, "kl": 0.8933117389678955, "learning_rate": 9.265037538316689e-07, "loss": 0.0008892081677913666, "memory(GiB)": 165.8, "reward": 2.316319465637207, "reward_std": 0.3122015595436096, "rewards/GeoLocAccuracyV2ORM/mean": 0.7645833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3805755078792572, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6038194894790649, "rewards/GeoVisalEntityMatch2ORM/std": 0.17787329852581024, "rewards/MathFormat/mean": 0.9479166865348816, "rewards/MathFormat/std": 0.22336149215698242, "step": 1528, "train_speed(iter/s)": 0.025825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 485.0, "completions/mean_length": 422.10418701171875, "completions/min_length": 376.0, "epoch": 0.18320153366882339, "grad_norm": 0.9528078284878404, "kl": 0.7382507920265198, "learning_rate": 9.264044980588414e-07, "loss": 0.0007402009214274585, "memory(GiB)": 165.8, "reward": 2.6418981552124023, "reward_std": 0.06552455574274063, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6418981552124023, "rewards/GeoVisalEntityMatch2ORM/std": 0.12608443200588226, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1529, "train_speed(iter/s)": 0.025829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 402.54168701171875, "completions/min_length": 352.0, "epoch": 0.1833213515456506, "grad_norm": 1.203965477106873, "kl": 0.8008994460105896, "learning_rate": 9.26305180633451e-07, "loss": 0.00080137699842453, "memory(GiB)": 165.8, "reward": 2.1911377906799316, "reward_std": 0.16101175546646118, "rewards/GeoLocAccuracyV2ORM/mean": 0.53125, "rewards/GeoLocAccuracyV2ORM/std": 0.45775482058525085, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6598875522613525, "rewards/GeoVisalEntityMatch2ORM/std": 0.15929298102855682, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1530, "train_speed(iter/s)": 0.025834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 463.0, "completions/mean_length": 406.6875, "completions/min_length": 362.0, "epoch": 0.18344116942247785, "grad_norm": 0.9882695432195596, "kl": 0.7639400959014893, "learning_rate": 9.262058015698575e-07, "loss": 0.0007639303803443909, "memory(GiB)": 165.8, "reward": 2.590029716491699, "reward_std": 0.21745480597019196, "rewards/GeoLocAccuracyV2ORM/mean": 0.96875, "rewards/GeoLocAccuracyV2ORM/std": 0.17490600049495697, "rewards/GeoVisalEntityMatch2ORM/mean": 0.652529776096344, "rewards/GeoVisalEntityMatch2ORM/std": 0.21080656349658966, "rewards/MathFormat/mean": 0.96875, "rewards/MathFormat/std": 0.17490600049495697, "step": 1531, "train_speed(iter/s)": 0.025835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.0, "completions/mean_length": 397.32293701171875, "completions/min_length": 346.0, "epoch": 0.18356098729930506, "grad_norm": 1.0377964020166786, "kl": 0.7689419984817505, "learning_rate": 9.261063608824299e-07, "loss": 0.0007695680251345038, "memory(GiB)": 165.8, "reward": 2.7687289714813232, "reward_std": 0.06084952503442764, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7687290906906128, "rewards/GeoVisalEntityMatch2ORM/std": 0.2281446009874344, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1532, "train_speed(iter/s)": 0.025841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.0, "completions/mean_length": 387.54168701171875, "completions/min_length": 344.0, "epoch": 0.18368080517613228, "grad_norm": 0.9854386087206269, "kl": 0.7651952505111694, "learning_rate": 9.260068585855459e-07, "loss": 0.0007665567100048065, "memory(GiB)": 165.8, "reward": 2.847916603088379, "reward_std": 0.05442138761281967, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8479167222976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.2082403302192688, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1533, "train_speed(iter/s)": 0.025839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 404.2083435058594, "completions/min_length": 368.0, "epoch": 0.1838006230529595, "grad_norm": 1.1012845679106136, "kl": 0.746228963136673, "learning_rate": 9.259072946935924e-07, "loss": 0.0007480258936993778, "memory(GiB)": 165.8, "reward": 2.357147216796875, "reward_std": 0.08011587709188461, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5571470260620117, "rewards/GeoVisalEntityMatch2ORM/std": 0.1969437301158905, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1534, "train_speed(iter/s)": 0.025843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 458.0, "completions/mean_length": 399.47918701171875, "completions/min_length": 350.0, "epoch": 0.1839204409297867, "grad_norm": 1.0418899787557026, "kl": 0.7318676114082336, "learning_rate": 9.258076692209651e-07, "loss": 0.0007335643167607486, "memory(GiB)": 165.8, "reward": 2.4299769401550293, "reward_std": 0.06837528944015503, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6799768805503845, "rewards/GeoVisalEntityMatch2ORM/std": 0.2722657322883606, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1535, "train_speed(iter/s)": 0.025848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/mean_length": 397.51043701171875, "completions/min_length": 326.0, "epoch": 0.18404025880661395, "grad_norm": 1.1538643362381367, "kl": 0.7477692067623138, "learning_rate": 9.257079821820683e-07, "loss": 0.0007497022743336856, "memory(GiB)": 165.8, "reward": 2.6556396484375, "reward_std": 0.05859040841460228, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6556397676467896, "rewards/GeoVisalEntityMatch2ORM/std": 0.18196465075016022, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1536, "train_speed(iter/s)": 0.025845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 397.97918701171875, "completions/min_length": 342.0, "epoch": 0.18416007668344117, "grad_norm": 1.1864837223404332, "kl": 0.7398412823677063, "learning_rate": 9.256082335913156e-07, "loss": 0.000741551339160651, "memory(GiB)": 165.8, "reward": 2.5966849327087402, "reward_std": 0.08355804532766342, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5966849327087402, "rewards/GeoVisalEntityMatch2ORM/std": 0.20241892337799072, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1537, "train_speed(iter/s)": 0.025849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 390.0208435058594, "completions/min_length": 347.0, "epoch": 0.1842798945602684, "grad_norm": 1.2083794406392543, "kl": 0.7862778604030609, "learning_rate": 9.255084234631292e-07, "loss": 0.0007865926017984748, "memory(GiB)": 165.8, "reward": 2.6049768924713135, "reward_std": 0.0753495842218399, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6049768924713135, "rewards/GeoVisalEntityMatch2ORM/std": 0.1441776156425476, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1538, "train_speed(iter/s)": 0.02585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.0, "completions/mean_length": 397.22918701171875, "completions/min_length": 357.0, "epoch": 0.1843997124370956, "grad_norm": 1.2215598303205126, "kl": 0.7591046690940857, "learning_rate": 9.254085518119406e-07, "loss": 0.000762899755500257, "memory(GiB)": 165.8, "reward": 2.5839948654174805, "reward_std": 0.09331437945365906, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7839946746826172, "rewards/GeoVisalEntityMatch2ORM/std": 0.12382100522518158, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1539, "train_speed(iter/s)": 0.025853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.0, "completions/mean_length": 390.21875, "completions/min_length": 335.0, "epoch": 0.18451953031392285, "grad_norm": 1.0226774423560983, "kl": 0.7920413017272949, "learning_rate": 9.253086186521899e-07, "loss": 0.000793417333625257, "memory(GiB)": 165.8, "reward": 2.577605962753296, "reward_std": 0.06421636790037155, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5776060223579407, "rewards/GeoVisalEntityMatch2ORM/std": 0.17950399219989777, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1540, "train_speed(iter/s)": 0.025856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/mean_length": 403.875, "completions/min_length": 350.0, "epoch": 0.18463934819075006, "grad_norm": 0.9064291149081207, "kl": 0.7040851712226868, "learning_rate": 9.252086239983262e-07, "loss": 0.0007057413458824158, "memory(GiB)": 165.8, "reward": 2.822537899017334, "reward_std": 0.04923940822482109, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.822537899017334, "rewards/GeoVisalEntityMatch2ORM/std": 0.1849176585674286, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1541, "train_speed(iter/s)": 0.025856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.23958333333333334, "completions/max_length": 476.0, "completions/mean_length": 403.15625, "completions/min_length": 361.0, "epoch": 0.18475916606757728, "grad_norm": 1.1122346163640846, "kl": 0.8269584476947784, "learning_rate": 9.251085678648071e-07, "loss": 0.0008281829650513828, "memory(GiB)": 165.8, "reward": 2.2337136268615723, "reward_std": 0.17624002695083618, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906978726387024, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7128803133964539, "rewards/GeoVisalEntityMatch2ORM/std": 0.17951802909374237, "rewards/MathFormat/mean": 0.7604166865348816, "rewards/MathFormat/std": 0.42906978726387024, "step": 1542, "train_speed(iter/s)": 0.025856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 478.0, "completions/mean_length": 416.34375, "completions/min_length": 361.0, "epoch": 0.1848789839444045, "grad_norm": 0.9825853125436527, "kl": 0.7775149047374725, "learning_rate": 9.250084502660996e-07, "loss": 0.0007770086522214115, "memory(GiB)": 165.8, "reward": 2.4875166416168213, "reward_std": 0.290361613035202, "rewards/GeoLocAccuracyV2ORM/mean": 0.8645833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.34396421909332275, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7583498954772949, "rewards/GeoVisalEntityMatch2ORM/std": 0.07743577659130096, "rewards/MathFormat/mean": 0.8645833730697632, "rewards/MathFormat/std": 0.34396421909332275, "step": 1543, "train_speed(iter/s)": 0.025853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 482.0, "completions/mean_length": 420.0, "completions/min_length": 357.0, "epoch": 0.18499880182123174, "grad_norm": 1.0715204118181336, "kl": 0.7352784872055054, "learning_rate": 9.249082712166796e-07, "loss": 0.000737374066375196, "memory(GiB)": 165.8, "reward": 2.572420597076416, "reward_std": 0.10350751876831055, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5724207162857056, "rewards/GeoVisalEntityMatch2ORM/std": 0.23678328096866608, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1544, "train_speed(iter/s)": 0.025855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.0, "completions/mean_length": 415.1770935058594, "completions/min_length": 356.0, "epoch": 0.18511861969805896, "grad_norm": 1.0028432815307249, "kl": 0.7108296155929565, "learning_rate": 9.248080307310313e-07, "loss": 0.0007125536794774234, "memory(GiB)": 165.8, "reward": 2.7977428436279297, "reward_std": 0.10207995772361755, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.797743022441864, "rewards/GeoVisalEntityMatch2ORM/std": 0.1822715550661087, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1545, "train_speed(iter/s)": 0.025854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/mean_length": 401.41668701171875, "completions/min_length": 356.0, "epoch": 0.18523843757488617, "grad_norm": 1.0716647401116426, "kl": 0.6911731958389282, "learning_rate": 9.247077288236487e-07, "loss": 0.000692105561029166, "memory(GiB)": 165.8, "reward": 2.819378614425659, "reward_std": 0.09334488213062286, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8277117013931274, "rewards/GeoVisalEntityMatch2ORM/std": 0.17457817494869232, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1546, "train_speed(iter/s)": 0.025858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 409.13543701171875, "completions/min_length": 374.0, "epoch": 0.1853582554517134, "grad_norm": 1.1427843964062634, "kl": 0.7490173876285553, "learning_rate": 9.246073655090336e-07, "loss": 0.0007506112451665103, "memory(GiB)": 165.8, "reward": 2.680844783782959, "reward_std": 0.10312922298908234, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6808449625968933, "rewards/GeoVisalEntityMatch2ORM/std": 0.2303694635629654, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1547, "train_speed(iter/s)": 0.02586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 382.2395935058594, "completions/min_length": 333.0, "epoch": 0.1854780733285406, "grad_norm": 1.0909229271141652, "kl": 0.7999933660030365, "learning_rate": 9.245069408016977e-07, "loss": 0.0008007189026102424, "memory(GiB)": 165.8, "reward": 2.819791793823242, "reward_std": 0.09374470263719559, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8197916746139526, "rewards/GeoVisalEntityMatch2ORM/std": 0.1543201059103012, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1548, "train_speed(iter/s)": 0.025861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.0, "completions/mean_length": 406.3645935058594, "completions/min_length": 374.0, "epoch": 0.18559789120536785, "grad_norm": 1.253642796764175, "kl": 0.7471753358840942, "learning_rate": 9.244064547161608e-07, "loss": 0.0007480084896087646, "memory(GiB)": 165.8, "reward": 2.65803599357605, "reward_std": 0.09207768738269806, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6580357551574707, "rewards/GeoVisalEntityMatch2ORM/std": 0.12857049703598022, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1549, "train_speed(iter/s)": 0.025865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 511.0, "completions/mean_length": 418.0520935058594, "completions/min_length": 369.0, "epoch": 0.18571770908219506, "grad_norm": 0.8196918750841714, "kl": 0.7396382391452789, "learning_rate": 9.243059072669519e-07, "loss": 0.0016342699527740479, "memory(GiB)": 165.8, "reward": 2.325930118560791, "reward_std": 0.06180352717638016, "rewards/GeoLocAccuracyV2ORM/mean": 0.8000000715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.3482286036014557, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5259301066398621, "rewards/GeoVisalEntityMatch2ORM/std": 0.18651306629180908, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1550, "train_speed(iter/s)": 0.025867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 412.01043701171875, "completions/min_length": 355.0, "epoch": 0.18583752695902228, "grad_norm": 1.0490302757946406, "kl": 0.7830150127410889, "learning_rate": 9.24205298468609e-07, "loss": 0.0007852105190977454, "memory(GiB)": 165.8, "reward": 2.4546875953674316, "reward_std": 0.1796010434627533, "rewards/GeoLocAccuracyV2ORM/mean": 0.7395833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4411657154560089, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7255208492279053, "rewards/GeoVisalEntityMatch2ORM/std": 0.17729619145393372, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1551, "train_speed(iter/s)": 0.025871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/mean_length": 414.91668701171875, "completions/min_length": 365.0, "epoch": 0.1859573448358495, "grad_norm": 1.107823345610822, "kl": 0.779066801071167, "learning_rate": 9.241046283356788e-07, "loss": 0.000780529051553458, "memory(GiB)": 165.8, "reward": 2.709134578704834, "reward_std": 0.0846923440694809, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.709134578704834, "rewards/GeoVisalEntityMatch2ORM/std": 0.20616932213306427, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1552, "train_speed(iter/s)": 0.025867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 439.2395935058594, "completions/min_length": 397.0, "epoch": 0.18607716271267674, "grad_norm": 1.0580947691286318, "kl": 0.7493856251239777, "learning_rate": 9.240038968827171e-07, "loss": 0.000753020285628736, "memory(GiB)": 165.8, "reward": 2.714616298675537, "reward_std": 0.11255623400211334, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206206142902374, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7250330448150635, "rewards/GeoVisalEntityMatch2ORM/std": 0.2185661941766739, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1553, "train_speed(iter/s)": 0.025868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 489.0, "completions/mean_length": 425.3125, "completions/min_length": 381.0, "epoch": 0.18619698058950396, "grad_norm": 1.0395175623212616, "kl": 0.804871141910553, "learning_rate": 9.239031041242878e-07, "loss": 0.0008057206869125366, "memory(GiB)": 165.8, "reward": 2.1484375, "reward_std": 0.1435205340385437, "rewards/GeoLocAccuracyV2ORM/mean": 0.6916667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.4513527452945709, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4567708373069763, "rewards/GeoVisalEntityMatch2ORM/std": 0.12430692464113235, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1554, "train_speed(iter/s)": 0.02586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 428.4375, "completions/min_length": 383.0, "epoch": 0.18631679846633117, "grad_norm": 0.9573509164239717, "kl": 0.7143208384513855, "learning_rate": 9.238022500749647e-07, "loss": 0.000714945956133306, "memory(GiB)": 165.8, "reward": 2.8265624046325684, "reward_std": 0.066226065158844, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8265625834465027, "rewards/GeoVisalEntityMatch2ORM/std": 0.1808779388666153, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1555, "train_speed(iter/s)": 0.025863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 509.0, "completions/mean_length": 440.57293701171875, "completions/min_length": 399.0, "epoch": 0.1864366163431584, "grad_norm": 1.0674302929737332, "kl": 0.7319180071353912, "learning_rate": 9.237013347493298e-07, "loss": 0.0007343143224716187, "memory(GiB)": 165.8, "reward": 2.5925145149230957, "reward_std": 0.07601076364517212, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5925144553184509, "rewards/GeoVisalEntityMatch2ORM/std": 0.1419806033372879, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1556, "train_speed(iter/s)": 0.025864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3229166666666667, "completions/max_length": 498.0, "completions/mean_length": 424.47918701171875, "completions/min_length": 374.0, "epoch": 0.18655643421998563, "grad_norm": 1.148611771409309, "kl": 0.9020689725875854, "learning_rate": 9.236003581619743e-07, "loss": 0.0009026527404785156, "memory(GiB)": 165.8, "reward": 2.1689815521240234, "reward_std": 0.3915916085243225, "rewards/GeoLocAccuracyV2ORM/mean": 0.6041666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4915960431098938, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8877314329147339, "rewards/GeoVisalEntityMatch2ORM/std": 0.12997205555438995, "rewards/MathFormat/mean": 0.6770833730697632, "rewards/MathFormat/std": 0.4700457453727722, "step": 1557, "train_speed(iter/s)": 0.025857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 496.0, "completions/mean_length": 443.65625, "completions/min_length": 413.0, "epoch": 0.18667625209681285, "grad_norm": 1.1578114058523807, "kl": 0.7749276459217072, "learning_rate": 9.234993203274979e-07, "loss": 0.0007758265128359199, "memory(GiB)": 165.8, "reward": 2.5596232414245605, "reward_std": 0.11266926676034927, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5596230030059814, "rewards/GeoVisalEntityMatch2ORM/std": 0.19837838411331177, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1558, "train_speed(iter/s)": 0.025849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2604166666666667, "completions/max_length": 506.0, "completions/mean_length": 419.90625, "completions/min_length": 368.0, "epoch": 0.18679606997364007, "grad_norm": 1.1446296471477106, "kl": 0.7601854801177979, "learning_rate": 9.233982212605095e-07, "loss": 0.0007605813443660736, "memory(GiB)": 165.8, "reward": 2.172309160232544, "reward_std": 0.15785323083400726, "rewards/GeoLocAccuracyV2ORM/mean": 0.7395833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.4411657154560089, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6931424140930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.19616073369979858, "rewards/MathFormat/mean": 0.7395833730697632, "rewards/MathFormat/std": 0.4411657154560089, "step": 1559, "train_speed(iter/s)": 0.025848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/mean_length": 433.22918701171875, "completions/min_length": 398.0, "epoch": 0.18691588785046728, "grad_norm": 0.9755111299523397, "kl": 0.7610388100147247, "learning_rate": 9.232970609756266e-07, "loss": 0.0007627531886100769, "memory(GiB)": 165.8, "reward": 2.7328040599823, "reward_std": 0.05216916650533676, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7328042984008789, "rewards/GeoVisalEntityMatch2ORM/std": 0.12872998416423798, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1560, "train_speed(iter/s)": 0.025847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 525.0, "completions/mean_length": 463.07293701171875, "completions/min_length": 393.0, "epoch": 0.1870357057272945, "grad_norm": 1.1167238725210962, "kl": 0.7285856306552887, "learning_rate": 9.231958394874758e-07, "loss": 0.0007296676631085575, "memory(GiB)": 165.8, "reward": 2.723214626312256, "reward_std": 0.06929899752140045, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.723214328289032, "rewards/GeoVisalEntityMatch2ORM/std": 0.16807571053504944, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1561, "train_speed(iter/s)": 0.02585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11458333333333333, "completions/max_length": 507.0, "completions/mean_length": 465.85418701171875, "completions/min_length": 408.0, "epoch": 0.18715552360412174, "grad_norm": 1.0547645868097124, "kl": 0.9448213279247284, "learning_rate": 9.230945568106923e-07, "loss": 0.0009396101231686771, "memory(GiB)": 165.8, "reward": 2.328125, "reward_std": 0.4965953826904297, "rewards/GeoLocAccuracyV2ORM/mean": 0.6666666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.4738790988922119, "rewards/GeoVisalEntityMatch2ORM/mean": 0.765625, "rewards/GeoVisalEntityMatch2ORM/std": 0.1476237177848816, "rewards/MathFormat/mean": 0.8958333730697632, "rewards/MathFormat/std": 0.3070802092552185, "step": 1562, "train_speed(iter/s)": 0.025852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3020833333333333, "completions/max_length": 506.0, "completions/mean_length": 456.0625, "completions/min_length": 405.0, "epoch": 0.18727534148094896, "grad_norm": 1.1095158962734197, "kl": 1.1513846814632416, "learning_rate": 9.229932129599205e-07, "loss": 0.0011499673128128052, "memory(GiB)": 165.8, "reward": 2.211197853088379, "reward_std": 0.4868985414505005, "rewards/GeoLocAccuracyV2ORM/mean": 0.71875, "rewards/GeoLocAccuracyV2ORM/std": 0.4519694149494171, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7736979722976685, "rewards/GeoVisalEntityMatch2ORM/std": 0.17593248188495636, "rewards/MathFormat/mean": 0.71875, "rewards/MathFormat/std": 0.4519694149494171, "step": 1563, "train_speed(iter/s)": 0.025851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 540.0, "completions/mean_length": 473.76043701171875, "completions/min_length": 420.0, "epoch": 0.18739515935777618, "grad_norm": 1.0753687020189133, "kl": 0.7018692195415497, "learning_rate": 9.228918079498131e-07, "loss": 0.000703178346157074, "memory(GiB)": 165.8, "reward": 2.655815839767456, "reward_std": 0.096713125705719, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6558159589767456, "rewards/GeoVisalEntityMatch2ORM/std": 0.26984432339668274, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1564, "train_speed(iter/s)": 0.025853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333332, "completions/max_length": 514.0, "completions/mean_length": 473.1458435058594, "completions/min_length": 420.0, "epoch": 0.1875149772346034, "grad_norm": 0.9170896986935152, "kl": 0.7524492740631104, "learning_rate": 9.22790341795032e-07, "loss": 0.0007532959571108222, "memory(GiB)": 165.8, "reward": 2.4930331707000732, "reward_std": 0.1884874552488327, "rewards/GeoLocAccuracyV2ORM/mean": 0.7291666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.44672298431396484, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7846996784210205, "rewards/GeoVisalEntityMatch2ORM/std": 0.223700612783432, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 1565, "train_speed(iter/s)": 0.025856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08333333333333333, "completions/max_length": 552.0, "completions/mean_length": 471.2083435058594, "completions/min_length": 416.0, "epoch": 0.18763479511143064, "grad_norm": 0.9469628329208932, "kl": 0.8530783653259277, "learning_rate": 9.226888145102481e-07, "loss": 0.000849436386488378, "memory(GiB)": 165.8, "reward": 2.6346354484558105, "reward_std": 0.3527928590774536, "rewards/GeoLocAccuracyV2ORM/mean": 0.9270833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.26136448979377747, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7804688215255737, "rewards/GeoVisalEntityMatch2ORM/std": 0.17438624799251556, "rewards/MathFormat/mean": 0.9270833730697632, "rewards/MathFormat/std": 0.26136448979377747, "step": 1566, "train_speed(iter/s)": 0.025856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052083333333333336, "completions/max_length": 506.0, "completions/mean_length": 460.5208435058594, "completions/min_length": 423.0, "epoch": 0.18775461298825785, "grad_norm": 1.379904251876959, "kl": 1.9614253640174866, "learning_rate": 9.225872261101409e-07, "loss": 0.0020161594729870558, "memory(GiB)": 165.8, "reward": 2.6215128898620605, "reward_std": 0.3229779601097107, "rewards/GeoLocAccuracyV2ORM/mean": 0.9583333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.20087528228759766, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7048460841178894, "rewards/GeoVisalEntityMatch2ORM/std": 0.15503962337970734, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 1567, "train_speed(iter/s)": 0.025854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 507.0, "completions/mean_length": 450.8125, "completions/min_length": 398.0, "epoch": 0.18787443086508507, "grad_norm": 1.1121264612922503, "kl": 0.7503319680690765, "learning_rate": 9.224855766093984e-07, "loss": 0.0007520777871832252, "memory(GiB)": 165.8, "reward": 2.557715654373169, "reward_std": 0.1482616513967514, "rewards/GeoLocAccuracyV2ORM/mean": 0.8583333492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.3069944977760315, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6993822455406189, "rewards/GeoVisalEntityMatch2ORM/std": 0.16394194960594177, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1568, "train_speed(iter/s)": 0.02586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13541666666666666, "completions/max_length": 506.0, "completions/mean_length": 465.32293701171875, "completions/min_length": 406.0, "epoch": 0.18799424874191228, "grad_norm": 1.2302769577626012, "kl": 0.8204765021800995, "learning_rate": 9.223838660227182e-07, "loss": 0.0008192112436518073, "memory(GiB)": 165.8, "reward": 2.5062997341156006, "reward_std": 0.26878178119659424, "rewards/GeoLocAccuracyV2ORM/mean": 0.8270833492279053, "rewards/GeoLocAccuracyV2ORM/std": 0.37570399045944214, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8146328926086426, "rewards/GeoVisalEntityMatch2ORM/std": 0.20404809713363647, "rewards/MathFormat/mean": 0.8645833730697632, "rewards/MathFormat/std": 0.34396424889564514, "step": 1569, "train_speed(iter/s)": 0.025862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16666666666666666, "completions/max_length": 507.0, "completions/mean_length": 464.2395935058594, "completions/min_length": 408.0, "epoch": 0.18811406661873953, "grad_norm": 1.1409215355976736, "kl": 0.846222460269928, "learning_rate": 9.222820943648062e-07, "loss": 0.0008460978860966861, "memory(GiB)": 165.8, "reward": 2.318500518798828, "reward_std": 0.33401593565940857, "rewards/GeoLocAccuracyV2ORM/mean": 0.6625000238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.43267834186553955, "rewards/GeoVisalEntityMatch2ORM/mean": 0.8018338084220886, "rewards/GeoVisalEntityMatch2ORM/std": 0.16421571373939514, "rewards/MathFormat/mean": 0.8541666865348816, "rewards/MathFormat/std": 0.3547917604446411, "step": 1570, "train_speed(iter/s)": 0.025861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 458.125, "completions/min_length": 389.0, "epoch": 0.18823388449556674, "grad_norm": 1.0461762723584505, "kl": 0.7504425942897797, "learning_rate": 9.221802616503773e-07, "loss": 0.0007502424414269626, "memory(GiB)": 165.8, "reward": 2.388115644454956, "reward_std": 0.22478213906288147, "rewards/GeoLocAccuracyV2ORM/mean": 0.7666667699813843, "rewards/GeoLocAccuracyV2ORM/std": 0.3768754303455353, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6318655014038086, "rewards/GeoVisalEntityMatch2ORM/std": 0.17858324944972992, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1571, "train_speed(iter/s)": 0.025861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.22916666666666666, "completions/max_length": 461.0, "completions/mean_length": 426.0625, "completions/min_length": 398.0, "epoch": 0.18835370237239396, "grad_norm": 0.8469820555765183, "kl": 0.8352518975734711, "learning_rate": 9.220783678941551e-07, "loss": 0.0008351604337804019, "memory(GiB)": 165.8, "reward": 2.0781748294830322, "reward_std": 0.25074052810668945, "rewards/GeoLocAccuracyV2ORM/mean": 0.6312500238418579, "rewards/GeoLocAccuracyV2ORM/std": 0.4526675343513489, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6656746864318848, "rewards/GeoVisalEntityMatch2ORM/std": 0.17479008436203003, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 1572, "train_speed(iter/s)": 0.025849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/mean_length": 436.3645935058594, "completions/min_length": 354.0, "epoch": 0.18847352024922118, "grad_norm": 1.1140416877609354, "kl": 0.7565912902355194, "learning_rate": 9.219764131108721e-07, "loss": 0.0007583598489873111, "memory(GiB)": 165.8, "reward": 2.77514910697937, "reward_std": 0.12449200451374054, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7834821939468384, "rewards/GeoVisalEntityMatch2ORM/std": 0.13242670893669128, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1573, "train_speed(iter/s)": 0.025849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 474.0, "completions/mean_length": 426.25, "completions/min_length": 365.0, "epoch": 0.1885933381260484, "grad_norm": 0.986957141828896, "kl": 0.8471943140029907, "learning_rate": 9.218743973152698e-07, "loss": 0.0008455304196104407, "memory(GiB)": 165.8, "reward": 2.3059563636779785, "reward_std": 0.30862709879875183, "rewards/GeoLocAccuracyV2ORM/mean": 0.9166666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.26224401593208313, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4309562146663666, "rewards/GeoVisalEntityMatch2ORM/std": 0.12134649604558945, "rewards/MathFormat/mean": 0.9583333730697632, "rewards/MathFormat/std": 0.20087528228759766, "step": 1574, "train_speed(iter/s)": 0.025851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/mean_length": 416.1145935058594, "completions/min_length": 370.0, "epoch": 0.18871315600287564, "grad_norm": 1.1073997216190699, "kl": 0.7344731688499451, "learning_rate": 9.217723205220982e-07, "loss": 0.0007369915838353336, "memory(GiB)": 165.8, "reward": 2.6865079402923584, "reward_std": 0.07765067368745804, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6865078806877136, "rewards/GeoVisalEntityMatch2ORM/std": 0.18979094922542572, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1575, "train_speed(iter/s)": 0.025855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 483.0, "completions/mean_length": 408.8020935058594, "completions/min_length": 357.0, "epoch": 0.18883297387970285, "grad_norm": 1.1381635851693364, "kl": 0.7646501064300537, "learning_rate": 9.216701827461164e-07, "loss": 0.0007652963395230472, "memory(GiB)": 165.8, "reward": 2.3942131996154785, "reward_std": 0.1388426423072815, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906975746154785, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6337963342666626, "rewards/GeoVisalEntityMatch2ORM/std": 0.21624664962291718, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1576, "train_speed(iter/s)": 0.025853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.0, "completions/mean_length": 386.54168701171875, "completions/min_length": 347.0, "epoch": 0.18895279175653007, "grad_norm": 1.0889611499894734, "kl": 0.7334115505218506, "learning_rate": 9.21567984002092e-07, "loss": 0.0007341305608861148, "memory(GiB)": 165.8, "reward": 2.6255788803100586, "reward_std": 0.043574150651693344, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.625578761100769, "rewards/GeoVisalEntityMatch2ORM/std": 0.24013273417949677, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1577, "train_speed(iter/s)": 0.025855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 599.0, "completions/mean_length": 359.41668701171875, "completions/min_length": 315.0, "epoch": 0.18907260963335729, "grad_norm": 0.9853071681676704, "kl": 0.8256312608718872, "learning_rate": 9.21465724304802e-07, "loss": 0.0008301734924316406, "memory(GiB)": 165.8, "reward": 2.437152862548828, "reward_std": 0.19993625581264496, "rewards/GeoLocAccuracyV2ORM/mean": 0.9645833969116211, "rewards/GeoLocAccuracyV2ORM/std": 0.1716662347316742, "rewards/GeoVisalEntityMatch2ORM/mean": 0.4829861521720886, "rewards/GeoVisalEntityMatch2ORM/std": 0.08107390254735947, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1578, "train_speed(iter/s)": 0.025856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 440.0, "completions/mean_length": 368.69793701171875, "completions/min_length": 323.0, "epoch": 0.18919242751018453, "grad_norm": 1.2490057535987964, "kl": 0.8392705321311951, "learning_rate": 9.213634036690314e-07, "loss": 0.0008402727544307709, "memory(GiB)": 165.8, "reward": 2.7207465171813965, "reward_std": 0.11221443116664886, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7290799021720886, "rewards/GeoVisalEntityMatch2ORM/std": 0.20060577988624573, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1579, "train_speed(iter/s)": 0.025855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 444.0, "completions/mean_length": 383.5833435058594, "completions/min_length": 340.0, "epoch": 0.18931224538701175, "grad_norm": 1.1287458628020142, "kl": 0.7826318144798279, "learning_rate": 9.212610221095747e-07, "loss": 0.0007834335556253791, "memory(GiB)": 165.8, "reward": 2.611576795578003, "reward_std": 0.10904596745967865, "rewards/GeoLocAccuracyV2ORM/mean": 0.9916666746139526, "rewards/GeoLocAccuracyV2ORM/std": 0.08164965361356735, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6199100017547607, "rewards/GeoVisalEntityMatch2ORM/std": 0.15226532518863678, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1580, "train_speed(iter/s)": 0.025851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 417.0, "completions/mean_length": 369.4270935058594, "completions/min_length": 324.0, "epoch": 0.18943206326383896, "grad_norm": 1.083710742124467, "kl": 0.8096842169761658, "learning_rate": 9.211585796412349e-07, "loss": 0.0008102605934254825, "memory(GiB)": 165.8, "reward": 2.7418153285980225, "reward_std": 0.06797659397125244, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7418155074119568, "rewards/GeoVisalEntityMatch2ORM/std": 0.18962891399860382, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1581, "train_speed(iter/s)": 0.025855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 419.0, "completions/mean_length": 368.3125, "completions/min_length": 324.0, "epoch": 0.18955188114066618, "grad_norm": 1.188028725241921, "kl": 0.8610745072364807, "learning_rate": 9.210560762788237e-07, "loss": 0.0008571396465413272, "memory(GiB)": 165.8, "reward": 2.560342311859131, "reward_std": 0.3020947277545929, "rewards/GeoLocAccuracyV2ORM/mean": 0.8729166388511658, "rewards/GeoLocAccuracyV2ORM/std": 0.29752635955810547, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6978422999382019, "rewards/GeoVisalEntityMatch2ORM/std": 0.15294940769672394, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1582, "train_speed(iter/s)": 0.025856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 437.0, "completions/mean_length": 362.10418701171875, "completions/min_length": 309.0, "epoch": 0.18967169901749342, "grad_norm": 1.3215392974539908, "kl": 0.7922541201114655, "learning_rate": 9.209535120371621e-07, "loss": 0.0007933378219604492, "memory(GiB)": 165.8, "reward": 2.448512077331543, "reward_std": 0.17594420909881592, "rewards/GeoLocAccuracyV2ORM/mean": 0.9416667222976685, "rewards/GeoLocAccuracyV2ORM/std": 0.20909158885478973, "rewards/GeoVisalEntityMatch2ORM/mean": 0.506845235824585, "rewards/GeoVisalEntityMatch2ORM/std": 0.20118851959705353, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1583, "train_speed(iter/s)": 0.025851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 429.0, "completions/mean_length": 374.4583435058594, "completions/min_length": 331.0, "epoch": 0.18979151689432064, "grad_norm": 1.2292793761489351, "kl": 0.8494140207767487, "learning_rate": 9.208508869310791e-07, "loss": 0.0008506104350090027, "memory(GiB)": 165.8, "reward": 2.423200845718384, "reward_std": 0.1098226010799408, "rewards/GeoLocAccuracyV2ORM/mean": 0.7604166865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.42906975746154785, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6627840995788574, "rewards/GeoVisalEntityMatch2ORM/std": 0.18952523171901703, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1584, "train_speed(iter/s)": 0.025846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 383.34375, "completions/min_length": 342.0, "epoch": 0.18991133477114785, "grad_norm": 1.0848299954047733, "kl": 0.8267372846603394, "learning_rate": 9.207482009754134e-07, "loss": 0.0008153766393661499, "memory(GiB)": 165.8, "reward": 2.607494354248047, "reward_std": 0.190183624625206, "rewards/GeoLocAccuracyV2ORM/mean": 0.9791666865348816, "rewards/GeoLocAccuracyV2ORM/std": 0.14357587695121765, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6387442350387573, "rewards/GeoVisalEntityMatch2ORM/std": 0.13790550827980042, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206206142902374, "step": 1585, "train_speed(iter/s)": 0.025848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.0, "completions/mean_length": 360.1875, "completions/min_length": 313.0, "epoch": 0.19003115264797507, "grad_norm": 1.2271149595615787, "kl": 0.8219163715839386, "learning_rate": 9.206454541850117e-07, "loss": 0.0008216450805775821, "memory(GiB)": 165.8, "reward": 2.545809745788574, "reward_std": 0.07012787461280823, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5458096861839294, "rewards/GeoVisalEntityMatch2ORM/std": 0.17219601571559906, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1586, "train_speed(iter/s)": 0.025842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 412.0, "completions/mean_length": 372.76043701171875, "completions/min_length": 332.0, "epoch": 0.1901509705248023, "grad_norm": 1.2283492435664303, "kl": 0.8088662028312683, "learning_rate": 9.205426465747301e-07, "loss": 0.0008098980179056525, "memory(GiB)": 165.8, "reward": 2.7709386348724365, "reward_std": 0.06579622626304626, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7709385752677917, "rewards/GeoVisalEntityMatch2ORM/std": 0.16960418224334717, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1587, "train_speed(iter/s)": 0.025844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 506.0, "completions/mean_length": 366.09375, "completions/min_length": 322.0, "epoch": 0.19027078840162953, "grad_norm": 1.1761251207378307, "kl": 0.7986933887004852, "learning_rate": 9.204397781594331e-07, "loss": 0.0008017098298296332, "memory(GiB)": 165.8, "reward": 2.593921422958374, "reward_std": 0.19587916135787964, "rewards/GeoLocAccuracyV2ORM/mean": 0.9895833730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.10206207633018494, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6147546768188477, "rewards/GeoVisalEntityMatch2ORM/std": 0.26957279443740845, "rewards/MathFormat/mean": 0.9895833730697632, "rewards/MathFormat/std": 0.10206207633018494, "step": 1588, "train_speed(iter/s)": 0.025845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.0, "completions/mean_length": 365.4583435058594, "completions/min_length": 308.0, "epoch": 0.19039060627845675, "grad_norm": 1.231002353072004, "kl": 0.821923017501831, "learning_rate": 9.203368489539943e-07, "loss": 0.0008234878769144416, "memory(GiB)": 165.8, "reward": 2.557589292526245, "reward_std": 0.1148611456155777, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5575892925262451, "rewards/GeoVisalEntityMatch2ORM/std": 0.19450511038303375, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1589, "train_speed(iter/s)": 0.025848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 429.0, "completions/mean_length": 366.2395935058594, "completions/min_length": 315.0, "epoch": 0.19051042415528396, "grad_norm": 1.3177295036720955, "kl": 0.8232317268848419, "learning_rate": 9.202338589732959e-07, "loss": 0.0008239696617238224, "memory(GiB)": 165.8, "reward": 2.513744354248047, "reward_std": 0.1121734082698822, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5137442350387573, "rewards/GeoVisalEntityMatch2ORM/std": 0.14609235525131226, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1590, "train_speed(iter/s)": 0.025845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 443.0, "completions/mean_length": 381.88543701171875, "completions/min_length": 312.0, "epoch": 0.19063024203211118, "grad_norm": 1.1326844800593014, "kl": 0.814611554145813, "learning_rate": 9.201308082322285e-07, "loss": 0.0008160993456840515, "memory(GiB)": 165.8, "reward": 2.534623146057129, "reward_std": 0.07283617556095123, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5346230864524841, "rewards/GeoVisalEntityMatch2ORM/std": 0.1383751630783081, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1591, "train_speed(iter/s)": 0.025838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/mean_length": 381.5208435058594, "completions/min_length": 336.0, "epoch": 0.19075005990893842, "grad_norm": 1.2116751542625719, "kl": 0.8104937970638275, "learning_rate": 9.200276967456926e-07, "loss": 0.0008115669479593635, "memory(GiB)": 165.8, "reward": 2.3379838466644287, "reward_std": 0.08853018283843994, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.3379836082458496, "rewards/GeoVisalEntityMatch2ORM/std": 0.15084078907966614, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1592, "train_speed(iter/s)": 0.025842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/mean_length": 389.5520935058594, "completions/min_length": 332.0, "epoch": 0.19086987778576564, "grad_norm": 1.1345669374317133, "kl": 0.7841762006282806, "learning_rate": 9.199245245285963e-07, "loss": 0.0007851918926462531, "memory(GiB)": 165.8, "reward": 2.547048568725586, "reward_std": 0.13594762980937958, "rewards/GeoLocAccuracyV2ORM/mean": 0.9750000834465027, "rewards/GeoLocAccuracyV2ORM/std": 0.1399247944355011, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5720486640930176, "rewards/GeoVisalEntityMatch2ORM/std": 0.12703610956668854, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1593, "train_speed(iter/s)": 0.02584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.21875, "completions/max_length": 420.0, "completions/mean_length": 375.5625, "completions/min_length": 336.0, "epoch": 0.19098969566259286, "grad_norm": 1.2055033465239664, "kl": 2.093999207019806, "learning_rate": 9.19821291595857e-07, "loss": 0.0020932331681251526, "memory(GiB)": 165.8, "reward": 2.0413691997528076, "reward_std": 0.23470889031887054, "rewards/GeoLocAccuracyV2ORM/mean": 0.7062500715255737, "rewards/GeoLocAccuracyV2ORM/std": 0.4410066306591034, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5538691282272339, "rewards/GeoVisalEntityMatch2ORM/std": 0.209766685962677, "rewards/MathFormat/mean": 0.78125, "rewards/MathFormat/std": 0.4155687391757965, "step": 1594, "train_speed(iter/s)": 0.025834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/mean_length": 385.3125, "completions/min_length": 327.0, "epoch": 0.19110951353942007, "grad_norm": 1.2332082195996348, "kl": 0.8040294647216797, "learning_rate": 9.197179979624011e-07, "loss": 0.0008046751609072089, "memory(GiB)": 165.8, "reward": 2.513434886932373, "reward_std": 0.17696672677993774, "rewards/GeoLocAccuracyV2ORM/mean": 0.8458333015441895, "rewards/GeoLocAccuracyV2ORM/std": 0.36067673563957214, "rewards/GeoVisalEntityMatch2ORM/mean": 0.6676015257835388, "rewards/GeoVisalEntityMatch2ORM/std": 0.11910288780927658, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1595, "train_speed(iter/s)": 0.025829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041666666666666664, "completions/max_length": 455.0, "completions/mean_length": 392.3645935058594, "completions/min_length": 341.0, "epoch": 0.19122933141624732, "grad_norm": 1.190605796652443, "kl": 0.9616099298000336, "learning_rate": 9.196146436431634e-07, "loss": 0.0009540418977849185, "memory(GiB)": 165.8, "reward": 2.53515625, "reward_std": 0.3005697429180145, "rewards/GeoLocAccuracyV2ORM/mean": 0.8958333730697632, "rewards/GeoLocAccuracyV2ORM/std": 0.2783094048500061, "rewards/GeoVisalEntityMatch2ORM/mean": 0.66015625, "rewards/GeoVisalEntityMatch2ORM/std": 0.16357950866222382, "rewards/MathFormat/mean": 0.9791666865348816, "rewards/MathFormat/std": 0.14357589185237885, "step": 1596, "train_speed(iter/s)": 0.025818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 409.5, "completions/min_length": 346.0, "epoch": 0.19134914929307453, "grad_norm": 1.1196570314497019, "kl": 0.7658331394195557, "learning_rate": 9.195112286530873e-07, "loss": 0.0007674346561543643, "memory(GiB)": 165.8, "reward": 2.2366321086883545, "reward_std": 0.18442603945732117, "rewards/GeoLocAccuracyV2ORM/mean": 0.6500000357627869, "rewards/GeoLocAccuracyV2ORM/std": 0.39894598722457886, "rewards/GeoVisalEntityMatch2ORM/mean": 0.5866319537162781, "rewards/GeoVisalEntityMatch2ORM/std": 0.18120700120925903, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1597, "train_speed(iter/s)": 0.025817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 413.41668701171875, "completions/min_length": 357.0, "epoch": 0.19146896716990175, "grad_norm": 1.0383134505407579, "kl": 0.722345620393753, "learning_rate": 9.194077530071257e-07, "loss": 0.000723938166629523, "memory(GiB)": 165.8, "reward": 2.6778647899627686, "reward_std": 0.1523234248161316, "rewards/GeoLocAccuracyV2ORM/mean": 0.949999988079071, "rewards/GeoLocAccuracyV2ORM/std": 0.19466570019721985, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7278646230697632, "rewards/GeoVisalEntityMatch2ORM/std": 0.22282099723815918, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1598, "train_speed(iter/s)": 0.025818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 478.0, "completions/mean_length": 417.9583435058594, "completions/min_length": 358.0, "epoch": 0.19158878504672897, "grad_norm": 1.127130020076262, "kl": 0.7351385056972504, "learning_rate": 9.193042167202396e-07, "loss": 0.0007363309850916266, "memory(GiB)": 165.8, "reward": 2.4638476371765137, "reward_std": 0.09613928198814392, "rewards/GeoLocAccuracyV2ORM/mean": 0.75, "rewards/GeoLocAccuracyV2ORM/std": 0.4352857768535614, "rewards/GeoVisalEntityMatch2ORM/mean": 0.7138475775718689, "rewards/GeoVisalEntityMatch2ORM/std": 0.1786070466041565, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1599, "train_speed(iter/s)": 0.02582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 413.63543701171875, "completions/min_length": 347.0, "epoch": 0.19170860292355618, "grad_norm": 1.1089767624905056, "kl": 0.7886564433574677, "learning_rate": 9.192006198073992e-07, "loss": 0.0007903253426775336, "memory(GiB)": 165.8, "reward": 2.495126485824585, "reward_std": 0.08899035304784775, "rewards/GeoLocAccuracyV2ORM/mean": 1.0, "rewards/GeoLocAccuracyV2ORM/std": 0.0, "rewards/GeoVisalEntityMatch2ORM/mean": 0.49512648582458496, "rewards/GeoVisalEntityMatch2ORM/std": 0.09865111112594604, "rewards/MathFormat/mean": 1.0, "rewards/MathFormat/std": 0.0, "step": 1600, "train_speed(iter/s)": 0.025824 }, { "epoch": 0.19170860292355618, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.023065476190476192, "eval_completions/max_length": 470.5297619047619, "eval_completions/mean_length": 424.8791013445173, "eval_completions/min_length": 381.0297619047619, "eval_kl": 2.5205880048729123, "eval_loss": 0.0025004686322063208, "eval_reward": 2.563958971628121, "eval_reward_std": 0.13917694836189703, "eval_rewards/GeoLocAccuracyV2ORM/mean": 0.9096230250738916, "eval_rewards/GeoLocAccuracyV2ORM/std": 0.12250615119756687, "eval_rewards/GeoVisalEntityMatch2ORM/mean": 0.6764093009488923, "eval_rewards/GeoVisalEntityMatch2ORM/std": 0.1377178593760445, "eval_rewards/MathFormat/mean": 0.9779265891937983, "eval_rewards/MathFormat/std": 0.030480736689198585, "eval_runtime": 1838.4838, "eval_samples_per_second": 0.183, "eval_steps_per_second": 0.004, "step": 1600 } ], "logging_steps": 1, "max_steps": 8346, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }