{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22857142857142856, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_fraction": 0.0, "completion_length": 2124.791679382324, "dapo/avg_reward_std": 0.28261276125907897, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.42666667342185977, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 45.83333333333333, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.001142857142857143, "grad_norm": 0.03718917816877365, "kl": 0.0, "learning_rate": 0.0, "loss": -0.0465, "reward": 0.6372265852987766, "reward_std": 0.9629172012209892, "step": 1 }, { "clip_fraction": 0.0, "completion_length": 2559.6631774902344, "dapo/avg_reward_std": 0.2737089714833668, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.39285715403301374, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 32.291666666666664, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.002285714285714286, "grad_norm": 0.031548872590065, "kl": 0.0, "learning_rate": 1e-07, "loss": 0.0292, "reward": 0.2883484517224133, "reward_std": 0.9225177392363548, "step": 2 }, { "clip_fraction": 0.0, "completion_length": 2259.0243072509766, "dapo/avg_reward_std": 0.30627372419392623, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40740741734151487, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 38.33333333333333, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.0034285714285714284, "grad_norm": 0.028476394712924957, "kl": 3.738701343536377e-05, "learning_rate": 2e-07, "loss": 0.0118, "reward": 0.5692771524190903, "reward_std": 0.9722258150577545, "step": 3 }, { "clip_fraction": 0.0, "completion_length": 2388.763916015625, "dapo/avg_reward_std": 0.2417103610932827, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34895834093913436, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 29.479166666666664, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.004571428571428572, "grad_norm": 0.03074878267943859, "kl": 3.4555792808532715e-05, "learning_rate": 3e-07, "loss": 0.0428, "reward": 0.5176859218627214, "reward_std": 0.9351213574409485, "step": 4 }, { "clip_fraction": 0.0, "completion_length": 2228.9131927490234, "dapo/avg_reward_std": 0.24784977205338016, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3494623731220922, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 34.375, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.005714285714285714, "grad_norm": 0.03052515536546707, "kl": 4.2438507080078125e-05, "learning_rate": 4e-07, "loss": 0.0573, "reward": 0.5747799873352051, "reward_std": 0.9150463417172432, "step": 5 }, { "clip_fraction": 0.0, "completion_length": 2526.2743377685547, "dapo/avg_reward_std": 0.31032066589052026, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4772727367552844, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 39.58333333333333, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.006857142857142857, "grad_norm": 0.031065233051776886, "kl": 6.331503391265869e-05, "learning_rate": 5e-07, "loss": 0.068, "reward": 0.49577395524829626, "reward_std": 0.9604900777339935, "step": 6 }, { "clip_fraction": 0.0, "completion_length": 2096.857650756836, "dapo/avg_reward_std": 0.30248596491637053, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.43827161303272955, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 33.33333333333333, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.008, "grad_norm": 0.03395611792802811, "kl": 3.603100776672363e-05, "learning_rate": 6e-07, "loss": 0.0104, "reward": 0.6337036956101656, "reward_std": 0.9339632987976074, "step": 7 }, { "clip_fraction": 0.0, "completion_length": 2080.482681274414, "dapo/avg_reward_std": 0.2619025791063905, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3489583395421505, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 27.82738095238095, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.009142857142857144, "grad_norm": 0.030713744461536407, "kl": 3.699958324432373e-05, "learning_rate": 7e-07, "loss": 0.0191, "reward": 0.5047293808311224, "reward_std": 0.9456561654806137, "step": 8 }, { "clip_fraction": 0.0, "completion_length": 2575.715316772461, "dapo/avg_reward_std": 0.26183396059533826, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4275362387947414, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 56.25, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.010285714285714285, "grad_norm": 0.02862783893942833, "kl": 3.787875175476074e-05, "learning_rate": 8e-07, "loss": 0.0251, "reward": 0.49641977716237307, "reward_std": 0.9346907436847687, "step": 9 }, { "clip_fraction": 0.0, "completion_length": 2574.7951431274414, "dapo/avg_reward_std": 0.2888991279261453, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46031746694019865, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 61.875, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.011428571428571429, "grad_norm": 0.03313002362847328, "kl": 2.9653310775756836e-05, "learning_rate": 9e-07, "loss": 0.0131, "reward": 0.6514056231826544, "reward_std": 0.9486276879906654, "step": 10 }, { "clip_fraction": 0.0, "completion_length": 2648.3541870117188, "dapo/avg_reward_std": 0.1985154973136054, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.23333333631356556, "dapo/num_sampling_attempts": 5.625, "dapo/sampling_efficiency": 22.747252747252745, "dapo/total_prompts_processed": 33.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.012571428571428572, "grad_norm": 0.02842891961336136, "kl": 4.6372413635253906e-05, "learning_rate": 1e-06, "loss": 0.0228, "reward": 0.3831507060676813, "reward_std": 0.9138674512505531, "step": 11 }, { "clip_fraction": 0.0, "completion_length": 2340.7708435058594, "dapo/avg_reward_std": 0.21896107792854308, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.25000000558793545, "dapo/num_sampling_attempts": 5.0, "dapo/sampling_efficiency": 29.791666666666664, "dapo/total_prompts_processed": 30.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.013714285714285714, "grad_norm": 0.02896970883011818, "kl": 3.764033317565918e-05, "learning_rate": 9.997258721585931e-07, "loss": 0.0141, "reward": 0.3742078524082899, "reward_std": 0.9111683145165443, "step": 12 }, { "clip_fraction": 0.0, "completion_length": 2731.9687576293945, "dapo/avg_reward_std": 0.2593883651274222, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.39506174016881873, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 43.95833333333333, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.014857142857142857, "grad_norm": 0.028494343161582947, "kl": 4.1812658309936523e-05, "learning_rate": 9.989038226169207e-07, "loss": 0.0482, "reward": 0.37119605229236186, "reward_std": 0.9484475553035736, "step": 13 }, { "clip_fraction": 0.0, "completion_length": 2346.684066772461, "dapo/avg_reward_std": 0.2633256334247011, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3787878860126842, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 40.416666666666664, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.016, "grad_norm": 0.03419339284300804, "kl": 3.219395875930786e-05, "learning_rate": 9.975348529157229e-07, "loss": 0.0443, "reward": 0.5307169873267412, "reward_std": 0.8819384500384331, "step": 14 }, { "clip_fraction": 0.0, "completion_length": 2438.8437881469727, "dapo/avg_reward_std": 0.31698794450078693, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.48412699571677614, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 49.99999999999999, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.017142857142857144, "grad_norm": 0.03230522945523262, "kl": 3.4749507904052734e-05, "learning_rate": 9.956206309337066e-07, "loss": 0.0519, "reward": 0.6968788839876652, "reward_std": 0.9826493486762047, "step": 15 }, { "clip_fraction": 0.0, "completion_length": 2835.3125076293945, "dapo/avg_reward_std": 0.2820873036980629, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36111111876865226, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 49.375, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.018285714285714287, "grad_norm": 0.026719439774751663, "kl": 3.375113010406494e-05, "learning_rate": 9.931634888554935e-07, "loss": 0.0158, "reward": 0.4585288055241108, "reward_std": 0.9621468484401703, "step": 16 }, { "clip_fraction": 0.0, "completion_length": 2489.513870239258, "dapo/avg_reward_std": 0.24821309347947437, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35000000447034835, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 51.25, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.019428571428571427, "grad_norm": 0.030841730535030365, "kl": 3.2588839530944824e-05, "learning_rate": 9.901664203302124e-07, "loss": 0.0342, "reward": 0.4615583084523678, "reward_std": 0.8882262408733368, "step": 17 }, { "clip_fraction": 0.0, "completion_length": 2291.8854217529297, "dapo/avg_reward_std": 0.3492339625954628, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4000000149011612, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 46.87499999999999, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.02057142857142857, "grad_norm": 0.4981432557106018, "kl": 4.331767559051514e-05, "learning_rate": 9.866330768241983e-07, "loss": 0.0782, "reward": 0.5650830613449216, "reward_std": 0.960162565112114, "step": 18 }, { "clip_fraction": 0.0, "completion_length": 1727.9479217529297, "dapo/avg_reward_std": 0.2201171379822951, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2863247940937678, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 27.01388888888889, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.021714285714285714, "grad_norm": 0.034473638981580734, "kl": 2.7894973754882812e-05, "learning_rate": 9.825677631722435e-07, "loss": -0.0027, "reward": 0.5283844769001007, "reward_std": 0.9302913695573807, "step": 19 }, { "clip_fraction": 0.0, "completion_length": 1848.9062576293945, "dapo/avg_reward_std": 0.2080523163983316, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3030303070942561, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 40.74404761904762, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.022857142857142857, "grad_norm": 0.03650596737861633, "kl": 2.997368574142456e-05, "learning_rate": 9.779754323328192e-07, "loss": 0.0066, "reward": 0.47246094793081284, "reward_std": 0.925552561879158, "step": 20 }, { "clip_fraction": 0.0, "completion_length": 2310.6354370117188, "dapo/avg_reward_std": 0.18431008011102676, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26250000260770323, "dapo/num_sampling_attempts": 5.0, "dapo/sampling_efficiency": 32.53472222222222, "dapo/total_prompts_processed": 30.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.024, "grad_norm": 0.02872428111732006, "kl": 3.707408905029297e-05, "learning_rate": 9.728616793536587e-07, "loss": 0.0041, "reward": 0.5466808546334505, "reward_std": 0.9614025354385376, "step": 21 }, { "clip_fraction": 0.0, "completion_length": 2628.4618072509766, "dapo/avg_reward_std": 0.27239492272629456, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3235294157091309, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 26.875, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.025142857142857144, "grad_norm": 0.03156612813472748, "kl": 4.024803638458252e-05, "learning_rate": 9.672327345550543e-07, "loss": 0.0396, "reward": 0.4231120813637972, "reward_std": 0.9312948659062386, "step": 22 }, { "clip_fraction": 0.0, "completion_length": 2495.7673873901367, "dapo/avg_reward_std": 0.30711027341229574, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3988095335662365, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 31.249999999999993, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.026285714285714287, "grad_norm": 0.028224533423781395, "kl": 3.413856029510498e-05, "learning_rate": 9.610954559391704e-07, "loss": 0.0195, "reward": 0.5285261562094092, "reward_std": 0.9373103529214859, "step": 23 }, { "clip_fraction": 0.0, "completion_length": 1944.9201278686523, "dapo/avg_reward_std": 0.29968351125717163, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4533333480358124, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 44.27083333333333, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.027428571428571427, "grad_norm": 0.03633056953549385, "kl": 3.1538307666778564e-05, "learning_rate": 9.54457320834625e-07, "loss": 0.0693, "reward": 0.5397752095013857, "reward_std": 0.9495814517140388, "step": 24 }, { "clip_fraction": 0.0, "completion_length": 2616.593780517578, "dapo/avg_reward_std": 0.16712580593127124, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.19811321232678755, "dapo/num_sampling_attempts": 6.625, "dapo/sampling_efficiency": 19.166666666666664, "dapo/total_prompts_processed": 39.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.02857142857142857, "grad_norm": 0.024344539269804955, "kl": 3.676116466522217e-05, "learning_rate": 9.473264167865171e-07, "loss": 0.0139, "reward": 0.3185653127729893, "reward_std": 0.9151088818907738, "step": 25 }, { "clip_fraction": 0.0, "completion_length": 2116.7257232666016, "dapo/avg_reward_std": 0.27600910129218265, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33908046319566926, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 44.6875, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.029714285714285714, "grad_norm": 0.031155193224549294, "kl": 3.579258918762207e-05, "learning_rate": 9.397114317029974e-07, "loss": 0.0725, "reward": 0.5197067707777023, "reward_std": 0.8911866471171379, "step": 26 }, { "clip_fraction": 0.0, "completion_length": 2148.781265258789, "dapo/avg_reward_std": 0.24896243140101432, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31666667349636557, "dapo/num_sampling_attempts": 5.0, "dapo/sampling_efficiency": 22.63888888888889, "dapo/total_prompts_processed": 30.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.030857142857142857, "grad_norm": 0.03762076795101166, "kl": 3.104656934738159e-05, "learning_rate": 9.316216432703916e-07, "loss": -0.0333, "reward": 0.5081147998571396, "reward_std": 0.9414060413837433, "step": 27 }, { "clip_fraction": 0.0, "completion_length": 2357.4062881469727, "dapo/avg_reward_std": 0.22747237629750194, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2990196110571132, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 34.49404761904761, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.032, "grad_norm": 0.02982812374830246, "kl": 2.621859312057495e-05, "learning_rate": 9.230669076497687e-07, "loss": 0.0231, "reward": 0.7687274925410748, "reward_std": 0.9382865354418755, "step": 28 }, { "clip_fraction": 0.0, "completion_length": 2772.941047668457, "dapo/avg_reward_std": 0.2300749086972439, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.28282828854792047, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 48.482142857142854, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.03314285714285714, "grad_norm": 0.030160676687955856, "kl": 2.812594175338745e-05, "learning_rate": 9.140576474687263e-07, "loss": 0.0019, "reward": 0.41888202354311943, "reward_std": 0.9044449031352997, "step": 29 }, { "clip_fraction": 0.0, "completion_length": 2038.208366394043, "dapo/avg_reward_std": 0.1657373425437183, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.21544715943859843, "dapo/num_sampling_attempts": 5.125, "dapo/sampling_efficiency": 45.71969696969697, "dapo/total_prompts_processed": 30.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.03428571428571429, "grad_norm": 0.040263354778289795, "kl": 3.8951635360717773e-05, "learning_rate": 9.046048391230247e-07, "loss": 0.0158, "reward": 0.6328074131160975, "reward_std": 0.913766622543335, "step": 30 }, { "clip_fraction": 0.0, "completion_length": 2610.149299621582, "dapo/avg_reward_std": 0.24689391613006592, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.39333333909511564, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 50.74404761904762, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.03542857142857143, "grad_norm": 0.03027450665831566, "kl": 3.1307339668273926e-05, "learning_rate": 8.9471999940354e-07, "loss": 0.0264, "reward": 0.6263847425580025, "reward_std": 0.9919310808181763, "step": 31 }, { "clip_fraction": 0.0, "completion_length": 2505.697952270508, "dapo/avg_reward_std": 0.26817766793312564, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34946237216072695, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 33.68055555555555, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.036571428571428574, "grad_norm": 0.02961750328540802, "kl": 2.7127563953399658e-05, "learning_rate": 8.844151714648274e-07, "loss": 0.0166, "reward": 0.6057538501918316, "reward_std": 0.9584499895572662, "step": 32 }, { "clip_fraction": 0.0, "completion_length": 2879.420181274414, "dapo/avg_reward_std": 0.24957223816050422, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2824074120985137, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 35.51136363636363, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.037714285714285714, "grad_norm": 0.028292173519730568, "kl": 2.950429916381836e-05, "learning_rate": 8.737029101523929e-07, "loss": 0.032, "reward": 0.4974850555881858, "reward_std": 0.9284666180610657, "step": 33 }, { "clip_fraction": 0.0, "completion_length": 2605.826400756836, "dapo/avg_reward_std": 0.27582160755991936, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41666667101283866, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 42.70833333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.038857142857142854, "grad_norm": 0.028110038489103317, "kl": 3.172457218170166e-05, "learning_rate": 8.625962667065487e-07, "loss": 0.0358, "reward": 0.5906332535669208, "reward_std": 0.8970795348286629, "step": 34 }, { "clip_fraction": 0.0, "completion_length": 2197.09033203125, "dapo/avg_reward_std": 0.2899627904097239, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3722222303350767, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 33.035714285714285, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04, "grad_norm": 0.03307325020432472, "kl": 3.203749656677246e-05, "learning_rate": 8.511087728614862e-07, "loss": 0.024, "reward": 0.6485824584960938, "reward_std": 0.9721796959638596, "step": 35 }, { "clip_fraction": 0.0, "completion_length": 2999.3507080078125, "dapo/avg_reward_std": 0.20956570729613305, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26250000707805154, "dapo/num_sampling_attempts": 5.0, "dapo/sampling_efficiency": 22.51488095238095, "dapo/total_prompts_processed": 30.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04114285714285714, "grad_norm": 0.028769005089998245, "kl": 3.2588839530944824e-05, "learning_rate": 8.392544243589427e-07, "loss": 0.0619, "reward": 0.48274967167526484, "reward_std": 0.8917501345276833, "step": 36 }, { "clip_fraction": 0.0, "completion_length": 2790.3020935058594, "dapo/avg_reward_std": 0.30638546783190507, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.42307692995438206, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 35.20833333333333, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04228571428571429, "grad_norm": 0.026894288137555122, "kl": 3.5509467124938965e-05, "learning_rate": 8.270476638965461e-07, "loss": 0.0283, "reward": 0.5098943561315536, "reward_std": 0.9712026715278625, "step": 37 }, { "clip_fraction": 0.0, "completion_length": 2677.1493530273438, "dapo/avg_reward_std": 0.18201035128699408, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2481481538878547, "dapo/num_sampling_attempts": 5.625, "dapo/sampling_efficiency": 25.416666666666664, "dapo/total_prompts_processed": 33.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04342857142857143, "grad_norm": 0.027049226686358452, "kl": 2.641230821609497e-05, "learning_rate": 8.145033635316128e-07, "loss": 0.0457, "reward": 0.507211847230792, "reward_std": 0.9677048400044441, "step": 38 }, { "clip_fraction": 0.0, "completion_length": 3130.437530517578, "dapo/avg_reward_std": 0.2055508976473528, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3137254956014016, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 26.160714285714278, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.044571428571428574, "grad_norm": 0.027378324419260025, "kl": 4.1447579860687256e-05, "learning_rate": 8.01636806561836e-07, "loss": 0.0522, "reward": 0.5557294674217701, "reward_std": 0.9394431114196777, "step": 39 }, { "clip_fraction": 0.0, "completion_length": 2026.0486297607422, "dapo/avg_reward_std": 0.20257248067193562, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2916666749450896, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 29.86111111111111, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.045714285714285714, "grad_norm": 0.032405752688646317, "kl": 1.9609928131103516e-05, "learning_rate": 7.884636689049422e-07, "loss": 0.0336, "reward": 0.5694049745798111, "reward_std": 0.9232507050037384, "step": 40 }, { "clip_fraction": 0.0, "completion_length": 2640.326416015625, "dapo/avg_reward_std": 0.21237638321789828, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34343435231483344, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 29.791666666666664, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.046857142857142854, "grad_norm": 0.027951980009675026, "kl": 2.6788562536239624e-05, "learning_rate": 7.75e-07, "loss": 0.0234, "reward": 0.5206635389477015, "reward_std": 0.9366661533713341, "step": 41 }, { "clip_fraction": 0.0, "completion_length": 2681.18058013916, "dapo/avg_reward_std": 0.24859387196343521, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3218390854268238, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 35.416666666666664, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.048, "grad_norm": 0.03045503795146942, "kl": 3.679096698760986e-05, "learning_rate": 7.612622032536507e-07, "loss": 0.0237, "reward": 0.4700614605098963, "reward_std": 0.9389084428548813, "step": 42 }, { "clip_fraction": 0.0, "completion_length": 2398.7118072509766, "dapo/avg_reward_std": 0.2748411413161985, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.322580651890847, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 29.999999999999996, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04914285714285714, "grad_norm": 0.02945004403591156, "kl": 2.7336180210113525e-05, "learning_rate": 7.472670160550848e-07, "loss": -0.0567, "reward": 0.6530590765178204, "reward_std": 0.929742157459259, "step": 43 }, { "clip_fraction": 0.0, "completion_length": 1968.3437805175781, "dapo/avg_reward_std": 0.20995861871374977, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2685185232096248, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 42.410714285714285, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05028571428571429, "grad_norm": 0.0354490801692009, "kl": 1.671910285949707e-05, "learning_rate": 7.330314893841101e-07, "loss": 0.0869, "reward": 0.6298563629388809, "reward_std": 0.9230287447571754, "step": 44 }, { "clip_fraction": 0.0, "completion_length": 2218.2743225097656, "dapo/avg_reward_std": 0.260509067773819, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36666667262713115, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 33.229166666666664, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05142857142857143, "grad_norm": 0.02954520471394062, "kl": 2.514384686946869e-05, "learning_rate": 7.185729670371604e-07, "loss": 0.0031, "reward": 0.6325996220111847, "reward_std": 0.9546400979161263, "step": 45 }, { "clip_fraction": 0.0, "completion_length": 2081.1458587646484, "dapo/avg_reward_std": 0.2187695243666249, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2849462402443732, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 37.22222222222222, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.052571428571428575, "grad_norm": 0.033979643136262894, "kl": 2.872943878173828e-05, "learning_rate": 7.039090644965509e-07, "loss": -0.0104, "reward": 0.5167231820523739, "reward_std": 0.9025325626134872, "step": 46 }, { "clip_fraction": 0.0, "completion_length": 2117.541702270508, "dapo/avg_reward_std": 0.18839570879936218, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26811594580826553, "dapo/num_sampling_attempts": 5.75, "dapo/sampling_efficiency": 20.441919191919194, "dapo/total_prompts_processed": 34.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.053714285714285714, "grad_norm": 0.03177877888083458, "kl": 3.078579902648926e-05, "learning_rate": 6.890576474687263e-07, "loss": 0.0077, "reward": 0.3684711689129472, "reward_std": 0.8811993673443794, "step": 47 }, { "clip_fraction": 0.0, "completion_length": 2177.4444885253906, "dapo/avg_reward_std": 0.19605370469995448, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2702702763112816, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 39.40972222222222, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.054857142857142854, "grad_norm": 0.04067355766892433, "kl": 2.4996697902679443e-05, "learning_rate": 6.740368101176495e-07, "loss": 0.0053, "reward": 0.5635924749076366, "reward_std": 0.9323460608720779, "step": 48 }, { "clip_fraction": 0.0, "completion_length": 3022.513885498047, "dapo/avg_reward_std": 0.22437315998655377, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30808081003752624, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 51.880411255411246, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.056, "grad_norm": 0.028243908658623695, "kl": 3.2588839530944824e-05, "learning_rate": 6.588648530198504e-07, "loss": 0.0463, "reward": 0.5983518976718187, "reward_std": 0.97667645663023, "step": 49 }, { "clip_fraction": 0.0, "completion_length": 2369.423614501953, "dapo/avg_reward_std": 0.25065614397709185, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36538461996958804, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 51.666666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05714285714285714, "grad_norm": 0.03361990302801132, "kl": 2.4838373064994812e-05, "learning_rate": 6.435602608679916e-07, "loss": -0.0041, "reward": 0.6849855165928602, "reward_std": 0.9522178247570992, "step": 50 }, { "clip_fraction": 0.0, "completion_length": 2274.833396911621, "dapo/avg_reward_std": 0.22345838612980312, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2916666745311684, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 27.132936507936506, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05828571428571429, "grad_norm": 0.031927697360515594, "kl": 1.7890706658363342e-05, "learning_rate": 6.281416799501187e-07, "loss": 0.0196, "reward": 0.8541890066117048, "reward_std": 0.9146186113357544, "step": 51 }, { "clip_fraction": 0.0, "completion_length": 2918.0799102783203, "dapo/avg_reward_std": 0.28684074508732765, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333386429425, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 41.36904761904762, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05942857142857143, "grad_norm": 0.026396779343485832, "kl": 2.3087020963430405e-05, "learning_rate": 6.126278954320294e-07, "loss": 0.0343, "reward": 0.44786757230758667, "reward_std": 0.9706326127052307, "step": 52 }, { "clip_fraction": 0.0, "completion_length": 2045.833339691162, "dapo/avg_reward_std": 0.2355064716604021, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2870370431078805, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 31.354166666666664, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.060571428571428575, "grad_norm": 0.04913632944226265, "kl": 2.1755695343017578e-05, "learning_rate": 5.97037808470444e-07, "loss": 0.0387, "reward": 0.6510349959135056, "reward_std": 0.9507962614297867, "step": 53 }, { "clip_fraction": 0.0, "completion_length": 1948.9444427490234, "dapo/avg_reward_std": 0.243668794631958, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.366666671037674, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 56.5625, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.061714285714285715, "grad_norm": 0.040572620928287506, "kl": 2.1360814571380615e-05, "learning_rate": 5.813904131848564e-07, "loss": 0.0417, "reward": 0.5514028863981366, "reward_std": 0.9589040726423264, "step": 54 }, { "clip_fraction": 0.0, "completion_length": 2484.541648864746, "dapo/avg_reward_std": 0.30484401606596434, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.42307693224686843, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 42.18749999999999, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06285714285714286, "grad_norm": 0.0297782514244318, "kl": 2.2893771529197693e-05, "learning_rate": 5.657047735161255e-07, "loss": -0.0009, "reward": 0.4546010522171855, "reward_std": 0.9696914628148079, "step": 55 }, { "clip_fraction": 0.0, "completion_length": 1533.7361297607422, "dapo/avg_reward_std": 0.2159253837484302, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.29797980415098596, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 34.722222222222214, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.064, "grad_norm": 0.03312206640839577, "kl": 7.178634405136108e-06, "learning_rate": 5.5e-07, "loss": 0.0108, "reward": 0.7257717102766037, "reward_std": 0.9033158496022224, "step": 56 }, { "clip_fraction": 0.0, "completion_length": 2934.4409942626953, "dapo/avg_reward_std": 0.2505974847337474, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36956522192644037, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 41.66666666666666, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06514285714285714, "grad_norm": 0.02451159618794918, "kl": 1.9356608390808105e-05, "learning_rate": 5.342952264838747e-07, "loss": 0.0483, "reward": 0.5572653282433748, "reward_std": 0.9176028743386269, "step": 57 }, { "clip_fraction": 0.0, "completion_length": 1933.5243377685547, "dapo/avg_reward_std": 0.20699472725391388, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3235294174622087, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 43.50198412698413, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06628571428571428, "grad_norm": 0.04205997660756111, "kl": 2.446398138999939e-05, "learning_rate": 5.186095868151436e-07, "loss": 0.035, "reward": 0.5425214860588312, "reward_std": 0.9688811302185059, "step": 58 }, { "clip_fraction": 0.0, "completion_length": 2404.819435119629, "dapo/avg_reward_std": 0.21416518474236512, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2649572701790394, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 28.070436507936506, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06742857142857143, "grad_norm": 0.032379262149333954, "kl": 2.0030885934829712e-05, "learning_rate": 5.02962191529556e-07, "loss": -0.0022, "reward": 0.5781768467277288, "reward_std": 0.9525356665253639, "step": 59 }, { "clip_fraction": 0.0, "completion_length": 2963.888931274414, "dapo/avg_reward_std": 0.32426256509054274, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.42857143637679873, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 58.035714285714285, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06857142857142857, "grad_norm": 0.027211569249629974, "kl": 1.7156358808279037e-05, "learning_rate": 4.873721045679706e-07, "loss": 0.0068, "reward": 0.44747511111199856, "reward_std": 0.9607158154249191, "step": 60 }, { "clip_fraction": 0.0, "completion_length": 2205.2465591430664, "dapo/avg_reward_std": 0.203433408588171, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2500000063329935, "dapo/num_sampling_attempts": 5.0, "dapo/sampling_efficiency": 38.46153846153846, "dapo/total_prompts_processed": 30.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06971428571428571, "grad_norm": 0.035166963934898376, "kl": 1.146271824836731e-05, "learning_rate": 4.7185832004988133e-07, "loss": 0.0016, "reward": 0.7233948148787022, "reward_std": 0.9537224471569061, "step": 61 }, { "clip_fraction": 0.0, "completion_length": 2170.302101135254, "dapo/avg_reward_std": 0.3071755821054632, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46212121776559134, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 52.5, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07085714285714285, "grad_norm": 0.032445963472127914, "kl": 1.7118407413363457e-05, "learning_rate": 4.5643973913200837e-07, "loss": 0.0133, "reward": 0.5614959334488958, "reward_std": 0.9226407110691071, "step": 62 }, { "clip_fraction": 0.0, "completion_length": 2304.038215637207, "dapo/avg_reward_std": 0.3201758420025861, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3827160596847534, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 33.33333333333333, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.072, "grad_norm": 0.03544686362147331, "kl": 1.1014439223799855e-05, "learning_rate": 4.4113514698014953e-07, "loss": 0.0809, "reward": 0.6520206034183502, "reward_std": 0.9506091177463531, "step": 63 }, { "clip_fraction": 0.0, "completion_length": 1901.3506965637207, "dapo/avg_reward_std": 0.2710137654233862, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33950617964620944, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 38.541666666666664, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07314285714285715, "grad_norm": 0.044119708240032196, "kl": 2.606213092803955e-05, "learning_rate": 4.2596318988235037e-07, "loss": 0.0059, "reward": 0.6546321045607328, "reward_std": 0.9510733336210251, "step": 64 }, { "clip_fraction": 0.0, "completion_length": 2792.0382232666016, "dapo/avg_reward_std": 0.2836403740303857, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36904762951391085, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 39.58333333333333, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07428571428571429, "grad_norm": 0.04388947784900665, "kl": 1.2818491086363792e-05, "learning_rate": 4.1094235253127374e-07, "loss": 0.0675, "reward": 0.5376700833439827, "reward_std": 0.9546815231442451, "step": 65 }, { "clip_fraction": 0.0, "completion_length": 3018.1111450195312, "dapo/avg_reward_std": 0.2566617141167323, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35000000993410746, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 29.583333333333325, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07542857142857143, "grad_norm": 0.030510403215885162, "kl": 2.337433397769928e-05, "learning_rate": 3.9609093550344907e-07, "loss": 0.067, "reward": 0.45654861629009247, "reward_std": 0.9348908290266991, "step": 66 }, { "clip_fraction": 0.0, "completion_length": 2246.7361183166504, "dapo/avg_reward_std": 0.17681238457963272, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2657657684506597, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 39.75198412698412, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07657142857142857, "grad_norm": 0.039485227316617966, "kl": 3.0115246772766113e-05, "learning_rate": 3.8142703296283953e-07, "loss": -0.0103, "reward": 0.559457328170538, "reward_std": 0.9844456240534782, "step": 67 }, { "clip_fraction": 0.0, "completion_length": 1877.3090591430664, "dapo/avg_reward_std": 0.21082516993795122, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2809523867709296, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 40.13888888888889, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07771428571428571, "grad_norm": 0.04208315163850784, "kl": 1.7916783690452576e-05, "learning_rate": 3.6696851061588994e-07, "loss": 0.0055, "reward": 0.71805115416646, "reward_std": 0.9486410617828369, "step": 68 }, { "clip_fraction": 0.0, "completion_length": 2743.187484741211, "dapo/avg_reward_std": 0.3629622704842511, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5882353020064971, "dapo/num_sampling_attempts": 2.125, "dapo/sampling_efficiency": 57.291666666666664, "dapo/total_prompts_processed": 12.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07885714285714286, "grad_norm": 0.046305615454912186, "kl": 1.8481165170669556e-05, "learning_rate": 3.5273298394491515e-07, "loss": 0.0753, "reward": 0.5533816255629063, "reward_std": 0.9835677221417427, "step": 69 }, { "clip_fraction": 0.0, "completion_length": 1971.8750610351562, "dapo/avg_reward_std": 0.290031298995018, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3958333432674408, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 50.11904761904761, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08, "grad_norm": 0.03249451890587807, "kl": 1.0361894965171814e-05, "learning_rate": 3.387377967463493e-07, "loss": 0.0123, "reward": 0.7815902195870876, "reward_std": 0.9491127580404282, "step": 70 }, { "clip_fraction": 0.0, "completion_length": 2149.5729370117188, "dapo/avg_reward_std": 0.30720199798715525, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37931035356274967, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 31.666666666666664, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08114285714285714, "grad_norm": 0.02995998226106167, "kl": 2.8252601623535156e-05, "learning_rate": 3.250000000000001e-07, "loss": 0.0769, "reward": 0.5328625496476889, "reward_std": 0.9026356488466263, "step": 71 }, { "clip_fraction": 0.0, "completion_length": 1963.1562538146973, "dapo/avg_reward_std": 0.27671699684399825, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4551282163995963, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 46.041666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08228571428571428, "grad_norm": 0.046918418258428574, "kl": 3.359094262123108e-05, "learning_rate": 3.115363310950578e-07, "loss": 0.0368, "reward": 0.32596728252246976, "reward_std": 0.917833186686039, "step": 72 }, { "clip_fraction": 0.0, "completion_length": 2666.1666717529297, "dapo/avg_reward_std": 0.2536189202219248, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34895834140479565, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 37.84722222222222, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08342857142857144, "grad_norm": 0.0253219623118639, "kl": 3.542192280292511e-05, "learning_rate": 2.9836319343816397e-07, "loss": 0.0107, "reward": 0.6293175183236599, "reward_std": 0.935965321958065, "step": 73 }, { "clip_fraction": 0.0, "completion_length": 2119.447982788086, "dapo/avg_reward_std": 0.26048696994781495, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4200000029802322, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 47.291666666666664, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08457142857142858, "grad_norm": 0.034480538219213486, "kl": 1.7508864402770996e-05, "learning_rate": 2.854966364683872e-07, "loss": 0.0483, "reward": 0.7494360618293285, "reward_std": 0.9492424502968788, "step": 74 }, { "clip_fraction": 0.0, "completion_length": 2078.9375, "dapo/avg_reward_std": 0.2828026126932215, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3580246976128331, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 35.11904761904762, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08571428571428572, "grad_norm": 0.03545458987355232, "kl": 1.3923272490501404e-05, "learning_rate": 2.729523361034538e-07, "loss": 0.0531, "reward": 0.5464182365685701, "reward_std": 0.9530047550797462, "step": 75 }, { "clip_fraction": 0.0, "completion_length": 2342.5416564941406, "dapo/avg_reward_std": 0.21854268149896103, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3080808154561303, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 32.341269841269835, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08685714285714285, "grad_norm": 0.02881987765431404, "kl": 1.169554889202118e-05, "learning_rate": 2.6074557564105724e-07, "loss": 0.0077, "reward": 0.5642017107456923, "reward_std": 0.9335212334990501, "step": 76 }, { "clip_fraction": 0.0, "completion_length": 3205.104217529297, "dapo/avg_reward_std": 0.2153491945493789, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2777777835726738, "dapo/num_sampling_attempts": 5.25, "dapo/sampling_efficiency": 23.45238095238095, "dapo/total_prompts_processed": 31.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.088, "grad_norm": 0.024909108877182007, "kl": 2.2567808628082275e-05, "learning_rate": 2.488912271385139e-07, "loss": 0.0436, "reward": 0.4511043671518564, "reward_std": 0.9582105726003647, "step": 77 }, { "clip_fraction": 0.0, "completion_length": 1984.7881927490234, "dapo/avg_reward_std": 0.2325562967194451, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3703703780968984, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 46.354166666666664, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08914285714285715, "grad_norm": 0.04120900481939316, "kl": 2.2590160369873047e-05, "learning_rate": 2.374037332934512e-07, "loss": 0.0514, "reward": 0.46765367314219475, "reward_std": 0.9171552434563637, "step": 78 }, { "clip_fraction": 0.0, "completion_length": 2322.930576324463, "dapo/avg_reward_std": 0.24565138667821884, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35416666977107525, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 49.375, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09028571428571429, "grad_norm": 0.03351881355047226, "kl": 1.6979873180389404e-05, "learning_rate": 2.2629708984760706e-07, "loss": 0.0813, "reward": 0.4460947550833225, "reward_std": 0.9485716819763184, "step": 79 }, { "clip_fraction": 0.0, "completion_length": 2418.187545776367, "dapo/avg_reward_std": 0.23119631229024945, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2929292975953131, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 37.013888888888886, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09142857142857143, "grad_norm": 0.03444164991378784, "kl": 1.9297003746032715e-05, "learning_rate": 2.1558482853517253e-07, "loss": -0.0123, "reward": 0.47735430393368006, "reward_std": 0.9275016784667969, "step": 80 }, { "clip_fraction": 0.0, "completion_length": 2673.1666870117188, "dapo/avg_reward_std": 0.29530651973826544, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.39285714977553915, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 40.52083333333333, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09257142857142857, "grad_norm": 0.02858138270676136, "kl": 1.998385414481163e-05, "learning_rate": 2.0528000059645995e-07, "loss": 0.034, "reward": 0.41152474470436573, "reward_std": 0.9514285027980804, "step": 81 }, { "clip_fraction": 0.0, "completion_length": 2257.954864501953, "dapo/avg_reward_std": 0.23162428935368856, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3277777835726738, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 39.72222222222222, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09371428571428571, "grad_norm": 0.034180980175733566, "kl": 1.03069469332695e-05, "learning_rate": 7.681643291108517e-07, "loss": 0.0478, "reward": 0.6525773257017136, "reward_std": 0.9826234132051468, "step": 82 }, { "clip_fraction": 0.0, "completion_length": 2630.8507080078125, "dapo/avg_reward_std": 0.25974711243595394, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3511904797383717, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 49.166666666666664, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09485714285714286, "grad_norm": 0.03644736111164093, "kl": 1.800060272216797e-05, "learning_rate": 7.612622032536507e-07, "loss": 0.0921, "reward": 0.4112757742404938, "reward_std": 0.9365755990147591, "step": 83 }, { "clip_fraction": 0.0, "completion_length": 2569.4896087646484, "dapo/avg_reward_std": 0.20397330891518367, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.22619048080274037, "dapo/num_sampling_attempts": 5.25, "dapo/sampling_efficiency": 33.541666666666664, "dapo/total_prompts_processed": 31.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.096, "grad_norm": 0.027630111202597618, "kl": 9.745359420776367e-06, "learning_rate": 7.54295724882796e-07, "loss": 0.0357, "reward": 0.41497555933892727, "reward_std": 0.9506618455052376, "step": 84 }, { "clip_fraction": 0.0, "completion_length": 2213.0660400390625, "dapo/avg_reward_std": 0.2754218357224618, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333334038334506, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 36.354166666666664, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09714285714285714, "grad_norm": 0.035216327756643295, "kl": 1.6536563634872437e-05, "learning_rate": 7.472670160550848e-07, "loss": 0.0527, "reward": 0.632079154253006, "reward_std": 0.9386599361896515, "step": 85 }, { "clip_fraction": 0.0, "completion_length": 2339.1215209960938, "dapo/avg_reward_std": 0.24339192857344946, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.291666673289405, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 35.3125, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09828571428571428, "grad_norm": 0.03125083073973656, "kl": 1.6085803508758545e-05, "learning_rate": 7.401782177833147e-07, "loss": -0.0221, "reward": 0.4631906310096383, "reward_std": 0.9198382347822189, "step": 86 }, { "clip_fraction": 0.0, "completion_length": 1837.8993301391602, "dapo/avg_reward_std": 0.22774873872598012, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3777777845660845, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 46.87499999999999, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09942857142857142, "grad_norm": 0.04138842225074768, "kl": 1.7467886209487915e-05, "learning_rate": 7.330314893841101e-07, "loss": 0.0024, "reward": 0.7271542213857174, "reward_std": 0.905590832233429, "step": 87 }, { "clip_fraction": 0.0, "completion_length": 2786.0416564941406, "dapo/avg_reward_std": 0.2095056755202157, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2952381019081388, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 35.65972222222222, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10057142857142858, "grad_norm": 0.025848887860774994, "kl": 7.427297532558441e-06, "learning_rate": 7.258290078201731e-07, "loss": 0.002, "reward": 0.43730420619249344, "reward_std": 0.9195110127329826, "step": 88 }, { "clip_fraction": 0.0, "completion_length": 2346.68754196167, "dapo/avg_reward_std": 0.19395678072440914, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2560975657003682, "dapo/num_sampling_attempts": 5.125, "dapo/sampling_efficiency": 35.01488095238095, "dapo/total_prompts_processed": 30.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10171428571428572, "grad_norm": 0.040970027446746826, "kl": 1.3796612620353699e-05, "learning_rate": 7.185729670371604e-07, "loss": 0.0476, "reward": 0.6351554682478309, "reward_std": 0.8568265736103058, "step": 89 }, { "clip_fraction": 0.0, "completion_length": 2486.21875, "dapo/avg_reward_std": 0.2474305311153675, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3735632284961898, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 37.61904761904762, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10285714285714286, "grad_norm": 0.030587567016482353, "kl": 1.4983117580413818e-05, "learning_rate": 7.11265577295385e-07, "loss": 0.0254, "reward": 0.6515812119469047, "reward_std": 0.9235646799206734, "step": 90 }, { "clip_fraction": 0.0, "completion_length": 2515.017402648926, "dapo/avg_reward_std": 0.25874078144197876, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3913043562484824, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 51.56249999999999, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.104, "grad_norm": 0.031289275735616684, "kl": 6.1551108956336975e-06, "learning_rate": 7.039090644965509e-07, "loss": 0.0328, "reward": 0.6403396036475897, "reward_std": 0.9428967460989952, "step": 91 }, { "clip_fraction": 0.0, "completion_length": 2979.027801513672, "dapo/avg_reward_std": 0.2504267347486396, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2543859713171658, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 35.63041125541125, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10514285714285715, "grad_norm": 0.029049718752503395, "kl": -1.2740492820739746e-06, "learning_rate": 6.965056695057204e-07, "loss": 0.0314, "reward": 0.535519327968359, "reward_std": 0.8926167041063309, "step": 92 }, { "clip_fraction": 0.0, "completion_length": 2552.562515258789, "dapo/avg_reward_std": 0.2413217886801689, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333334038334506, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 34.791666666666664, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10628571428571429, "grad_norm": 0.03139115869998932, "kl": 1.3202428817749023e-05, "learning_rate": 6.890576474687263e-07, "loss": 0.067, "reward": 0.6561751328408718, "reward_std": 0.9787176623940468, "step": 93 }, { "clip_fraction": 0.0, "completion_length": 2403.184051513672, "dapo/avg_reward_std": 0.29813223962600416, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40384616129673445, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 40.416666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10742857142857143, "grad_norm": 0.032709378749132156, "kl": 2.093333750963211e-05, "learning_rate": 6.815672671252315e-07, "loss": 0.0328, "reward": 0.556912356056273, "reward_std": 0.9464646279811859, "step": 94 }, { "clip_fraction": 0.0, "completion_length": 2963.795181274414, "dapo/avg_reward_std": 0.2564438986472594, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26068376577817476, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 24.07738095238095, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10857142857142857, "grad_norm": 0.023549171164631844, "kl": 9.554903954267502e-06, "learning_rate": 6.740368101176495e-07, "loss": 0.0142, "reward": 0.3492610058747232, "reward_std": 0.8781530037522316, "step": 95 }, { "clip_fraction": 0.0, "completion_length": 2655.21875, "dapo/avg_reward_std": 0.31138683449138294, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46969697827642615, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 43.74999999999999, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10971428571428571, "grad_norm": 0.03213554993271828, "kl": 1.945020630955696e-05, "learning_rate": 6.664685702961344e-07, "loss": 0.0357, "reward": 0.4872458651661873, "reward_std": 0.9538498669862747, "step": 96 }, { "clip_fraction": 0.0, "completion_length": 2325.888900756836, "dapo/avg_reward_std": 0.18781672976911068, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2968750069849193, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 38.263888888888886, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11085714285714286, "grad_norm": 0.03308973088860512, "kl": 1.2524658814072609e-05, "learning_rate": 6.588648530198504e-07, "loss": 0.0332, "reward": 0.5582090672105551, "reward_std": 0.9704806208610535, "step": 97 }, { "clip_fraction": 0.0, "completion_length": 2980.78125, "dapo/avg_reward_std": 0.22120360245830134, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.29824561900214147, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 34.717261904761905, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.112, "grad_norm": 0.02593560516834259, "kl": 9.87970270216465e-06, "learning_rate": 6.512279744547392e-07, "loss": 0.0537, "reward": 0.5110117536969483, "reward_std": 0.9140844419598579, "step": 98 }, { "clip_fraction": 0.0, "completion_length": 2679.701400756836, "dapo/avg_reward_std": 0.22513854503631592, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.388888892200258, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 40.104166666666664, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11314285714285714, "grad_norm": 0.028198201209306717, "kl": -2.773245796561241e-06, "learning_rate": 6.435602608679916e-07, "loss": 0.0223, "reward": 0.5703150723129511, "reward_std": 0.9169064536690712, "step": 99 }, { "clip_fraction": 0.0, "completion_length": 2113.7396087646484, "dapo/avg_reward_std": 0.2158526074555185, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2916666724615627, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 30.823863636363633, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11428571428571428, "grad_norm": 0.032321903854608536, "kl": 2.765655517578125e-05, "learning_rate": 6.358640479194451e-07, "loss": 0.037, "reward": 0.552736995741725, "reward_std": 0.929665133357048, "step": 100 }, { "clip_fraction": 0.0, "completion_length": 2397.545135498047, "dapo/avg_reward_std": 0.2640196681022644, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41304348603538843, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 43.75, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11542857142857142, "grad_norm": 0.030507881194353104, "kl": 1.4653429388999939e-05, "learning_rate": 6.281416799501187e-07, "loss": 0.0216, "reward": 0.7607237044721842, "reward_std": 0.9413916915655136, "step": 101 }, { "clip_fraction": 0.0, "completion_length": 2775.312515258789, "dapo/avg_reward_std": 0.26319959415839267, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3910256509597485, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 38.95833333333333, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11657142857142858, "grad_norm": 0.028825754299759865, "kl": 1.7821788787841797e-05, "learning_rate": 6.203955092681039e-07, "loss": -0.0059, "reward": 0.4367541056126356, "reward_std": 0.9408165961503983, "step": 102 }, { "clip_fraction": 0.0, "completion_length": 2606.3194580078125, "dapo/avg_reward_std": 0.22601407093386497, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.295698931620967, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 30.624999999999993, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11771428571428572, "grad_norm": 0.029979709535837173, "kl": 2.3851171135902405e-06, "learning_rate": 6.126278954320294e-07, "loss": 0.0463, "reward": 0.6886496935039759, "reward_std": 0.9053627252578735, "step": 103 }, { "clip_fraction": 0.0, "completion_length": 2084.829849243164, "dapo/avg_reward_std": 0.22010741523794225, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2702702747003452, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 32.51488095238095, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11885714285714286, "grad_norm": 0.04769710823893547, "kl": 2.0613893866539e-05, "learning_rate": 6.048412045323164e-07, "loss": 0.1162, "reward": 0.684872523881495, "reward_std": 0.9595381543040276, "step": 104 }, { "clip_fraction": 0.0, "completion_length": 1955.1354484558105, "dapo/avg_reward_std": 0.2937169720729192, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.42361111504336196, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 49.166666666666664, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12, "grad_norm": 0.04352044314146042, "kl": 2.0936131477355957e-05, "learning_rate": 5.97037808470444e-07, "loss": -0.0017, "reward": 0.6524754576385021, "reward_std": 0.9669848829507828, "step": 105 }, { "clip_fraction": 0.0, "completion_length": 2316.0486221313477, "dapo/avg_reward_std": 0.2529407059773803, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3020833423361182, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 30.729166666666664, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12114285714285715, "grad_norm": 0.03129468858242035, "kl": 1.8656253814697266e-05, "learning_rate": 5.892200842364462e-07, "loss": -0.0284, "reward": 0.6108895651996136, "reward_std": 0.9319325312972069, "step": 106 }, { "clip_fraction": 0.0, "completion_length": 2094.6909942626953, "dapo/avg_reward_std": 0.2037892586655087, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2629629688130485, "dapo/num_sampling_attempts": 5.625, "dapo/sampling_efficiency": 21.066919191919194, "dapo/total_prompts_processed": 33.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12228571428571429, "grad_norm": 0.038948290050029755, "kl": 2.824072726070881e-05, "learning_rate": 5.813904131848564e-07, "loss": 0.0748, "reward": 0.48047966323792934, "reward_std": 0.9251860752701759, "step": 107 }, { "clip_fraction": 0.0, "completion_length": 2482.6146240234375, "dapo/avg_reward_std": 0.19606016278266908, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.22592593100335862, "dapo/num_sampling_attempts": 5.625, "dapo/sampling_efficiency": 21.577380952380953, "dapo/total_prompts_processed": 33.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12342857142857143, "grad_norm": 0.027610260993242264, "kl": 1.3685785233974457e-05, "learning_rate": 5.735511803093248e-07, "loss": 0.0016, "reward": 0.46788009256124496, "reward_std": 0.9522990807890892, "step": 108 }, { "clip_fraction": 0.0, "completion_length": 3010.541717529297, "dapo/avg_reward_std": 0.23601235449314117, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.38461538977347887, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 61.5530303030303, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12457142857142857, "grad_norm": 0.031469572335481644, "kl": 2.0675361156463623e-05, "learning_rate": 5.657047735161255e-07, "loss": 0.0491, "reward": 0.6003496535122395, "reward_std": 0.9582010880112648, "step": 109 }, { "clip_fraction": 0.0, "completion_length": 2550.388931274414, "dapo/avg_reward_std": 0.24275302588939668, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3222222273548444, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 41.666666666666664, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12571428571428572, "grad_norm": 0.03043791465461254, "kl": 1.619383692741394e-05, "learning_rate": 5.578535828967777e-07, "loss": 0.0395, "reward": 0.6210233392193913, "reward_std": 0.9545274153351784, "step": 110 }, { "clip_fraction": 0.0, "completion_length": 2248.6771240234375, "dapo/avg_reward_std": 0.2556017003953457, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.32291667349636555, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 40.451388888888886, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12685714285714286, "grad_norm": 0.029558613896369934, "kl": 1.7130747437477112e-05, "learning_rate": 5.5e-07, "loss": 0.0156, "reward": 0.8898655958473682, "reward_std": 0.8961458280682564, "step": 111 }, { "clip_fraction": 0.0, "completion_length": 2790.4132537841797, "dapo/avg_reward_std": 0.2798377914088113, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35714286299688475, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 32.291666666666664, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.128, "grad_norm": 0.02665926143527031, "kl": 2.7702553779818118e-05, "learning_rate": 5.421464171032224e-07, "loss": 0.0375, "reward": 0.4765107296407223, "reward_std": 0.9586756750941277, "step": 112 }, { "clip_fraction": 0.0, "completion_length": 2058.163261413574, "dapo/avg_reward_std": 0.21719616024117722, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2850877270102501, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 36.13636363636364, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12914285714285714, "grad_norm": 0.03724399581551552, "kl": 9.129568934440613e-05, "learning_rate": 5.342952264838747e-07, "loss": 0.0308, "reward": 0.5965504869818687, "reward_std": 0.9517285376787186, "step": 113 }, { "clip_fraction": 0.0, "completion_length": 1804.7569427490234, "dapo/avg_reward_std": 0.22654692203767837, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30645161819073463, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 46.800595238095234, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13028571428571428, "grad_norm": 0.0444670133292675, "kl": 3.589317202568054e-05, "learning_rate": 5.264488196906752e-07, "loss": 0.0217, "reward": 0.4887783471494913, "reward_std": 0.9572358801960945, "step": 114 }, { "clip_fraction": 0.0, "completion_length": 2705.472236633301, "dapo/avg_reward_std": 0.24942583271435328, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4285714335384823, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 51.45833333333333, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13142857142857142, "grad_norm": 0.027661452069878578, "kl": 1.307763159275055e-05, "learning_rate": 5.186095868151436e-07, "loss": -0.022, "reward": 0.5754544343799353, "reward_std": 0.9811793565750122, "step": 115 }, { "clip_fraction": 0.0, "completion_length": 1660.2222213745117, "dapo/avg_reward_std": 0.20845345951415398, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30630631100487066, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 32.013888888888886, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13257142857142856, "grad_norm": 0.03922427445650101, "kl": 7.28946179151535e-06, "learning_rate": 5.107799157635538e-07, "loss": 0.0279, "reward": 0.8034113459289074, "reward_std": 0.9163173362612724, "step": 116 }, { "clip_fraction": 0.0, "completion_length": 2143.3368377685547, "dapo/avg_reward_std": 0.25861393963849105, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3456790193363472, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 38.95833333333333, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1337142857142857, "grad_norm": 0.0386907123029232, "kl": 2.8124195523560047e-05, "learning_rate": 5.02962191529556e-07, "loss": 0.0157, "reward": 0.5698221866041422, "reward_std": 0.9738077968358994, "step": 117 }, { "clip_fraction": 0.0, "completion_length": 2709.371551513672, "dapo/avg_reward_std": 0.17381487890731456, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26356589343658715, "dapo/num_sampling_attempts": 5.375, "dapo/sampling_efficiency": 31.522817460317455, "dapo/total_prompts_processed": 32.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13485714285714287, "grad_norm": 0.03524978086352348, "kl": 2.0368024706840515e-05, "learning_rate": 4.951587954676837e-07, "loss": 0.073, "reward": 0.5433152373880148, "reward_std": 0.9576972275972366, "step": 118 }, { "clip_fraction": 0.0, "completion_length": 2729.6458129882812, "dapo/avg_reward_std": 0.2853468172252178, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31770834140479565, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 38.13988095238095, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.136, "grad_norm": 0.035877469927072525, "kl": 9.79006290435791e-06, "learning_rate": 4.873721045679706e-07, "loss": 0.0223, "reward": 0.4996686838567257, "reward_std": 0.9503490626811981, "step": 119 }, { "clip_fraction": 0.0, "completion_length": 2456.458351135254, "dapo/avg_reward_std": 0.3290893492244539, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5000000127724239, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 40.62499999999999, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13714285714285715, "grad_norm": 0.03583266958594322, "kl": 9.331852197647095e-06, "learning_rate": 4.79604490731896e-07, "loss": 0.0363, "reward": 0.8003920987248421, "reward_std": 0.955727644264698, "step": 120 }, { "clip_fraction": 0.0, "completion_length": 2489.1875, "dapo/avg_reward_std": 0.1615937834694272, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.22222222600664412, "dapo/num_sampling_attempts": 5.25, "dapo/sampling_efficiency": 37.41987179487179, "dapo/total_prompts_processed": 31.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1382857142857143, "grad_norm": 0.027044769376516342, "kl": 2.0619481801986694e-05, "learning_rate": 4.7185832004988133e-07, "loss": 0.0123, "reward": 0.5692465994507074, "reward_std": 0.9356264397501945, "step": 121 }, { "clip_fraction": 0.0, "completion_length": 2946.687530517578, "dapo/avg_reward_std": 0.26767816713878084, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3452381023338863, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 33.75, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13942857142857143, "grad_norm": 0.03187067061662674, "kl": 2.1383166313171387e-05, "learning_rate": 4.641359520805548e-07, "loss": 0.0722, "reward": 0.42231168132275343, "reward_std": 0.9001481607556343, "step": 122 }, { "clip_fraction": 0.0, "completion_length": 1841.1458206176758, "dapo/avg_reward_std": 0.32384763956069945, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4000000065565109, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 41.041666666666664, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14057142857142857, "grad_norm": 0.03784916177392006, "kl": 4.2632222175598145e-05, "learning_rate": 4.5643973913200837e-07, "loss": 0.0367, "reward": 0.6476083844900131, "reward_std": 0.908843033015728, "step": 123 }, { "clip_fraction": 0.0, "completion_length": 2392.166702270508, "dapo/avg_reward_std": 0.26674444922085466, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3218390869683233, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 31.666666666666664, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1417142857142857, "grad_norm": 0.02941369265317917, "kl": 2.299714833498001e-05, "learning_rate": 4.4877202554526084e-07, "loss": 0.0152, "reward": 0.5824479665607214, "reward_std": 0.9478363320231438, "step": 124 }, { "clip_fraction": 0.0, "completion_length": 3125.159713745117, "dapo/avg_reward_std": 0.29309388995170593, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5000000049670538, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 51.45833333333333, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14285714285714285, "grad_norm": 0.030095171183347702, "kl": 3.2413750886917114e-05, "learning_rate": 4.4113514698014953e-07, "loss": 0.0534, "reward": 0.5003506469074637, "reward_std": 0.8919698372483253, "step": 125 }, { "clip_fraction": 0.0, "completion_length": 2462.8368377685547, "dapo/avg_reward_std": 0.2680182981491089, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3466666728258133, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 46.87499999999999, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.144, "grad_norm": 0.04286734014749527, "kl": 5.683675408363342e-05, "learning_rate": 4.3353142970386557e-07, "loss": 0.0028, "reward": 0.5951744802296162, "reward_std": 0.9584252312779427, "step": 126 }, { "clip_fraction": 0.0, "completion_length": 2443.4618225097656, "dapo/avg_reward_std": 0.19895405417833573, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2820512862541737, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 33.90376984126984, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14514285714285713, "grad_norm": 0.03486345708370209, "kl": 2.958625555038452e-05, "learning_rate": 4.2596318988235037e-07, "loss": -0.0055, "reward": 0.7111770529299974, "reward_std": 0.9570346251130104, "step": 127 }, { "clip_fraction": 0.0, "completion_length": 2227.385452270508, "dapo/avg_reward_std": 0.22934340153421676, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333333688122885, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 52.291666666666664, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1462857142857143, "grad_norm": 0.04721139743924141, "kl": 3.547314554452896e-05, "learning_rate": 4.1843273287476854e-07, "loss": 0.1085, "reward": 0.4447980001568794, "reward_std": 0.951726958155632, "step": 128 }, { "clip_fraction": 0.0, "completion_length": 2883.357681274414, "dapo/avg_reward_std": 0.4109063148498535, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.6777777880430221, "dapo/num_sampling_attempts": 1.875, "dapo/sampling_efficiency": 65.625, "dapo/total_prompts_processed": 11.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14742857142857144, "grad_norm": 0.02544778771698475, "kl": 9.082257747650146e-06, "learning_rate": 4.1094235253127374e-07, "loss": 0.046, "reward": 0.6885830331593752, "reward_std": 0.9739237055182457, "step": 129 }, { "clip_fraction": 0.0, "completion_length": 2122.795181274414, "dapo/avg_reward_std": 0.2591241377371329, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3641975356472863, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 39.70238095238095, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14857142857142858, "grad_norm": 0.03150525689125061, "kl": 3.223586827516556e-05, "learning_rate": 4.034943304942796e-07, "loss": 0.0306, "reward": 0.5525269485078752, "reward_std": 0.9417792037129402, "step": 130 }, { "clip_fraction": 0.0, "completion_length": 2306.8611450195312, "dapo/avg_reward_std": 0.3414611066209859, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3908046078065346, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 32.410714285714285, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14971428571428572, "grad_norm": 0.036385975778102875, "kl": 4.038959741592407e-05, "learning_rate": 3.9609093550344907e-07, "loss": 0.0679, "reward": 0.5595943983644247, "reward_std": 0.9294908344745636, "step": 131 }, { "clip_fraction": 0.0, "completion_length": 2100.4444694519043, "dapo/avg_reward_std": 0.22894747753938038, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34444445222616193, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 38.541666666666664, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15085714285714286, "grad_norm": 0.05820675194263458, "kl": 7.29486346244812e-05, "learning_rate": 3.8873442270461485e-07, "loss": 0.0548, "reward": 0.5259249797090888, "reward_std": 0.9095494002103806, "step": 132 }, { "clip_fraction": 0.0, "completion_length": 2399.0555725097656, "dapo/avg_reward_std": 0.2968884447346563, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4057971057684525, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 48.33333333333333, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.152, "grad_norm": 0.03143748641014099, "kl": 1.6003847122192383e-05, "learning_rate": 3.8142703296283953e-07, "loss": 0.0154, "reward": 0.6293735019862652, "reward_std": 0.9267243668437004, "step": 133 }, { "clip_fraction": 0.0, "completion_length": 2028.9653091430664, "dapo/avg_reward_std": 0.24916886538267136, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4097222276031971, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 45.83333333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15314285714285714, "grad_norm": 0.03667714074254036, "kl": 2.6845373213291168e-05, "learning_rate": 3.7417099217982686e-07, "loss": 0.0108, "reward": 0.6901863785460591, "reward_std": 0.9471788480877876, "step": 134 }, { "clip_fraction": 0.0, "completion_length": 2116.6493225097656, "dapo/avg_reward_std": 0.3074521411742483, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37500000638621195, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 33.035714285714285, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15428571428571428, "grad_norm": 0.04016295075416565, "kl": 4.020519554615021e-05, "learning_rate": 3.6696851061588994e-07, "loss": 0.081, "reward": 0.6064621905097738, "reward_std": 0.9165264815092087, "step": 135 }, { "clip_fraction": 0.0, "completion_length": 2051.2812728881836, "dapo/avg_reward_std": 0.20643932349754102, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2979798059571873, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 49.26136363636363, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15542857142857142, "grad_norm": 0.03907117620110512, "kl": 4.081428050994873e-05, "learning_rate": 3.5982178221668533e-07, "loss": 0.0631, "reward": 0.6007686145603657, "reward_std": 0.946811780333519, "step": 136 }, { "clip_fraction": 0.0, "completion_length": 2981.6145935058594, "dapo/avg_reward_std": 0.17673770231860025, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26190476829097387, "dapo/num_sampling_attempts": 5.25, "dapo/sampling_efficiency": 33.19444444444444, "dapo/total_prompts_processed": 31.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15657142857142858, "grad_norm": 0.026764124631881714, "kl": 2.1813437342643738e-05, "learning_rate": 3.5273298394491515e-07, "loss": 0.0296, "reward": 0.5422612819820642, "reward_std": 0.9660339280962944, "step": 137 }, { "clip_fraction": 0.0, "completion_length": 1996.4930725097656, "dapo/avg_reward_std": 0.2211539367834727, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35000000447034835, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 41.666666666666664, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15771428571428572, "grad_norm": 0.036459192633628845, "kl": 6.0535967350006104e-05, "learning_rate": 3.45704275117204e-07, "loss": 0.0473, "reward": 0.6352426074445248, "reward_std": 1.0075769945979118, "step": 138 }, { "clip_fraction": 0.0, "completion_length": 2673.013931274414, "dapo/avg_reward_std": 0.21187836019431844, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.28431372738936367, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 40.347222222222214, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15885714285714286, "grad_norm": 0.027443382889032364, "kl": 4.770606756210327e-05, "learning_rate": 3.387377967463493e-07, "loss": 0.0398, "reward": 0.53852697648108, "reward_std": 0.9717471078038216, "step": 139 }, { "clip_fraction": 0.0, "completion_length": 2352.944465637207, "dapo/avg_reward_std": 0.28073156496574136, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33908046936166697, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 31.666666666666664, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16, "grad_norm": 0.03219648823142052, "kl": 1.9827857613563538e-05, "learning_rate": 3.3183567088914833e-07, "loss": 0.0502, "reward": 0.5767329391092062, "reward_std": 0.920682892203331, "step": 140 }, { "clip_fraction": 0.0, "completion_length": 2714.9097595214844, "dapo/avg_reward_std": 0.17997434735298157, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26495726979695833, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 24.82142857142857, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16114285714285714, "grad_norm": 0.03654953092336655, "kl": 2.0893290638923645e-05, "learning_rate": 3.250000000000001e-07, "loss": 0.0808, "reward": 0.7222395315766335, "reward_std": 0.9689760208129883, "step": 141 }, { "clip_fraction": 0.0, "completion_length": 1895.9965209960938, "dapo/avg_reward_std": 0.24079040033476692, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30476190788405283, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 36.67207792207792, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16228571428571428, "grad_norm": 0.05263448879122734, "kl": 8.018314838409424e-05, "learning_rate": 3.182328662904756e-07, "loss": 0.0952, "reward": 0.5266689900308847, "reward_std": 0.9142153859138489, "step": 142 }, { "clip_fraction": 0.0, "completion_length": 2619.2291717529297, "dapo/avg_reward_std": 0.2643248688790106, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34408602887584316, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 32.410714285714285, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16342857142857142, "grad_norm": 0.029158689081668854, "kl": 3.154575824737549e-05, "learning_rate": 3.115363310950578e-07, "loss": 0.0032, "reward": 0.5475870370864868, "reward_std": 0.8940814658999443, "step": 143 }, { "clip_fraction": 0.0, "completion_length": 2439.340316772461, "dapo/avg_reward_std": 0.25194550690979794, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33908046576483497, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 48.86904761904761, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16457142857142856, "grad_norm": 0.027842765673995018, "kl": 4.0609389543533325e-05, "learning_rate": 3.0491243424323783e-07, "loss": 0.0, "reward": 0.6661859937012196, "reward_std": 0.9778606072068214, "step": 144 }, { "clip_fraction": 0.0, "completion_length": 2299.4166870117188, "dapo/avg_reward_std": 0.19899881369358785, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2567567603813635, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 27.96626984126984, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1657142857142857, "grad_norm": 0.041895266622304916, "kl": 6.861239671707153e-05, "learning_rate": 2.9836319343816397e-07, "loss": 0.1109, "reward": 0.6072739865630865, "reward_std": 0.9706787243485451, "step": 145 }, { "clip_fraction": 0.0, "completion_length": 2448.3993225097656, "dapo/avg_reward_std": 0.26682727987116034, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4015151573853059, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 61.25, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16685714285714287, "grad_norm": 0.033113960176706314, "kl": 6.478279829025269e-05, "learning_rate": 2.918906036420294e-07, "loss": -0.0725, "reward": 0.7111451979726553, "reward_std": 0.9747665524482727, "step": 146 }, { "clip_fraction": 0.0, "completion_length": 2499.4132080078125, "dapo/avg_reward_std": 0.23725970940930502, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36904762791735785, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 40.972222222222214, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.168, "grad_norm": 0.03699960932135582, "kl": 5.050189793109894e-05, "learning_rate": 2.854966364683872e-07, "loss": 0.0512, "reward": 0.5902281412854791, "reward_std": 0.9745439067482948, "step": 147 }, { "clip_fraction": 0.0, "completion_length": 2606.902816772461, "dapo/avg_reward_std": 0.3174622275612571, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46212122250686993, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 50.416666666666664, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16914285714285715, "grad_norm": 0.032203614711761475, "kl": 3.288034349679947e-05, "learning_rate": 2.791832395815782e-07, "loss": 0.0183, "reward": 0.4769565463066101, "reward_std": 0.9322275221347809, "step": 148 }, { "clip_fraction": 0.0, "completion_length": 2815.8160247802734, "dapo/avg_reward_std": 0.2469456638350631, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2979798046025363, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 35.11904761904762, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1702857142857143, "grad_norm": 0.030444171279668808, "kl": 3.5978853702545166e-05, "learning_rate": 2.729523361034538e-07, "loss": 0.056, "reward": 0.6807443965226412, "reward_std": 0.9815046414732933, "step": 149 }, { "clip_fraction": 0.0, "completion_length": 2225.520866394043, "dapo/avg_reward_std": 0.19231303450134066, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2777777844005161, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 32.18749999999999, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17142857142857143, "grad_norm": 0.03868250176310539, "kl": 4.6514905989170074e-05, "learning_rate": 2.6680582402757324e-07, "loss": -0.037, "reward": 0.6887061549350619, "reward_std": 0.9610730484127998, "step": 150 }, { "clip_fraction": 0.0, "completion_length": 3103.3784790039062, "dapo/avg_reward_std": 0.20304633464132035, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31547619295971735, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 40.32738095238095, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17257142857142857, "grad_norm": 0.03259337320923805, "kl": 7.005780935287476e-05, "learning_rate": 2.6074557564105724e-07, "loss": 0.0659, "reward": 0.5518668536096811, "reward_std": 0.9462934136390686, "step": 151 }, { "clip_fraction": 0.0, "completion_length": 2488.499984741211, "dapo/avg_reward_std": 0.20882706064730883, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3177083367481828, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 39.409722222222214, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1737142857142857, "grad_norm": 0.030666321516036987, "kl": 3.533810377120972e-05, "learning_rate": 2.547734369542718e-07, "loss": 0.0437, "reward": 0.5291262120008469, "reward_std": 0.981982946395874, "step": 152 }, { "clip_fraction": 0.0, "completion_length": 2514.8507080078125, "dapo/avg_reward_std": 0.20546393813910308, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3209876600239012, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 35.93749999999999, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17485714285714285, "grad_norm": 0.028674930334091187, "kl": 7.952749729156494e-05, "learning_rate": 2.488912271385139e-07, "loss": -0.0145, "reward": 0.5828098729252815, "reward_std": 0.9706256464123726, "step": 153 }, { "clip_fraction": 0.0, "completion_length": 2717.2847290039062, "dapo/avg_reward_std": 0.25499844749768574, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36666667511065804, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 34.791666666666664, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.176, "grad_norm": 0.030772393569350243, "kl": 4.854763392359018e-05, "learning_rate": 2.4310073797187573e-07, "loss": 0.0426, "reward": 0.45278373593464494, "reward_std": 0.9311749711632729, "step": 154 }, { "clip_fraction": 0.0, "completion_length": 2762.3055725097656, "dapo/avg_reward_std": 0.29779375117758045, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3985507280930229, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 46.25, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17714285714285713, "grad_norm": 0.02795676700770855, "kl": 6.116554141044617e-05, "learning_rate": 2.374037332934512e-07, "loss": -0.017, "reward": 0.5571175646036863, "reward_std": 0.951450802385807, "step": 155 }, { "clip_fraction": 0.0, "completion_length": 2260.506950378418, "dapo/avg_reward_std": 0.19260793987740862, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.20921986375717408, "dapo/num_sampling_attempts": 5.875, "dapo/sampling_efficiency": 20.416666666666664, "dapo/total_prompts_processed": 35.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1782857142857143, "grad_norm": 0.03577401861548424, "kl": 4.409998655319214e-05, "learning_rate": 2.3180194846605364e-07, "loss": 0.0769, "reward": 0.6440617088228464, "reward_std": 0.9337564334273338, "step": 156 }, { "clip_fraction": 0.0, "completion_length": 2340.84725189209, "dapo/avg_reward_std": 0.27447891732056934, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40972222946584225, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 46.87499999999999, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17942857142857144, "grad_norm": 0.045233093202114105, "kl": 6.485730409622192e-05, "learning_rate": 2.2629708984760706e-07, "loss": 0.0363, "reward": 0.7273098900914192, "reward_std": 0.9823846518993378, "step": 157 }, { "clip_fraction": 0.0, "completion_length": 2282.3819580078125, "dapo/avg_reward_std": 0.20623917956101268, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31140351334684774, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 23.680555555555557, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18057142857142858, "grad_norm": 0.02890234813094139, "kl": 5.996227264404297e-05, "learning_rate": 2.2089083427137329e-07, "loss": 0.0031, "reward": 0.6950137317180634, "reward_std": 0.9464666321873665, "step": 158 }, { "clip_fraction": 0.0, "completion_length": 2021.6284866333008, "dapo/avg_reward_std": 0.23576846316054062, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3198198257265864, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 26.96969696969697, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18171428571428572, "grad_norm": 0.03477742150425911, "kl": 6.712228059768677e-05, "learning_rate": 2.1558482853517253e-07, "loss": 0.0402, "reward": 0.5178025495260954, "reward_std": 0.9177478551864624, "step": 159 }, { "clip_fraction": 0.0, "completion_length": 2372.9931030273438, "dapo/avg_reward_std": 0.1955654670794805, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.24206349643922986, "dapo/num_sampling_attempts": 5.25, "dapo/sampling_efficiency": 22.916666666666664, "dapo/total_prompts_processed": 31.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18285714285714286, "grad_norm": 0.03023899346590042, "kl": 0.00011706352233886719, "learning_rate": 2.1038068889975259e-07, "loss": -0.023, "reward": 0.5155377965420485, "reward_std": 0.9538168758153915, "step": 160 }, { "clip_fraction": 0.0, "completion_length": 2786.184097290039, "dapo/avg_reward_std": 0.22358988050152273, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3039215772467501, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 36.354166666666664, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.184, "grad_norm": 0.029065359383821487, "kl": 7.36340880393982e-05, "learning_rate": 2.0528000059645995e-07, "loss": 0.0183, "reward": 0.5675038225017488, "reward_std": 0.9294460043311119, "step": 161 }, { "clip_fraction": 0.0, "completion_length": 2661.7986183166504, "dapo/avg_reward_std": 0.23443660909129727, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3494623740834574, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 39.166666666666664, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18514285714285714, "grad_norm": 0.03428042680025101, "kl": 7.835030555725098e-05, "learning_rate": 2.0028431734436308e-07, "loss": 0.0077, "reward": 0.6459280159324408, "reward_std": 0.961892195045948, "step": 162 }, { "clip_fraction": 0.0, "completion_length": 2645.9305725097656, "dapo/avg_reward_std": 0.2903378981611003, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.39855073133240576, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 54.166666666666664, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18628571428571428, "grad_norm": 0.026776015758514404, "kl": 6.175786256790161e-05, "learning_rate": 1.9539516087697517e-07, "loss": 0.0499, "reward": 0.834372952580452, "reward_std": 0.9364972710609436, "step": 163 }, { "clip_fraction": 0.0, "completion_length": 2940.7604370117188, "dapo/avg_reward_std": 0.28692422310511273, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35555556217829387, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 32.708333333333336, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18742857142857142, "grad_norm": 0.03140675649046898, "kl": 6.527453660964966e-05, "learning_rate": 1.9061402047871833e-07, "loss": 0.074, "reward": 0.41690353071317077, "reward_std": 0.9491114094853401, "step": 164 }, { "clip_fraction": 0.0, "completion_length": 2281.6284675598145, "dapo/avg_reward_std": 0.19226541501634262, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.28921569007284503, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 34.285714285714285, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18857142857142858, "grad_norm": 0.044475626200437546, "kl": 6.622821092605591e-05, "learning_rate": 1.8594235253127372e-07, "loss": 0.0216, "reward": 0.5352295860648155, "reward_std": 0.9716188460588455, "step": 165 }, { "clip_fraction": 0.0, "completion_length": 2246.774314880371, "dapo/avg_reward_std": 0.21395914729048565, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2642276446993758, "dapo/num_sampling_attempts": 5.125, "dapo/sampling_efficiency": 32.51488095238095, "dapo/total_prompts_processed": 30.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18971428571428572, "grad_norm": 0.03659826144576073, "kl": 7.368624210357666e-05, "learning_rate": 1.8138158006995363e-07, "loss": 0.0485, "reward": 0.5606641564518213, "reward_std": 0.9496459811925888, "step": 166 }, { "clip_fraction": 0.0, "completion_length": 2340.156265258789, "dapo/avg_reward_std": 0.2663822333017985, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3888888974984487, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 30.32738095238095, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19085714285714286, "grad_norm": 0.03370486944913864, "kl": 0.00011890754103660583, "learning_rate": 1.7693309235023127e-07, "loss": 0.0107, "reward": 0.615155003964901, "reward_std": 0.981718622148037, "step": 167 }, { "clip_fraction": 0.0, "completion_length": 1600.381950378418, "dapo/avg_reward_std": 0.2149174999859598, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31481481964389485, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 39.30555555555556, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.192, "grad_norm": 0.040477264672517776, "kl": 4.4405460357666016e-05, "learning_rate": 1.7259824442455923e-07, "loss": 0.0183, "reward": 0.7775004804134369, "reward_std": 0.9218784719705582, "step": 168 }, { "clip_fraction": 0.0, "completion_length": 2663.3229370117188, "dapo/avg_reward_std": 0.29243687472560187, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4242424314672297, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 45.20833333333333, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19314285714285714, "grad_norm": 0.033447615802288055, "kl": 6.474554538726807e-05, "learning_rate": 1.6837835672960831e-07, "loss": 0.0604, "reward": 0.6684309486299753, "reward_std": 0.9398416355252266, "step": 169 }, { "clip_fraction": 0.0, "completion_length": 1823.3020782470703, "dapo/avg_reward_std": 0.19836447931624748, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.24324324847878637, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 33.229166666666664, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19428571428571428, "grad_norm": 0.050460852682590485, "kl": 8.266419172286987e-05, "learning_rate": 1.6427471468404952e-07, "loss": 0.0797, "reward": 0.6385768353939056, "reward_std": 0.9705075472593307, "step": 170 }, { "clip_fraction": 0.0, "completion_length": 2620.312515258789, "dapo/avg_reward_std": 0.2494219935992185, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.29901961412499933, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 30.3125, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19542857142857142, "grad_norm": 0.030058681964874268, "kl": 5.9291720390319824e-05, "learning_rate": 1.6028856829700258e-07, "loss": 0.04, "reward": 0.5667276866734028, "reward_std": 0.9310731589794159, "step": 171 }, { "clip_fraction": 0.0, "completion_length": 2728.118064880371, "dapo/avg_reward_std": 0.3154246766458858, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.47727273540063336, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 52.82738095238095, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19657142857142856, "grad_norm": 0.02854626253247261, "kl": 4.601478576660156e-05, "learning_rate": 1.5642113178727193e-07, "loss": -0.0071, "reward": 0.5269420258700848, "reward_std": 0.9420886114239693, "step": 172 }, { "clip_fraction": 0.0, "completion_length": 2000.6111297607422, "dapo/avg_reward_std": 0.1943835632221119, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2657657728807346, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 38.02083333333333, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1977142857142857, "grad_norm": 0.033435527235269547, "kl": 6.041303277015686e-05, "learning_rate": 1.5267358321348285e-07, "loss": -0.0116, "reward": 0.6523085497319698, "reward_std": 0.9166425243020058, "step": 173 }, { "clip_fraction": 0.0, "completion_length": 2643.138916015625, "dapo/avg_reward_std": 0.31710357325417654, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46031746977851506, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 51.45833333333333, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19885714285714284, "grad_norm": 0.02673209458589554, "kl": 0.00010142475366592407, "learning_rate": 1.4904706411523448e-07, "loss": 0.0252, "reward": 0.5322555489838123, "reward_std": 0.9057421013712883, "step": 174 }, { "clip_fraction": 0.0, "completion_length": 2441.3437576293945, "dapo/avg_reward_std": 0.30628569194903743, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.38461538977347887, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 41.041666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2, "grad_norm": 0.04055117443203926, "kl": 4.247203469276428e-05, "learning_rate": 1.4554267916537495e-07, "loss": 0.0974, "reward": 0.6256343480199575, "reward_std": 0.9141717404127121, "step": 175 }, { "clip_fraction": 0.0, "completion_length": 2001.5173797607422, "dapo/avg_reward_std": 0.28915207616744504, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3817204381189039, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 29.285714285714285, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20114285714285715, "grad_norm": 0.03139885142445564, "kl": 8.495151996612549e-05, "learning_rate": 1.4216149583350755e-07, "loss": 0.0178, "reward": 0.5467482833191752, "reward_std": 0.9077746942639351, "step": 176 }, { "clip_fraction": 0.0, "completion_length": 2707.1875, "dapo/avg_reward_std": 0.2716821462943636, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3620689732247385, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 37.5, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2022857142857143, "grad_norm": 0.027195578441023827, "kl": 3.4984201192855835e-05, "learning_rate": 1.3890454406082956e-07, "loss": 0.0243, "reward": 0.4738291520625353, "reward_std": 0.9582962840795517, "step": 177 }, { "clip_fraction": 0.0, "completion_length": 2927.2534790039062, "dapo/avg_reward_std": 0.2845180779695511, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3750000127724239, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 34.49404761904761, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20342857142857143, "grad_norm": 0.0315893292427063, "kl": 9.309500455856323e-05, "learning_rate": 1.3577281594640182e-07, "loss": 0.067, "reward": 0.52550208568573, "reward_std": 0.9910342618823051, "step": 178 }, { "clip_fraction": 0.0, "completion_length": 2337.701400756836, "dapo/avg_reward_std": 0.18291032314300537, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.25438597092502996, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 32.81249999999999, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20457142857142857, "grad_norm": 0.031005509197711945, "kl": 9.676814079284668e-05, "learning_rate": 1.3276726544494571e-07, "loss": 0.0165, "reward": 0.6187671273946762, "reward_std": 0.9665273353457451, "step": 179 }, { "clip_fraction": 0.0, "completion_length": 2257.3958892822266, "dapo/avg_reward_std": 0.20009312199221718, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30092593158284825, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 40.95238095238095, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2057142857142857, "grad_norm": 0.0394003801047802, "kl": 6.996467709541321e-05, "learning_rate": 1.2988880807625927e-07, "loss": 0.0627, "reward": 0.7572303153574467, "reward_std": 0.9510952234268188, "step": 180 }, { "clip_fraction": 0.0, "completion_length": 2533.9375534057617, "dapo/avg_reward_std": 0.37206994990507763, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5740740895271301, "dapo/num_sampling_attempts": 2.25, "dapo/sampling_efficiency": 51.041666666666664, "dapo/total_prompts_processed": 13.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20685714285714285, "grad_norm": 0.03264293819665909, "kl": 4.2844563722610474e-05, "learning_rate": 1.2713832064634125e-07, "loss": 0.0513, "reward": 0.7092031128704548, "reward_std": 1.0104939341545105, "step": 181 }, { "clip_fraction": 0.0, "completion_length": 2425.8055572509766, "dapo/avg_reward_std": 0.275991202547, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.384615390919722, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 41.041666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.208, "grad_norm": 0.033197954297065735, "kl": 6.585032679140568e-05, "learning_rate": 1.2451664098030743e-07, "loss": 0.0327, "reward": 0.5725661776959896, "reward_std": 0.9082557633519173, "step": 182 }, { "clip_fraction": 0.0, "completion_length": 2288.0799255371094, "dapo/avg_reward_std": 0.31956043162129144, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4318181872367859, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 52.083333333333336, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20914285714285713, "grad_norm": 0.0300610288977623, "kl": 9.128451347351074e-05, "learning_rate": 1.220245676671809e-07, "loss": 0.0567, "reward": 0.7111962893977761, "reward_std": 0.9172193482518196, "step": 183 }, { "clip_fraction": 0.0, "completion_length": 2212.138900756836, "dapo/avg_reward_std": 0.31106447339057924, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4066666769981384, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 40.97222222222222, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2102857142857143, "grad_norm": 0.03711786866188049, "kl": 9.056925773620605e-05, "learning_rate": 1.1966285981663407e-07, "loss": 0.0405, "reward": 0.505124656483531, "reward_std": 0.9274496361613274, "step": 184 }, { "clip_fraction": 0.0, "completion_length": 2350.8820037841797, "dapo/avg_reward_std": 0.21689824704770688, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3209876600239012, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 47.22222222222222, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21142857142857144, "grad_norm": 0.03581295162439346, "kl": 0.00011820532381534576, "learning_rate": 1.1743223682775649e-07, "loss": 0.0582, "reward": 0.6189532484859228, "reward_std": 0.92426348477602, "step": 185 }, { "clip_fraction": 0.0, "completion_length": 2414.6770629882812, "dapo/avg_reward_std": 0.26570350316263014, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333333749924937, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 41.785714285714285, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21257142857142858, "grad_norm": 0.04000677913427353, "kl": 5.166977643966675e-05, "learning_rate": 1.1533337816991931e-07, "loss": 0.0842, "reward": 0.6384202986955643, "reward_std": 0.9535242542624474, "step": 186 }, { "clip_fraction": 0.0, "completion_length": 2179.180564880371, "dapo/avg_reward_std": 0.267340756695846, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.362068974766238, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 31.25, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21371428571428572, "grad_norm": 0.03956381976604462, "kl": 7.00727105140686e-05, "learning_rate": 1.1336692317580158e-07, "loss": 0.0838, "reward": 0.6583898914977908, "reward_std": 0.9566742405295372, "step": 187 }, { "clip_fraction": 0.0, "completion_length": 2340.65975189209, "dapo/avg_reward_std": 0.19622711837291718, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31770833721384406, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 55.51136363636363, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21485714285714286, "grad_norm": 0.03709344565868378, "kl": 9.210407733917236e-05, "learning_rate": 1.1153347084664419e-07, "loss": 0.0542, "reward": 0.5126780550926924, "reward_std": 0.9266727864742279, "step": 188 }, { "clip_fraction": 0.0, "completion_length": 3183.7395782470703, "dapo/avg_reward_std": 0.19985724004303537, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.23577236293292628, "dapo/num_sampling_attempts": 5.125, "dapo/sampling_efficiency": 23.1547619047619, "dapo/total_prompts_processed": 30.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.216, "grad_norm": 0.025569448247551918, "kl": 3.505079075694084e-05, "learning_rate": 1.0983357966978745e-07, "loss": 0.0446, "reward": 0.524140851572156, "reward_std": 0.9313696026802063, "step": 189 }, { "clip_fraction": 0.0, "completion_length": 2137.0764083862305, "dapo/avg_reward_std": 0.2310014808177948, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333334028720857, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 49.479166666666664, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21714285714285714, "grad_norm": 0.049144402146339417, "kl": 0.00011414289474487305, "learning_rate": 1.0826776744855121e-07, "loss": 0.0597, "reward": 0.6003488898277283, "reward_std": 0.9967769384384155, "step": 190 }, { "clip_fraction": 0.0, "completion_length": 2711.965301513672, "dapo/avg_reward_std": 0.27090639670689903, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3388888930281003, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 42.604166666666664, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21828571428571428, "grad_norm": 0.03207146376371384, "kl": 7.285922765731812e-05, "learning_rate": 1.068365111445064e-07, "loss": 0.0774, "reward": 0.5157463289797306, "reward_std": 0.9445067569613457, "step": 191 }, { "clip_fraction": 0.0, "completion_length": 2634.809066772461, "dapo/avg_reward_std": 0.23276896492854968, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.29729730414377675, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 31.38888888888889, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21942857142857142, "grad_norm": 0.026157336309552193, "kl": 4.951097071170807e-05, "learning_rate": 1.0554024673218806e-07, "loss": 0.0183, "reward": 0.4917615167796612, "reward_std": 0.932147391140461, "step": 192 }, { "clip_fraction": 0.0, "completion_length": 2687.6562423706055, "dapo/avg_reward_std": 0.1842694640159607, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3000000034769376, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 37.20238095238095, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22057142857142858, "grad_norm": 0.036305345594882965, "kl": 5.197897553443909e-05, "learning_rate": 1.0437936906629334e-07, "loss": 0.0737, "reward": 0.8177419528365135, "reward_std": 0.9367102533578873, "step": 193 }, { "clip_fraction": 0.0, "completion_length": 2567.093780517578, "dapo/avg_reward_std": 0.2292217422615398, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30808081364992895, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 36.284722222222214, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22171428571428572, "grad_norm": 0.03788081929087639, "kl": 8.841603994369507e-05, "learning_rate": 1.0335423176140511e-07, "loss": 0.0745, "reward": 0.4994155182503164, "reward_std": 0.9395617768168449, "step": 194 }, { "clip_fraction": 0.0, "completion_length": 2132.22225189209, "dapo/avg_reward_std": 0.23152823698136113, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34408602791447795, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 46.800595238095234, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22285714285714286, "grad_norm": 0.03888849914073944, "kl": 7.880479097366333e-05, "learning_rate": 1.0246514708427701e-07, "loss": 0.0078, "reward": 0.4982965085655451, "reward_std": 0.9277759939432144, "step": 195 }, { "clip_fraction": 0.0, "completion_length": 2242.8437881469727, "dapo/avg_reward_std": 0.2252171416031687, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.27192983148913635, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 42.49999999999999, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.224, "grad_norm": 0.03532218188047409, "kl": 8.079037070274353e-05, "learning_rate": 1.017123858587145e-07, "loss": -0.0036, "reward": 0.6249313289299607, "reward_std": 0.9415610581636429, "step": 196 }, { "clip_fraction": 0.0, "completion_length": 2186.913246154785, "dapo/avg_reward_std": 0.2062954322287911, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26754386566187205, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 27.549603174603174, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22514285714285714, "grad_norm": 0.05644107237458229, "kl": 0.00012712180614471436, "learning_rate": 1.0109617738307911e-07, "loss": 0.0266, "reward": 0.6248354203999043, "reward_std": 0.9687103852629662, "step": 197 }, { "clip_fraction": 0.0, "completion_length": 2853.7430725097656, "dapo/avg_reward_std": 0.2791443226429132, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41666667277996355, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 45.3125, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22628571428571428, "grad_norm": 0.025631451979279518, "kl": 7.095187902450562e-05, "learning_rate": 1.0061670936044178e-07, "loss": 0.0195, "reward": 0.683892990462482, "reward_std": 0.9487637504935265, "step": 198 }, { "clip_fraction": 0.0, "completion_length": 2660.218780517578, "dapo/avg_reward_std": 0.24377418825259575, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3910256469478974, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 56.597222222222214, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22742857142857142, "grad_norm": 0.034018680453300476, "kl": 6.149709224700928e-05, "learning_rate": 1.002741278414069e-07, "loss": 0.0404, "reward": 0.565577644854784, "reward_std": 0.9079905152320862, "step": 199 }, { "clip_fraction": 0.0, "completion_length": 2421.875015258789, "dapo/avg_reward_std": 0.3100067762037118, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4027777823309104, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 45.83333333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22857142857142856, "grad_norm": 0.030885452404618263, "kl": 7.659196853637695e-05, "learning_rate": 1.0006853717962393e-07, "loss": 0.0132, "reward": 0.5110834892839193, "reward_std": 0.8930082246661186, "step": 200 }, { "epoch": 0.22857142857142856, "step": 200, "total_flos": 0.0, "train_loss": 0.009447227440541611, "train_runtime": 101500.2967, "train_samples_per_second": 0.095, "train_steps_per_second": 0.002 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }