| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.22857142857142856, | |
| "eval_steps": 500, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2124.791679382324, | |
| "dapo/avg_reward_std": 0.28261276125907897, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.42666667342185977, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 45.83333333333333, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.001142857142857143, | |
| "grad_norm": 0.03718917816877365, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": -0.0465, | |
| "reward": 0.6372265852987766, | |
| "reward_std": 0.9629172012209892, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2559.6631774902344, | |
| "dapo/avg_reward_std": 0.2737089714833668, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.39285715403301374, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 32.291666666666664, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.002285714285714286, | |
| "grad_norm": 0.031548872590065, | |
| "kl": 0.0, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0292, | |
| "reward": 0.2883484517224133, | |
| "reward_std": 0.9225177392363548, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2259.0243072509766, | |
| "dapo/avg_reward_std": 0.30627372419392623, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40740741734151487, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 38.33333333333333, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.0034285714285714284, | |
| "grad_norm": 0.028476394712924957, | |
| "kl": 3.738701343536377e-05, | |
| "learning_rate": 2e-07, | |
| "loss": 0.0118, | |
| "reward": 0.5692771524190903, | |
| "reward_std": 0.9722258150577545, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2388.763916015625, | |
| "dapo/avg_reward_std": 0.2417103610932827, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34895834093913436, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 29.479166666666664, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.004571428571428572, | |
| "grad_norm": 0.03074878267943859, | |
| "kl": 3.4555792808532715e-05, | |
| "learning_rate": 3e-07, | |
| "loss": 0.0428, | |
| "reward": 0.5176859218627214, | |
| "reward_std": 0.9351213574409485, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2228.9131927490234, | |
| "dapo/avg_reward_std": 0.24784977205338016, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3494623731220922, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 34.375, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.005714285714285714, | |
| "grad_norm": 0.03052515536546707, | |
| "kl": 4.2438507080078125e-05, | |
| "learning_rate": 4e-07, | |
| "loss": 0.0573, | |
| "reward": 0.5747799873352051, | |
| "reward_std": 0.9150463417172432, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2526.2743377685547, | |
| "dapo/avg_reward_std": 0.31032066589052026, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4772727367552844, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 39.58333333333333, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.006857142857142857, | |
| "grad_norm": 0.031065233051776886, | |
| "kl": 6.331503391265869e-05, | |
| "learning_rate": 5e-07, | |
| "loss": 0.068, | |
| "reward": 0.49577395524829626, | |
| "reward_std": 0.9604900777339935, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2096.857650756836, | |
| "dapo/avg_reward_std": 0.30248596491637053, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.43827161303272955, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 33.33333333333333, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.008, | |
| "grad_norm": 0.03395611792802811, | |
| "kl": 3.603100776672363e-05, | |
| "learning_rate": 6e-07, | |
| "loss": 0.0104, | |
| "reward": 0.6337036956101656, | |
| "reward_std": 0.9339632987976074, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2080.482681274414, | |
| "dapo/avg_reward_std": 0.2619025791063905, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3489583395421505, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 27.82738095238095, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.009142857142857144, | |
| "grad_norm": 0.030713744461536407, | |
| "kl": 3.699958324432373e-05, | |
| "learning_rate": 7e-07, | |
| "loss": 0.0191, | |
| "reward": 0.5047293808311224, | |
| "reward_std": 0.9456561654806137, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2575.715316772461, | |
| "dapo/avg_reward_std": 0.26183396059533826, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4275362387947414, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 56.25, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.010285714285714285, | |
| "grad_norm": 0.02862783893942833, | |
| "kl": 3.787875175476074e-05, | |
| "learning_rate": 8e-07, | |
| "loss": 0.0251, | |
| "reward": 0.49641977716237307, | |
| "reward_std": 0.9346907436847687, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2574.7951431274414, | |
| "dapo/avg_reward_std": 0.2888991279261453, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46031746694019865, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 61.875, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.011428571428571429, | |
| "grad_norm": 0.03313002362847328, | |
| "kl": 2.9653310775756836e-05, | |
| "learning_rate": 9e-07, | |
| "loss": 0.0131, | |
| "reward": 0.6514056231826544, | |
| "reward_std": 0.9486276879906654, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2648.3541870117188, | |
| "dapo/avg_reward_std": 0.1985154973136054, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.23333333631356556, | |
| "dapo/num_sampling_attempts": 5.625, | |
| "dapo/sampling_efficiency": 22.747252747252745, | |
| "dapo/total_prompts_processed": 33.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.012571428571428572, | |
| "grad_norm": 0.02842891961336136, | |
| "kl": 4.6372413635253906e-05, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0228, | |
| "reward": 0.3831507060676813, | |
| "reward_std": 0.9138674512505531, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2340.7708435058594, | |
| "dapo/avg_reward_std": 0.21896107792854308, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.25000000558793545, | |
| "dapo/num_sampling_attempts": 5.0, | |
| "dapo/sampling_efficiency": 29.791666666666664, | |
| "dapo/total_prompts_processed": 30.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.013714285714285714, | |
| "grad_norm": 0.02896970883011818, | |
| "kl": 3.764033317565918e-05, | |
| "learning_rate": 9.997258721585931e-07, | |
| "loss": 0.0141, | |
| "reward": 0.3742078524082899, | |
| "reward_std": 0.9111683145165443, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2731.9687576293945, | |
| "dapo/avg_reward_std": 0.2593883651274222, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.39506174016881873, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 43.95833333333333, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.014857142857142857, | |
| "grad_norm": 0.028494343161582947, | |
| "kl": 4.1812658309936523e-05, | |
| "learning_rate": 9.989038226169207e-07, | |
| "loss": 0.0482, | |
| "reward": 0.37119605229236186, | |
| "reward_std": 0.9484475553035736, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2346.684066772461, | |
| "dapo/avg_reward_std": 0.2633256334247011, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3787878860126842, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 40.416666666666664, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.016, | |
| "grad_norm": 0.03419339284300804, | |
| "kl": 3.219395875930786e-05, | |
| "learning_rate": 9.975348529157229e-07, | |
| "loss": 0.0443, | |
| "reward": 0.5307169873267412, | |
| "reward_std": 0.8819384500384331, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2438.8437881469727, | |
| "dapo/avg_reward_std": 0.31698794450078693, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.48412699571677614, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 49.99999999999999, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.017142857142857144, | |
| "grad_norm": 0.03230522945523262, | |
| "kl": 3.4749507904052734e-05, | |
| "learning_rate": 9.956206309337066e-07, | |
| "loss": 0.0519, | |
| "reward": 0.6968788839876652, | |
| "reward_std": 0.9826493486762047, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2835.3125076293945, | |
| "dapo/avg_reward_std": 0.2820873036980629, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36111111876865226, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 49.375, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.018285714285714287, | |
| "grad_norm": 0.026719439774751663, | |
| "kl": 3.375113010406494e-05, | |
| "learning_rate": 9.931634888554935e-07, | |
| "loss": 0.0158, | |
| "reward": 0.4585288055241108, | |
| "reward_std": 0.9621468484401703, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2489.513870239258, | |
| "dapo/avg_reward_std": 0.24821309347947437, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35000000447034835, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 51.25, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.019428571428571427, | |
| "grad_norm": 0.030841730535030365, | |
| "kl": 3.2588839530944824e-05, | |
| "learning_rate": 9.901664203302124e-07, | |
| "loss": 0.0342, | |
| "reward": 0.4615583084523678, | |
| "reward_std": 0.8882262408733368, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2291.8854217529297, | |
| "dapo/avg_reward_std": 0.3492339625954628, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4000000149011612, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 46.87499999999999, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.02057142857142857, | |
| "grad_norm": 0.4981432557106018, | |
| "kl": 4.331767559051514e-05, | |
| "learning_rate": 9.866330768241983e-07, | |
| "loss": 0.0782, | |
| "reward": 0.5650830613449216, | |
| "reward_std": 0.960162565112114, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1727.9479217529297, | |
| "dapo/avg_reward_std": 0.2201171379822951, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2863247940937678, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 27.01388888888889, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.021714285714285714, | |
| "grad_norm": 0.034473638981580734, | |
| "kl": 2.7894973754882812e-05, | |
| "learning_rate": 9.825677631722435e-07, | |
| "loss": -0.0027, | |
| "reward": 0.5283844769001007, | |
| "reward_std": 0.9302913695573807, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1848.9062576293945, | |
| "dapo/avg_reward_std": 0.2080523163983316, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3030303070942561, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 40.74404761904762, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.022857142857142857, | |
| "grad_norm": 0.03650596737861633, | |
| "kl": 2.997368574142456e-05, | |
| "learning_rate": 9.779754323328192e-07, | |
| "loss": 0.0066, | |
| "reward": 0.47246094793081284, | |
| "reward_std": 0.925552561879158, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2310.6354370117188, | |
| "dapo/avg_reward_std": 0.18431008011102676, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26250000260770323, | |
| "dapo/num_sampling_attempts": 5.0, | |
| "dapo/sampling_efficiency": 32.53472222222222, | |
| "dapo/total_prompts_processed": 30.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.024, | |
| "grad_norm": 0.02872428111732006, | |
| "kl": 3.707408905029297e-05, | |
| "learning_rate": 9.728616793536587e-07, | |
| "loss": 0.0041, | |
| "reward": 0.5466808546334505, | |
| "reward_std": 0.9614025354385376, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2628.4618072509766, | |
| "dapo/avg_reward_std": 0.27239492272629456, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3235294157091309, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 26.875, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.025142857142857144, | |
| "grad_norm": 0.03156612813472748, | |
| "kl": 4.024803638458252e-05, | |
| "learning_rate": 9.672327345550543e-07, | |
| "loss": 0.0396, | |
| "reward": 0.4231120813637972, | |
| "reward_std": 0.9312948659062386, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2495.7673873901367, | |
| "dapo/avg_reward_std": 0.30711027341229574, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3988095335662365, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 31.249999999999993, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.026285714285714287, | |
| "grad_norm": 0.028224533423781395, | |
| "kl": 3.413856029510498e-05, | |
| "learning_rate": 9.610954559391704e-07, | |
| "loss": 0.0195, | |
| "reward": 0.5285261562094092, | |
| "reward_std": 0.9373103529214859, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1944.9201278686523, | |
| "dapo/avg_reward_std": 0.29968351125717163, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4533333480358124, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 44.27083333333333, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.027428571428571427, | |
| "grad_norm": 0.03633056953549385, | |
| "kl": 3.1538307666778564e-05, | |
| "learning_rate": 9.54457320834625e-07, | |
| "loss": 0.0693, | |
| "reward": 0.5397752095013857, | |
| "reward_std": 0.9495814517140388, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2616.593780517578, | |
| "dapo/avg_reward_std": 0.16712580593127124, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.19811321232678755, | |
| "dapo/num_sampling_attempts": 6.625, | |
| "dapo/sampling_efficiency": 19.166666666666664, | |
| "dapo/total_prompts_processed": 39.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.02857142857142857, | |
| "grad_norm": 0.024344539269804955, | |
| "kl": 3.676116466522217e-05, | |
| "learning_rate": 9.473264167865171e-07, | |
| "loss": 0.0139, | |
| "reward": 0.3185653127729893, | |
| "reward_std": 0.9151088818907738, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2116.7257232666016, | |
| "dapo/avg_reward_std": 0.27600910129218265, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33908046319566926, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 44.6875, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.029714285714285714, | |
| "grad_norm": 0.031155193224549294, | |
| "kl": 3.579258918762207e-05, | |
| "learning_rate": 9.397114317029974e-07, | |
| "loss": 0.0725, | |
| "reward": 0.5197067707777023, | |
| "reward_std": 0.8911866471171379, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2148.781265258789, | |
| "dapo/avg_reward_std": 0.24896243140101432, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31666667349636557, | |
| "dapo/num_sampling_attempts": 5.0, | |
| "dapo/sampling_efficiency": 22.63888888888889, | |
| "dapo/total_prompts_processed": 30.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.030857142857142857, | |
| "grad_norm": 0.03762076795101166, | |
| "kl": 3.104656934738159e-05, | |
| "learning_rate": 9.316216432703916e-07, | |
| "loss": -0.0333, | |
| "reward": 0.5081147998571396, | |
| "reward_std": 0.9414060413837433, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2357.4062881469727, | |
| "dapo/avg_reward_std": 0.22747237629750194, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2990196110571132, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 34.49404761904761, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.032, | |
| "grad_norm": 0.02982812374830246, | |
| "kl": 2.621859312057495e-05, | |
| "learning_rate": 9.230669076497687e-07, | |
| "loss": 0.0231, | |
| "reward": 0.7687274925410748, | |
| "reward_std": 0.9382865354418755, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2772.941047668457, | |
| "dapo/avg_reward_std": 0.2300749086972439, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.28282828854792047, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 48.482142857142854, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.03314285714285714, | |
| "grad_norm": 0.030160676687955856, | |
| "kl": 2.812594175338745e-05, | |
| "learning_rate": 9.140576474687263e-07, | |
| "loss": 0.0019, | |
| "reward": 0.41888202354311943, | |
| "reward_std": 0.9044449031352997, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2038.208366394043, | |
| "dapo/avg_reward_std": 0.1657373425437183, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.21544715943859843, | |
| "dapo/num_sampling_attempts": 5.125, | |
| "dapo/sampling_efficiency": 45.71969696969697, | |
| "dapo/total_prompts_processed": 30.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.03428571428571429, | |
| "grad_norm": 0.040263354778289795, | |
| "kl": 3.8951635360717773e-05, | |
| "learning_rate": 9.046048391230247e-07, | |
| "loss": 0.0158, | |
| "reward": 0.6328074131160975, | |
| "reward_std": 0.913766622543335, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2610.149299621582, | |
| "dapo/avg_reward_std": 0.24689391613006592, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.39333333909511564, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 50.74404761904762, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.03542857142857143, | |
| "grad_norm": 0.03027450665831566, | |
| "kl": 3.1307339668273926e-05, | |
| "learning_rate": 8.9471999940354e-07, | |
| "loss": 0.0264, | |
| "reward": 0.6263847425580025, | |
| "reward_std": 0.9919310808181763, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2505.697952270508, | |
| "dapo/avg_reward_std": 0.26817766793312564, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34946237216072695, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 33.68055555555555, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.036571428571428574, | |
| "grad_norm": 0.02961750328540802, | |
| "kl": 2.7127563953399658e-05, | |
| "learning_rate": 8.844151714648274e-07, | |
| "loss": 0.0166, | |
| "reward": 0.6057538501918316, | |
| "reward_std": 0.9584499895572662, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2879.420181274414, | |
| "dapo/avg_reward_std": 0.24957223816050422, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2824074120985137, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 35.51136363636363, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.037714285714285714, | |
| "grad_norm": 0.028292173519730568, | |
| "kl": 2.950429916381836e-05, | |
| "learning_rate": 8.737029101523929e-07, | |
| "loss": 0.032, | |
| "reward": 0.4974850555881858, | |
| "reward_std": 0.9284666180610657, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2605.826400756836, | |
| "dapo/avg_reward_std": 0.27582160755991936, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41666667101283866, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 42.70833333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.038857142857142854, | |
| "grad_norm": 0.028110038489103317, | |
| "kl": 3.172457218170166e-05, | |
| "learning_rate": 8.625962667065487e-07, | |
| "loss": 0.0358, | |
| "reward": 0.5906332535669208, | |
| "reward_std": 0.8970795348286629, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2197.09033203125, | |
| "dapo/avg_reward_std": 0.2899627904097239, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3722222303350767, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 33.035714285714285, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04, | |
| "grad_norm": 0.03307325020432472, | |
| "kl": 3.203749656677246e-05, | |
| "learning_rate": 8.511087728614862e-07, | |
| "loss": 0.024, | |
| "reward": 0.6485824584960938, | |
| "reward_std": 0.9721796959638596, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2999.3507080078125, | |
| "dapo/avg_reward_std": 0.20956570729613305, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26250000707805154, | |
| "dapo/num_sampling_attempts": 5.0, | |
| "dapo/sampling_efficiency": 22.51488095238095, | |
| "dapo/total_prompts_processed": 30.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04114285714285714, | |
| "grad_norm": 0.028769005089998245, | |
| "kl": 3.2588839530944824e-05, | |
| "learning_rate": 8.392544243589427e-07, | |
| "loss": 0.0619, | |
| "reward": 0.48274967167526484, | |
| "reward_std": 0.8917501345276833, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2790.3020935058594, | |
| "dapo/avg_reward_std": 0.30638546783190507, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.42307692995438206, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 35.20833333333333, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04228571428571429, | |
| "grad_norm": 0.026894288137555122, | |
| "kl": 3.5509467124938965e-05, | |
| "learning_rate": 8.270476638965461e-07, | |
| "loss": 0.0283, | |
| "reward": 0.5098943561315536, | |
| "reward_std": 0.9712026715278625, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2677.1493530273438, | |
| "dapo/avg_reward_std": 0.18201035128699408, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2481481538878547, | |
| "dapo/num_sampling_attempts": 5.625, | |
| "dapo/sampling_efficiency": 25.416666666666664, | |
| "dapo/total_prompts_processed": 33.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04342857142857143, | |
| "grad_norm": 0.027049226686358452, | |
| "kl": 2.641230821609497e-05, | |
| "learning_rate": 8.145033635316128e-07, | |
| "loss": 0.0457, | |
| "reward": 0.507211847230792, | |
| "reward_std": 0.9677048400044441, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3130.437530517578, | |
| "dapo/avg_reward_std": 0.2055508976473528, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3137254956014016, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 26.160714285714278, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.044571428571428574, | |
| "grad_norm": 0.027378324419260025, | |
| "kl": 4.1447579860687256e-05, | |
| "learning_rate": 8.01636806561836e-07, | |
| "loss": 0.0522, | |
| "reward": 0.5557294674217701, | |
| "reward_std": 0.9394431114196777, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2026.0486297607422, | |
| "dapo/avg_reward_std": 0.20257248067193562, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2916666749450896, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 29.86111111111111, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.045714285714285714, | |
| "grad_norm": 0.032405752688646317, | |
| "kl": 1.9609928131103516e-05, | |
| "learning_rate": 7.884636689049422e-07, | |
| "loss": 0.0336, | |
| "reward": 0.5694049745798111, | |
| "reward_std": 0.9232507050037384, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2640.326416015625, | |
| "dapo/avg_reward_std": 0.21237638321789828, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34343435231483344, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 29.791666666666664, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.046857142857142854, | |
| "grad_norm": 0.027951980009675026, | |
| "kl": 2.6788562536239624e-05, | |
| "learning_rate": 7.75e-07, | |
| "loss": 0.0234, | |
| "reward": 0.5206635389477015, | |
| "reward_std": 0.9366661533713341, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2681.18058013916, | |
| "dapo/avg_reward_std": 0.24859387196343521, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3218390854268238, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 35.416666666666664, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.048, | |
| "grad_norm": 0.03045503795146942, | |
| "kl": 3.679096698760986e-05, | |
| "learning_rate": 7.612622032536507e-07, | |
| "loss": 0.0237, | |
| "reward": 0.4700614605098963, | |
| "reward_std": 0.9389084428548813, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2398.7118072509766, | |
| "dapo/avg_reward_std": 0.2748411413161985, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.322580651890847, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 29.999999999999996, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04914285714285714, | |
| "grad_norm": 0.02945004403591156, | |
| "kl": 2.7336180210113525e-05, | |
| "learning_rate": 7.472670160550848e-07, | |
| "loss": -0.0567, | |
| "reward": 0.6530590765178204, | |
| "reward_std": 0.929742157459259, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1968.3437805175781, | |
| "dapo/avg_reward_std": 0.20995861871374977, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2685185232096248, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 42.410714285714285, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05028571428571429, | |
| "grad_norm": 0.0354490801692009, | |
| "kl": 1.671910285949707e-05, | |
| "learning_rate": 7.330314893841101e-07, | |
| "loss": 0.0869, | |
| "reward": 0.6298563629388809, | |
| "reward_std": 0.9230287447571754, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2218.2743225097656, | |
| "dapo/avg_reward_std": 0.260509067773819, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36666667262713115, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 33.229166666666664, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05142857142857143, | |
| "grad_norm": 0.02954520471394062, | |
| "kl": 2.514384686946869e-05, | |
| "learning_rate": 7.185729670371604e-07, | |
| "loss": 0.0031, | |
| "reward": 0.6325996220111847, | |
| "reward_std": 0.9546400979161263, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2081.1458587646484, | |
| "dapo/avg_reward_std": 0.2187695243666249, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2849462402443732, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 37.22222222222222, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.052571428571428575, | |
| "grad_norm": 0.033979643136262894, | |
| "kl": 2.872943878173828e-05, | |
| "learning_rate": 7.039090644965509e-07, | |
| "loss": -0.0104, | |
| "reward": 0.5167231820523739, | |
| "reward_std": 0.9025325626134872, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2117.541702270508, | |
| "dapo/avg_reward_std": 0.18839570879936218, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26811594580826553, | |
| "dapo/num_sampling_attempts": 5.75, | |
| "dapo/sampling_efficiency": 20.441919191919194, | |
| "dapo/total_prompts_processed": 34.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.053714285714285714, | |
| "grad_norm": 0.03177877888083458, | |
| "kl": 3.078579902648926e-05, | |
| "learning_rate": 6.890576474687263e-07, | |
| "loss": 0.0077, | |
| "reward": 0.3684711689129472, | |
| "reward_std": 0.8811993673443794, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2177.4444885253906, | |
| "dapo/avg_reward_std": 0.19605370469995448, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2702702763112816, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 39.40972222222222, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.054857142857142854, | |
| "grad_norm": 0.04067355766892433, | |
| "kl": 2.4996697902679443e-05, | |
| "learning_rate": 6.740368101176495e-07, | |
| "loss": 0.0053, | |
| "reward": 0.5635924749076366, | |
| "reward_std": 0.9323460608720779, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3022.513885498047, | |
| "dapo/avg_reward_std": 0.22437315998655377, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30808081003752624, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 51.880411255411246, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.056, | |
| "grad_norm": 0.028243908658623695, | |
| "kl": 3.2588839530944824e-05, | |
| "learning_rate": 6.588648530198504e-07, | |
| "loss": 0.0463, | |
| "reward": 0.5983518976718187, | |
| "reward_std": 0.97667645663023, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2369.423614501953, | |
| "dapo/avg_reward_std": 0.25065614397709185, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36538461996958804, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 51.666666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05714285714285714, | |
| "grad_norm": 0.03361990302801132, | |
| "kl": 2.4838373064994812e-05, | |
| "learning_rate": 6.435602608679916e-07, | |
| "loss": -0.0041, | |
| "reward": 0.6849855165928602, | |
| "reward_std": 0.9522178247570992, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2274.833396911621, | |
| "dapo/avg_reward_std": 0.22345838612980312, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2916666745311684, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 27.132936507936506, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05828571428571429, | |
| "grad_norm": 0.031927697360515594, | |
| "kl": 1.7890706658363342e-05, | |
| "learning_rate": 6.281416799501187e-07, | |
| "loss": 0.0196, | |
| "reward": 0.8541890066117048, | |
| "reward_std": 0.9146186113357544, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2918.0799102783203, | |
| "dapo/avg_reward_std": 0.28684074508732765, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333386429425, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 41.36904761904762, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05942857142857143, | |
| "grad_norm": 0.026396779343485832, | |
| "kl": 2.3087020963430405e-05, | |
| "learning_rate": 6.126278954320294e-07, | |
| "loss": 0.0343, | |
| "reward": 0.44786757230758667, | |
| "reward_std": 0.9706326127052307, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2045.833339691162, | |
| "dapo/avg_reward_std": 0.2355064716604021, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2870370431078805, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 31.354166666666664, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.060571428571428575, | |
| "grad_norm": 0.04913632944226265, | |
| "kl": 2.1755695343017578e-05, | |
| "learning_rate": 5.97037808470444e-07, | |
| "loss": 0.0387, | |
| "reward": 0.6510349959135056, | |
| "reward_std": 0.9507962614297867, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1948.9444427490234, | |
| "dapo/avg_reward_std": 0.243668794631958, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.366666671037674, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 56.5625, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.061714285714285715, | |
| "grad_norm": 0.040572620928287506, | |
| "kl": 2.1360814571380615e-05, | |
| "learning_rate": 5.813904131848564e-07, | |
| "loss": 0.0417, | |
| "reward": 0.5514028863981366, | |
| "reward_std": 0.9589040726423264, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2484.541648864746, | |
| "dapo/avg_reward_std": 0.30484401606596434, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.42307693224686843, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 42.18749999999999, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06285714285714286, | |
| "grad_norm": 0.0297782514244318, | |
| "kl": 2.2893771529197693e-05, | |
| "learning_rate": 5.657047735161255e-07, | |
| "loss": -0.0009, | |
| "reward": 0.4546010522171855, | |
| "reward_std": 0.9696914628148079, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1533.7361297607422, | |
| "dapo/avg_reward_std": 0.2159253837484302, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.29797980415098596, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 34.722222222222214, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.064, | |
| "grad_norm": 0.03312206640839577, | |
| "kl": 7.178634405136108e-06, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0108, | |
| "reward": 0.7257717102766037, | |
| "reward_std": 0.9033158496022224, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2934.4409942626953, | |
| "dapo/avg_reward_std": 0.2505974847337474, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36956522192644037, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 41.66666666666666, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06514285714285714, | |
| "grad_norm": 0.02451159618794918, | |
| "kl": 1.9356608390808105e-05, | |
| "learning_rate": 5.342952264838747e-07, | |
| "loss": 0.0483, | |
| "reward": 0.5572653282433748, | |
| "reward_std": 0.9176028743386269, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1933.5243377685547, | |
| "dapo/avg_reward_std": 0.20699472725391388, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3235294174622087, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 43.50198412698413, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06628571428571428, | |
| "grad_norm": 0.04205997660756111, | |
| "kl": 2.446398138999939e-05, | |
| "learning_rate": 5.186095868151436e-07, | |
| "loss": 0.035, | |
| "reward": 0.5425214860588312, | |
| "reward_std": 0.9688811302185059, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2404.819435119629, | |
| "dapo/avg_reward_std": 0.21416518474236512, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2649572701790394, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 28.070436507936506, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06742857142857143, | |
| "grad_norm": 0.032379262149333954, | |
| "kl": 2.0030885934829712e-05, | |
| "learning_rate": 5.02962191529556e-07, | |
| "loss": -0.0022, | |
| "reward": 0.5781768467277288, | |
| "reward_std": 0.9525356665253639, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2963.888931274414, | |
| "dapo/avg_reward_std": 0.32426256509054274, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.42857143637679873, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 58.035714285714285, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06857142857142857, | |
| "grad_norm": 0.027211569249629974, | |
| "kl": 1.7156358808279037e-05, | |
| "learning_rate": 4.873721045679706e-07, | |
| "loss": 0.0068, | |
| "reward": 0.44747511111199856, | |
| "reward_std": 0.9607158154249191, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2205.2465591430664, | |
| "dapo/avg_reward_std": 0.203433408588171, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2500000063329935, | |
| "dapo/num_sampling_attempts": 5.0, | |
| "dapo/sampling_efficiency": 38.46153846153846, | |
| "dapo/total_prompts_processed": 30.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06971428571428571, | |
| "grad_norm": 0.035166963934898376, | |
| "kl": 1.146271824836731e-05, | |
| "learning_rate": 4.7185832004988133e-07, | |
| "loss": 0.0016, | |
| "reward": 0.7233948148787022, | |
| "reward_std": 0.9537224471569061, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2170.302101135254, | |
| "dapo/avg_reward_std": 0.3071755821054632, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46212121776559134, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 52.5, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07085714285714285, | |
| "grad_norm": 0.032445963472127914, | |
| "kl": 1.7118407413363457e-05, | |
| "learning_rate": 4.5643973913200837e-07, | |
| "loss": 0.0133, | |
| "reward": 0.5614959334488958, | |
| "reward_std": 0.9226407110691071, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2304.038215637207, | |
| "dapo/avg_reward_std": 0.3201758420025861, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3827160596847534, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 33.33333333333333, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.072, | |
| "grad_norm": 0.03544686362147331, | |
| "kl": 1.1014439223799855e-05, | |
| "learning_rate": 4.4113514698014953e-07, | |
| "loss": 0.0809, | |
| "reward": 0.6520206034183502, | |
| "reward_std": 0.9506091177463531, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1901.3506965637207, | |
| "dapo/avg_reward_std": 0.2710137654233862, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33950617964620944, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 38.541666666666664, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07314285714285715, | |
| "grad_norm": 0.044119708240032196, | |
| "kl": 2.606213092803955e-05, | |
| "learning_rate": 4.2596318988235037e-07, | |
| "loss": 0.0059, | |
| "reward": 0.6546321045607328, | |
| "reward_std": 0.9510733336210251, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2792.0382232666016, | |
| "dapo/avg_reward_std": 0.2836403740303857, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36904762951391085, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 39.58333333333333, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07428571428571429, | |
| "grad_norm": 0.04388947784900665, | |
| "kl": 1.2818491086363792e-05, | |
| "learning_rate": 4.1094235253127374e-07, | |
| "loss": 0.0675, | |
| "reward": 0.5376700833439827, | |
| "reward_std": 0.9546815231442451, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3018.1111450195312, | |
| "dapo/avg_reward_std": 0.2566617141167323, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35000000993410746, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 29.583333333333325, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07542857142857143, | |
| "grad_norm": 0.030510403215885162, | |
| "kl": 2.337433397769928e-05, | |
| "learning_rate": 3.9609093550344907e-07, | |
| "loss": 0.067, | |
| "reward": 0.45654861629009247, | |
| "reward_std": 0.9348908290266991, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2246.7361183166504, | |
| "dapo/avg_reward_std": 0.17681238457963272, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2657657684506597, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 39.75198412698412, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07657142857142857, | |
| "grad_norm": 0.039485227316617966, | |
| "kl": 3.0115246772766113e-05, | |
| "learning_rate": 3.8142703296283953e-07, | |
| "loss": -0.0103, | |
| "reward": 0.559457328170538, | |
| "reward_std": 0.9844456240534782, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1877.3090591430664, | |
| "dapo/avg_reward_std": 0.21082516993795122, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2809523867709296, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 40.13888888888889, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07771428571428571, | |
| "grad_norm": 0.04208315163850784, | |
| "kl": 1.7916783690452576e-05, | |
| "learning_rate": 3.6696851061588994e-07, | |
| "loss": 0.0055, | |
| "reward": 0.71805115416646, | |
| "reward_std": 0.9486410617828369, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2743.187484741211, | |
| "dapo/avg_reward_std": 0.3629622704842511, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5882353020064971, | |
| "dapo/num_sampling_attempts": 2.125, | |
| "dapo/sampling_efficiency": 57.291666666666664, | |
| "dapo/total_prompts_processed": 12.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07885714285714286, | |
| "grad_norm": 0.046305615454912186, | |
| "kl": 1.8481165170669556e-05, | |
| "learning_rate": 3.5273298394491515e-07, | |
| "loss": 0.0753, | |
| "reward": 0.5533816255629063, | |
| "reward_std": 0.9835677221417427, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1971.8750610351562, | |
| "dapo/avg_reward_std": 0.290031298995018, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3958333432674408, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 50.11904761904761, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08, | |
| "grad_norm": 0.03249451890587807, | |
| "kl": 1.0361894965171814e-05, | |
| "learning_rate": 3.387377967463493e-07, | |
| "loss": 0.0123, | |
| "reward": 0.7815902195870876, | |
| "reward_std": 0.9491127580404282, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2149.5729370117188, | |
| "dapo/avg_reward_std": 0.30720199798715525, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37931035356274967, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 31.666666666666664, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08114285714285714, | |
| "grad_norm": 0.02995998226106167, | |
| "kl": 2.8252601623535156e-05, | |
| "learning_rate": 3.250000000000001e-07, | |
| "loss": 0.0769, | |
| "reward": 0.5328625496476889, | |
| "reward_std": 0.9026356488466263, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1963.1562538146973, | |
| "dapo/avg_reward_std": 0.27671699684399825, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4551282163995963, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 46.041666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08228571428571428, | |
| "grad_norm": 0.046918418258428574, | |
| "kl": 3.359094262123108e-05, | |
| "learning_rate": 3.115363310950578e-07, | |
| "loss": 0.0368, | |
| "reward": 0.32596728252246976, | |
| "reward_std": 0.917833186686039, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2666.1666717529297, | |
| "dapo/avg_reward_std": 0.2536189202219248, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34895834140479565, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 37.84722222222222, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08342857142857144, | |
| "grad_norm": 0.0253219623118639, | |
| "kl": 3.542192280292511e-05, | |
| "learning_rate": 2.9836319343816397e-07, | |
| "loss": 0.0107, | |
| "reward": 0.6293175183236599, | |
| "reward_std": 0.935965321958065, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2119.447982788086, | |
| "dapo/avg_reward_std": 0.26048696994781495, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4200000029802322, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 47.291666666666664, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08457142857142858, | |
| "grad_norm": 0.034480538219213486, | |
| "kl": 1.7508864402770996e-05, | |
| "learning_rate": 2.854966364683872e-07, | |
| "loss": 0.0483, | |
| "reward": 0.7494360618293285, | |
| "reward_std": 0.9492424502968788, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2078.9375, | |
| "dapo/avg_reward_std": 0.2828026126932215, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3580246976128331, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 35.11904761904762, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08571428571428572, | |
| "grad_norm": 0.03545458987355232, | |
| "kl": 1.3923272490501404e-05, | |
| "learning_rate": 2.729523361034538e-07, | |
| "loss": 0.0531, | |
| "reward": 0.5464182365685701, | |
| "reward_std": 0.9530047550797462, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2342.5416564941406, | |
| "dapo/avg_reward_std": 0.21854268149896103, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3080808154561303, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 32.341269841269835, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08685714285714285, | |
| "grad_norm": 0.02881987765431404, | |
| "kl": 1.169554889202118e-05, | |
| "learning_rate": 2.6074557564105724e-07, | |
| "loss": 0.0077, | |
| "reward": 0.5642017107456923, | |
| "reward_std": 0.9335212334990501, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3205.104217529297, | |
| "dapo/avg_reward_std": 0.2153491945493789, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2777777835726738, | |
| "dapo/num_sampling_attempts": 5.25, | |
| "dapo/sampling_efficiency": 23.45238095238095, | |
| "dapo/total_prompts_processed": 31.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.088, | |
| "grad_norm": 0.024909108877182007, | |
| "kl": 2.2567808628082275e-05, | |
| "learning_rate": 2.488912271385139e-07, | |
| "loss": 0.0436, | |
| "reward": 0.4511043671518564, | |
| "reward_std": 0.9582105726003647, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1984.7881927490234, | |
| "dapo/avg_reward_std": 0.2325562967194451, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3703703780968984, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 46.354166666666664, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08914285714285715, | |
| "grad_norm": 0.04120900481939316, | |
| "kl": 2.2590160369873047e-05, | |
| "learning_rate": 2.374037332934512e-07, | |
| "loss": 0.0514, | |
| "reward": 0.46765367314219475, | |
| "reward_std": 0.9171552434563637, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2322.930576324463, | |
| "dapo/avg_reward_std": 0.24565138667821884, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35416666977107525, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 49.375, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09028571428571429, | |
| "grad_norm": 0.03351881355047226, | |
| "kl": 1.6979873180389404e-05, | |
| "learning_rate": 2.2629708984760706e-07, | |
| "loss": 0.0813, | |
| "reward": 0.4460947550833225, | |
| "reward_std": 0.9485716819763184, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2418.187545776367, | |
| "dapo/avg_reward_std": 0.23119631229024945, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2929292975953131, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 37.013888888888886, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09142857142857143, | |
| "grad_norm": 0.03444164991378784, | |
| "kl": 1.9297003746032715e-05, | |
| "learning_rate": 2.1558482853517253e-07, | |
| "loss": -0.0123, | |
| "reward": 0.47735430393368006, | |
| "reward_std": 0.9275016784667969, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2673.1666870117188, | |
| "dapo/avg_reward_std": 0.29530651973826544, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.39285714977553915, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 40.52083333333333, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09257142857142857, | |
| "grad_norm": 0.02858138270676136, | |
| "kl": 1.998385414481163e-05, | |
| "learning_rate": 2.0528000059645995e-07, | |
| "loss": 0.034, | |
| "reward": 0.41152474470436573, | |
| "reward_std": 0.9514285027980804, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2257.954864501953, | |
| "dapo/avg_reward_std": 0.23162428935368856, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3277777835726738, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 39.72222222222222, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09371428571428571, | |
| "grad_norm": 0.034180980175733566, | |
| "kl": 1.03069469332695e-05, | |
| "learning_rate": 7.681643291108517e-07, | |
| "loss": 0.0478, | |
| "reward": 0.6525773257017136, | |
| "reward_std": 0.9826234132051468, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2630.8507080078125, | |
| "dapo/avg_reward_std": 0.25974711243595394, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3511904797383717, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 49.166666666666664, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09485714285714286, | |
| "grad_norm": 0.03644736111164093, | |
| "kl": 1.800060272216797e-05, | |
| "learning_rate": 7.612622032536507e-07, | |
| "loss": 0.0921, | |
| "reward": 0.4112757742404938, | |
| "reward_std": 0.9365755990147591, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2569.4896087646484, | |
| "dapo/avg_reward_std": 0.20397330891518367, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.22619048080274037, | |
| "dapo/num_sampling_attempts": 5.25, | |
| "dapo/sampling_efficiency": 33.541666666666664, | |
| "dapo/total_prompts_processed": 31.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.096, | |
| "grad_norm": 0.027630111202597618, | |
| "kl": 9.745359420776367e-06, | |
| "learning_rate": 7.54295724882796e-07, | |
| "loss": 0.0357, | |
| "reward": 0.41497555933892727, | |
| "reward_std": 0.9506618455052376, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2213.0660400390625, | |
| "dapo/avg_reward_std": 0.2754218357224618, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333334038334506, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 36.354166666666664, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09714285714285714, | |
| "grad_norm": 0.035216327756643295, | |
| "kl": 1.6536563634872437e-05, | |
| "learning_rate": 7.472670160550848e-07, | |
| "loss": 0.0527, | |
| "reward": 0.632079154253006, | |
| "reward_std": 0.9386599361896515, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2339.1215209960938, | |
| "dapo/avg_reward_std": 0.24339192857344946, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.291666673289405, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 35.3125, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09828571428571428, | |
| "grad_norm": 0.03125083073973656, | |
| "kl": 1.6085803508758545e-05, | |
| "learning_rate": 7.401782177833147e-07, | |
| "loss": -0.0221, | |
| "reward": 0.4631906310096383, | |
| "reward_std": 0.9198382347822189, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1837.8993301391602, | |
| "dapo/avg_reward_std": 0.22774873872598012, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3777777845660845, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 46.87499999999999, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09942857142857142, | |
| "grad_norm": 0.04138842225074768, | |
| "kl": 1.7467886209487915e-05, | |
| "learning_rate": 7.330314893841101e-07, | |
| "loss": 0.0024, | |
| "reward": 0.7271542213857174, | |
| "reward_std": 0.905590832233429, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2786.0416564941406, | |
| "dapo/avg_reward_std": 0.2095056755202157, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2952381019081388, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 35.65972222222222, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10057142857142858, | |
| "grad_norm": 0.025848887860774994, | |
| "kl": 7.427297532558441e-06, | |
| "learning_rate": 7.258290078201731e-07, | |
| "loss": 0.002, | |
| "reward": 0.43730420619249344, | |
| "reward_std": 0.9195110127329826, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2346.68754196167, | |
| "dapo/avg_reward_std": 0.19395678072440914, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2560975657003682, | |
| "dapo/num_sampling_attempts": 5.125, | |
| "dapo/sampling_efficiency": 35.01488095238095, | |
| "dapo/total_prompts_processed": 30.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10171428571428572, | |
| "grad_norm": 0.040970027446746826, | |
| "kl": 1.3796612620353699e-05, | |
| "learning_rate": 7.185729670371604e-07, | |
| "loss": 0.0476, | |
| "reward": 0.6351554682478309, | |
| "reward_std": 0.8568265736103058, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2486.21875, | |
| "dapo/avg_reward_std": 0.2474305311153675, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3735632284961898, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 37.61904761904762, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10285714285714286, | |
| "grad_norm": 0.030587567016482353, | |
| "kl": 1.4983117580413818e-05, | |
| "learning_rate": 7.11265577295385e-07, | |
| "loss": 0.0254, | |
| "reward": 0.6515812119469047, | |
| "reward_std": 0.9235646799206734, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2515.017402648926, | |
| "dapo/avg_reward_std": 0.25874078144197876, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3913043562484824, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 51.56249999999999, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.104, | |
| "grad_norm": 0.031289275735616684, | |
| "kl": 6.1551108956336975e-06, | |
| "learning_rate": 7.039090644965509e-07, | |
| "loss": 0.0328, | |
| "reward": 0.6403396036475897, | |
| "reward_std": 0.9428967460989952, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2979.027801513672, | |
| "dapo/avg_reward_std": 0.2504267347486396, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2543859713171658, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 35.63041125541125, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10514285714285715, | |
| "grad_norm": 0.029049718752503395, | |
| "kl": -1.2740492820739746e-06, | |
| "learning_rate": 6.965056695057204e-07, | |
| "loss": 0.0314, | |
| "reward": 0.535519327968359, | |
| "reward_std": 0.8926167041063309, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2552.562515258789, | |
| "dapo/avg_reward_std": 0.2413217886801689, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333334038334506, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 34.791666666666664, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10628571428571429, | |
| "grad_norm": 0.03139115869998932, | |
| "kl": 1.3202428817749023e-05, | |
| "learning_rate": 6.890576474687263e-07, | |
| "loss": 0.067, | |
| "reward": 0.6561751328408718, | |
| "reward_std": 0.9787176623940468, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2403.184051513672, | |
| "dapo/avg_reward_std": 0.29813223962600416, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40384616129673445, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 40.416666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10742857142857143, | |
| "grad_norm": 0.032709378749132156, | |
| "kl": 2.093333750963211e-05, | |
| "learning_rate": 6.815672671252315e-07, | |
| "loss": 0.0328, | |
| "reward": 0.556912356056273, | |
| "reward_std": 0.9464646279811859, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2963.795181274414, | |
| "dapo/avg_reward_std": 0.2564438986472594, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26068376577817476, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 24.07738095238095, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10857142857142857, | |
| "grad_norm": 0.023549171164631844, | |
| "kl": 9.554903954267502e-06, | |
| "learning_rate": 6.740368101176495e-07, | |
| "loss": 0.0142, | |
| "reward": 0.3492610058747232, | |
| "reward_std": 0.8781530037522316, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2655.21875, | |
| "dapo/avg_reward_std": 0.31138683449138294, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46969697827642615, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 43.74999999999999, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10971428571428571, | |
| "grad_norm": 0.03213554993271828, | |
| "kl": 1.945020630955696e-05, | |
| "learning_rate": 6.664685702961344e-07, | |
| "loss": 0.0357, | |
| "reward": 0.4872458651661873, | |
| "reward_std": 0.9538498669862747, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2325.888900756836, | |
| "dapo/avg_reward_std": 0.18781672976911068, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2968750069849193, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 38.263888888888886, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11085714285714286, | |
| "grad_norm": 0.03308973088860512, | |
| "kl": 1.2524658814072609e-05, | |
| "learning_rate": 6.588648530198504e-07, | |
| "loss": 0.0332, | |
| "reward": 0.5582090672105551, | |
| "reward_std": 0.9704806208610535, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2980.78125, | |
| "dapo/avg_reward_std": 0.22120360245830134, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.29824561900214147, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 34.717261904761905, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.112, | |
| "grad_norm": 0.02593560516834259, | |
| "kl": 9.87970270216465e-06, | |
| "learning_rate": 6.512279744547392e-07, | |
| "loss": 0.0537, | |
| "reward": 0.5110117536969483, | |
| "reward_std": 0.9140844419598579, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2679.701400756836, | |
| "dapo/avg_reward_std": 0.22513854503631592, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.388888892200258, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 40.104166666666664, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11314285714285714, | |
| "grad_norm": 0.028198201209306717, | |
| "kl": -2.773245796561241e-06, | |
| "learning_rate": 6.435602608679916e-07, | |
| "loss": 0.0223, | |
| "reward": 0.5703150723129511, | |
| "reward_std": 0.9169064536690712, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2113.7396087646484, | |
| "dapo/avg_reward_std": 0.2158526074555185, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2916666724615627, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 30.823863636363633, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11428571428571428, | |
| "grad_norm": 0.032321903854608536, | |
| "kl": 2.765655517578125e-05, | |
| "learning_rate": 6.358640479194451e-07, | |
| "loss": 0.037, | |
| "reward": 0.552736995741725, | |
| "reward_std": 0.929665133357048, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2397.545135498047, | |
| "dapo/avg_reward_std": 0.2640196681022644, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41304348603538843, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 43.75, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11542857142857142, | |
| "grad_norm": 0.030507881194353104, | |
| "kl": 1.4653429388999939e-05, | |
| "learning_rate": 6.281416799501187e-07, | |
| "loss": 0.0216, | |
| "reward": 0.7607237044721842, | |
| "reward_std": 0.9413916915655136, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2775.312515258789, | |
| "dapo/avg_reward_std": 0.26319959415839267, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3910256509597485, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 38.95833333333333, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11657142857142858, | |
| "grad_norm": 0.028825754299759865, | |
| "kl": 1.7821788787841797e-05, | |
| "learning_rate": 6.203955092681039e-07, | |
| "loss": -0.0059, | |
| "reward": 0.4367541056126356, | |
| "reward_std": 0.9408165961503983, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2606.3194580078125, | |
| "dapo/avg_reward_std": 0.22601407093386497, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.295698931620967, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 30.624999999999993, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11771428571428572, | |
| "grad_norm": 0.029979709535837173, | |
| "kl": 2.3851171135902405e-06, | |
| "learning_rate": 6.126278954320294e-07, | |
| "loss": 0.0463, | |
| "reward": 0.6886496935039759, | |
| "reward_std": 0.9053627252578735, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2084.829849243164, | |
| "dapo/avg_reward_std": 0.22010741523794225, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2702702747003452, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 32.51488095238095, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11885714285714286, | |
| "grad_norm": 0.04769710823893547, | |
| "kl": 2.0613893866539e-05, | |
| "learning_rate": 6.048412045323164e-07, | |
| "loss": 0.1162, | |
| "reward": 0.684872523881495, | |
| "reward_std": 0.9595381543040276, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1955.1354484558105, | |
| "dapo/avg_reward_std": 0.2937169720729192, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.42361111504336196, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 49.166666666666664, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12, | |
| "grad_norm": 0.04352044314146042, | |
| "kl": 2.0936131477355957e-05, | |
| "learning_rate": 5.97037808470444e-07, | |
| "loss": -0.0017, | |
| "reward": 0.6524754576385021, | |
| "reward_std": 0.9669848829507828, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2316.0486221313477, | |
| "dapo/avg_reward_std": 0.2529407059773803, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3020833423361182, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 30.729166666666664, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12114285714285715, | |
| "grad_norm": 0.03129468858242035, | |
| "kl": 1.8656253814697266e-05, | |
| "learning_rate": 5.892200842364462e-07, | |
| "loss": -0.0284, | |
| "reward": 0.6108895651996136, | |
| "reward_std": 0.9319325312972069, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2094.6909942626953, | |
| "dapo/avg_reward_std": 0.2037892586655087, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2629629688130485, | |
| "dapo/num_sampling_attempts": 5.625, | |
| "dapo/sampling_efficiency": 21.066919191919194, | |
| "dapo/total_prompts_processed": 33.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12228571428571429, | |
| "grad_norm": 0.038948290050029755, | |
| "kl": 2.824072726070881e-05, | |
| "learning_rate": 5.813904131848564e-07, | |
| "loss": 0.0748, | |
| "reward": 0.48047966323792934, | |
| "reward_std": 0.9251860752701759, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2482.6146240234375, | |
| "dapo/avg_reward_std": 0.19606016278266908, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.22592593100335862, | |
| "dapo/num_sampling_attempts": 5.625, | |
| "dapo/sampling_efficiency": 21.577380952380953, | |
| "dapo/total_prompts_processed": 33.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12342857142857143, | |
| "grad_norm": 0.027610260993242264, | |
| "kl": 1.3685785233974457e-05, | |
| "learning_rate": 5.735511803093248e-07, | |
| "loss": 0.0016, | |
| "reward": 0.46788009256124496, | |
| "reward_std": 0.9522990807890892, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3010.541717529297, | |
| "dapo/avg_reward_std": 0.23601235449314117, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.38461538977347887, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 61.5530303030303, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12457142857142857, | |
| "grad_norm": 0.031469572335481644, | |
| "kl": 2.0675361156463623e-05, | |
| "learning_rate": 5.657047735161255e-07, | |
| "loss": 0.0491, | |
| "reward": 0.6003496535122395, | |
| "reward_std": 0.9582010880112648, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2550.388931274414, | |
| "dapo/avg_reward_std": 0.24275302588939668, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3222222273548444, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 41.666666666666664, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12571428571428572, | |
| "grad_norm": 0.03043791465461254, | |
| "kl": 1.619383692741394e-05, | |
| "learning_rate": 5.578535828967777e-07, | |
| "loss": 0.0395, | |
| "reward": 0.6210233392193913, | |
| "reward_std": 0.9545274153351784, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2248.6771240234375, | |
| "dapo/avg_reward_std": 0.2556017003953457, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.32291667349636555, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 40.451388888888886, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12685714285714286, | |
| "grad_norm": 0.029558613896369934, | |
| "kl": 1.7130747437477112e-05, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0156, | |
| "reward": 0.8898655958473682, | |
| "reward_std": 0.8961458280682564, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2790.4132537841797, | |
| "dapo/avg_reward_std": 0.2798377914088113, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35714286299688475, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 32.291666666666664, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.128, | |
| "grad_norm": 0.02665926143527031, | |
| "kl": 2.7702553779818118e-05, | |
| "learning_rate": 5.421464171032224e-07, | |
| "loss": 0.0375, | |
| "reward": 0.4765107296407223, | |
| "reward_std": 0.9586756750941277, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2058.163261413574, | |
| "dapo/avg_reward_std": 0.21719616024117722, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2850877270102501, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 36.13636363636364, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12914285714285714, | |
| "grad_norm": 0.03724399581551552, | |
| "kl": 9.129568934440613e-05, | |
| "learning_rate": 5.342952264838747e-07, | |
| "loss": 0.0308, | |
| "reward": 0.5965504869818687, | |
| "reward_std": 0.9517285376787186, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1804.7569427490234, | |
| "dapo/avg_reward_std": 0.22654692203767837, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30645161819073463, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 46.800595238095234, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13028571428571428, | |
| "grad_norm": 0.0444670133292675, | |
| "kl": 3.589317202568054e-05, | |
| "learning_rate": 5.264488196906752e-07, | |
| "loss": 0.0217, | |
| "reward": 0.4887783471494913, | |
| "reward_std": 0.9572358801960945, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2705.472236633301, | |
| "dapo/avg_reward_std": 0.24942583271435328, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4285714335384823, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 51.45833333333333, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13142857142857142, | |
| "grad_norm": 0.027661452069878578, | |
| "kl": 1.307763159275055e-05, | |
| "learning_rate": 5.186095868151436e-07, | |
| "loss": -0.022, | |
| "reward": 0.5754544343799353, | |
| "reward_std": 0.9811793565750122, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1660.2222213745117, | |
| "dapo/avg_reward_std": 0.20845345951415398, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30630631100487066, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 32.013888888888886, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13257142857142856, | |
| "grad_norm": 0.03922427445650101, | |
| "kl": 7.28946179151535e-06, | |
| "learning_rate": 5.107799157635538e-07, | |
| "loss": 0.0279, | |
| "reward": 0.8034113459289074, | |
| "reward_std": 0.9163173362612724, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2143.3368377685547, | |
| "dapo/avg_reward_std": 0.25861393963849105, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3456790193363472, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 38.95833333333333, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1337142857142857, | |
| "grad_norm": 0.0386907123029232, | |
| "kl": 2.8124195523560047e-05, | |
| "learning_rate": 5.02962191529556e-07, | |
| "loss": 0.0157, | |
| "reward": 0.5698221866041422, | |
| "reward_std": 0.9738077968358994, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2709.371551513672, | |
| "dapo/avg_reward_std": 0.17381487890731456, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26356589343658715, | |
| "dapo/num_sampling_attempts": 5.375, | |
| "dapo/sampling_efficiency": 31.522817460317455, | |
| "dapo/total_prompts_processed": 32.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13485714285714287, | |
| "grad_norm": 0.03524978086352348, | |
| "kl": 2.0368024706840515e-05, | |
| "learning_rate": 4.951587954676837e-07, | |
| "loss": 0.073, | |
| "reward": 0.5433152373880148, | |
| "reward_std": 0.9576972275972366, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2729.6458129882812, | |
| "dapo/avg_reward_std": 0.2853468172252178, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31770834140479565, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 38.13988095238095, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.136, | |
| "grad_norm": 0.035877469927072525, | |
| "kl": 9.79006290435791e-06, | |
| "learning_rate": 4.873721045679706e-07, | |
| "loss": 0.0223, | |
| "reward": 0.4996686838567257, | |
| "reward_std": 0.9503490626811981, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2456.458351135254, | |
| "dapo/avg_reward_std": 0.3290893492244539, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5000000127724239, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 40.62499999999999, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13714285714285715, | |
| "grad_norm": 0.03583266958594322, | |
| "kl": 9.331852197647095e-06, | |
| "learning_rate": 4.79604490731896e-07, | |
| "loss": 0.0363, | |
| "reward": 0.8003920987248421, | |
| "reward_std": 0.955727644264698, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2489.1875, | |
| "dapo/avg_reward_std": 0.1615937834694272, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.22222222600664412, | |
| "dapo/num_sampling_attempts": 5.25, | |
| "dapo/sampling_efficiency": 37.41987179487179, | |
| "dapo/total_prompts_processed": 31.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1382857142857143, | |
| "grad_norm": 0.027044769376516342, | |
| "kl": 2.0619481801986694e-05, | |
| "learning_rate": 4.7185832004988133e-07, | |
| "loss": 0.0123, | |
| "reward": 0.5692465994507074, | |
| "reward_std": 0.9356264397501945, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2946.687530517578, | |
| "dapo/avg_reward_std": 0.26767816713878084, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3452381023338863, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 33.75, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13942857142857143, | |
| "grad_norm": 0.03187067061662674, | |
| "kl": 2.1383166313171387e-05, | |
| "learning_rate": 4.641359520805548e-07, | |
| "loss": 0.0722, | |
| "reward": 0.42231168132275343, | |
| "reward_std": 0.9001481607556343, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1841.1458206176758, | |
| "dapo/avg_reward_std": 0.32384763956069945, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4000000065565109, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 41.041666666666664, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14057142857142857, | |
| "grad_norm": 0.03784916177392006, | |
| "kl": 4.2632222175598145e-05, | |
| "learning_rate": 4.5643973913200837e-07, | |
| "loss": 0.0367, | |
| "reward": 0.6476083844900131, | |
| "reward_std": 0.908843033015728, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2392.166702270508, | |
| "dapo/avg_reward_std": 0.26674444922085466, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3218390869683233, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 31.666666666666664, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1417142857142857, | |
| "grad_norm": 0.02941369265317917, | |
| "kl": 2.299714833498001e-05, | |
| "learning_rate": 4.4877202554526084e-07, | |
| "loss": 0.0152, | |
| "reward": 0.5824479665607214, | |
| "reward_std": 0.9478363320231438, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3125.159713745117, | |
| "dapo/avg_reward_std": 0.29309388995170593, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5000000049670538, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 51.45833333333333, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.030095171183347702, | |
| "kl": 3.2413750886917114e-05, | |
| "learning_rate": 4.4113514698014953e-07, | |
| "loss": 0.0534, | |
| "reward": 0.5003506469074637, | |
| "reward_std": 0.8919698372483253, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2462.8368377685547, | |
| "dapo/avg_reward_std": 0.2680182981491089, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3466666728258133, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 46.87499999999999, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.144, | |
| "grad_norm": 0.04286734014749527, | |
| "kl": 5.683675408363342e-05, | |
| "learning_rate": 4.3353142970386557e-07, | |
| "loss": 0.0028, | |
| "reward": 0.5951744802296162, | |
| "reward_std": 0.9584252312779427, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2443.4618225097656, | |
| "dapo/avg_reward_std": 0.19895405417833573, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2820512862541737, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 33.90376984126984, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14514285714285713, | |
| "grad_norm": 0.03486345708370209, | |
| "kl": 2.958625555038452e-05, | |
| "learning_rate": 4.2596318988235037e-07, | |
| "loss": -0.0055, | |
| "reward": 0.7111770529299974, | |
| "reward_std": 0.9570346251130104, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2227.385452270508, | |
| "dapo/avg_reward_std": 0.22934340153421676, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333333688122885, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 52.291666666666664, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1462857142857143, | |
| "grad_norm": 0.04721139743924141, | |
| "kl": 3.547314554452896e-05, | |
| "learning_rate": 4.1843273287476854e-07, | |
| "loss": 0.1085, | |
| "reward": 0.4447980001568794, | |
| "reward_std": 0.951726958155632, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2883.357681274414, | |
| "dapo/avg_reward_std": 0.4109063148498535, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.6777777880430221, | |
| "dapo/num_sampling_attempts": 1.875, | |
| "dapo/sampling_efficiency": 65.625, | |
| "dapo/total_prompts_processed": 11.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14742857142857144, | |
| "grad_norm": 0.02544778771698475, | |
| "kl": 9.082257747650146e-06, | |
| "learning_rate": 4.1094235253127374e-07, | |
| "loss": 0.046, | |
| "reward": 0.6885830331593752, | |
| "reward_std": 0.9739237055182457, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2122.795181274414, | |
| "dapo/avg_reward_std": 0.2591241377371329, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3641975356472863, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 39.70238095238095, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14857142857142858, | |
| "grad_norm": 0.03150525689125061, | |
| "kl": 3.223586827516556e-05, | |
| "learning_rate": 4.034943304942796e-07, | |
| "loss": 0.0306, | |
| "reward": 0.5525269485078752, | |
| "reward_std": 0.9417792037129402, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2306.8611450195312, | |
| "dapo/avg_reward_std": 0.3414611066209859, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3908046078065346, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 32.410714285714285, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14971428571428572, | |
| "grad_norm": 0.036385975778102875, | |
| "kl": 4.038959741592407e-05, | |
| "learning_rate": 3.9609093550344907e-07, | |
| "loss": 0.0679, | |
| "reward": 0.5595943983644247, | |
| "reward_std": 0.9294908344745636, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2100.4444694519043, | |
| "dapo/avg_reward_std": 0.22894747753938038, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34444445222616193, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 38.541666666666664, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15085714285714286, | |
| "grad_norm": 0.05820675194263458, | |
| "kl": 7.29486346244812e-05, | |
| "learning_rate": 3.8873442270461485e-07, | |
| "loss": 0.0548, | |
| "reward": 0.5259249797090888, | |
| "reward_std": 0.9095494002103806, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2399.0555725097656, | |
| "dapo/avg_reward_std": 0.2968884447346563, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4057971057684525, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 48.33333333333333, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.152, | |
| "grad_norm": 0.03143748641014099, | |
| "kl": 1.6003847122192383e-05, | |
| "learning_rate": 3.8142703296283953e-07, | |
| "loss": 0.0154, | |
| "reward": 0.6293735019862652, | |
| "reward_std": 0.9267243668437004, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2028.9653091430664, | |
| "dapo/avg_reward_std": 0.24916886538267136, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4097222276031971, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 45.83333333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15314285714285714, | |
| "grad_norm": 0.03667714074254036, | |
| "kl": 2.6845373213291168e-05, | |
| "learning_rate": 3.7417099217982686e-07, | |
| "loss": 0.0108, | |
| "reward": 0.6901863785460591, | |
| "reward_std": 0.9471788480877876, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2116.6493225097656, | |
| "dapo/avg_reward_std": 0.3074521411742483, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37500000638621195, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 33.035714285714285, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15428571428571428, | |
| "grad_norm": 0.04016295075416565, | |
| "kl": 4.020519554615021e-05, | |
| "learning_rate": 3.6696851061588994e-07, | |
| "loss": 0.081, | |
| "reward": 0.6064621905097738, | |
| "reward_std": 0.9165264815092087, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2051.2812728881836, | |
| "dapo/avg_reward_std": 0.20643932349754102, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2979798059571873, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 49.26136363636363, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15542857142857142, | |
| "grad_norm": 0.03907117620110512, | |
| "kl": 4.081428050994873e-05, | |
| "learning_rate": 3.5982178221668533e-07, | |
| "loss": 0.0631, | |
| "reward": 0.6007686145603657, | |
| "reward_std": 0.946811780333519, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2981.6145935058594, | |
| "dapo/avg_reward_std": 0.17673770231860025, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26190476829097387, | |
| "dapo/num_sampling_attempts": 5.25, | |
| "dapo/sampling_efficiency": 33.19444444444444, | |
| "dapo/total_prompts_processed": 31.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15657142857142858, | |
| "grad_norm": 0.026764124631881714, | |
| "kl": 2.1813437342643738e-05, | |
| "learning_rate": 3.5273298394491515e-07, | |
| "loss": 0.0296, | |
| "reward": 0.5422612819820642, | |
| "reward_std": 0.9660339280962944, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1996.4930725097656, | |
| "dapo/avg_reward_std": 0.2211539367834727, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35000000447034835, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 41.666666666666664, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15771428571428572, | |
| "grad_norm": 0.036459192633628845, | |
| "kl": 6.0535967350006104e-05, | |
| "learning_rate": 3.45704275117204e-07, | |
| "loss": 0.0473, | |
| "reward": 0.6352426074445248, | |
| "reward_std": 1.0075769945979118, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2673.013931274414, | |
| "dapo/avg_reward_std": 0.21187836019431844, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.28431372738936367, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 40.347222222222214, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15885714285714286, | |
| "grad_norm": 0.027443382889032364, | |
| "kl": 4.770606756210327e-05, | |
| "learning_rate": 3.387377967463493e-07, | |
| "loss": 0.0398, | |
| "reward": 0.53852697648108, | |
| "reward_std": 0.9717471078038216, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2352.944465637207, | |
| "dapo/avg_reward_std": 0.28073156496574136, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33908046936166697, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 31.666666666666664, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16, | |
| "grad_norm": 0.03219648823142052, | |
| "kl": 1.9827857613563538e-05, | |
| "learning_rate": 3.3183567088914833e-07, | |
| "loss": 0.0502, | |
| "reward": 0.5767329391092062, | |
| "reward_std": 0.920682892203331, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2714.9097595214844, | |
| "dapo/avg_reward_std": 0.17997434735298157, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26495726979695833, | |
| "dapo/num_sampling_attempts": 4.875, | |
| "dapo/sampling_efficiency": 24.82142857142857, | |
| "dapo/total_prompts_processed": 29.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16114285714285714, | |
| "grad_norm": 0.03654953092336655, | |
| "kl": 2.0893290638923645e-05, | |
| "learning_rate": 3.250000000000001e-07, | |
| "loss": 0.0808, | |
| "reward": 0.7222395315766335, | |
| "reward_std": 0.9689760208129883, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1895.9965209960938, | |
| "dapo/avg_reward_std": 0.24079040033476692, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30476190788405283, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 36.67207792207792, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16228571428571428, | |
| "grad_norm": 0.05263448879122734, | |
| "kl": 8.018314838409424e-05, | |
| "learning_rate": 3.182328662904756e-07, | |
| "loss": 0.0952, | |
| "reward": 0.5266689900308847, | |
| "reward_std": 0.9142153859138489, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2619.2291717529297, | |
| "dapo/avg_reward_std": 0.2643248688790106, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34408602887584316, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 32.410714285714285, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16342857142857142, | |
| "grad_norm": 0.029158689081668854, | |
| "kl": 3.154575824737549e-05, | |
| "learning_rate": 3.115363310950578e-07, | |
| "loss": 0.0032, | |
| "reward": 0.5475870370864868, | |
| "reward_std": 0.8940814658999443, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2439.340316772461, | |
| "dapo/avg_reward_std": 0.25194550690979794, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33908046576483497, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 48.86904761904761, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16457142857142856, | |
| "grad_norm": 0.027842765673995018, | |
| "kl": 4.0609389543533325e-05, | |
| "learning_rate": 3.0491243424323783e-07, | |
| "loss": 0.0, | |
| "reward": 0.6661859937012196, | |
| "reward_std": 0.9778606072068214, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2299.4166870117188, | |
| "dapo/avg_reward_std": 0.19899881369358785, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2567567603813635, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 27.96626984126984, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1657142857142857, | |
| "grad_norm": 0.041895266622304916, | |
| "kl": 6.861239671707153e-05, | |
| "learning_rate": 2.9836319343816397e-07, | |
| "loss": 0.1109, | |
| "reward": 0.6072739865630865, | |
| "reward_std": 0.9706787243485451, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2448.3993225097656, | |
| "dapo/avg_reward_std": 0.26682727987116034, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4015151573853059, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 61.25, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16685714285714287, | |
| "grad_norm": 0.033113960176706314, | |
| "kl": 6.478279829025269e-05, | |
| "learning_rate": 2.918906036420294e-07, | |
| "loss": -0.0725, | |
| "reward": 0.7111451979726553, | |
| "reward_std": 0.9747665524482727, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2499.4132080078125, | |
| "dapo/avg_reward_std": 0.23725970940930502, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36904762791735785, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 40.972222222222214, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.168, | |
| "grad_norm": 0.03699960932135582, | |
| "kl": 5.050189793109894e-05, | |
| "learning_rate": 2.854966364683872e-07, | |
| "loss": 0.0512, | |
| "reward": 0.5902281412854791, | |
| "reward_std": 0.9745439067482948, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2606.902816772461, | |
| "dapo/avg_reward_std": 0.3174622275612571, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46212122250686993, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 50.416666666666664, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16914285714285715, | |
| "grad_norm": 0.032203614711761475, | |
| "kl": 3.288034349679947e-05, | |
| "learning_rate": 2.791832395815782e-07, | |
| "loss": 0.0183, | |
| "reward": 0.4769565463066101, | |
| "reward_std": 0.9322275221347809, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2815.8160247802734, | |
| "dapo/avg_reward_std": 0.2469456638350631, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2979798046025363, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 35.11904761904762, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1702857142857143, | |
| "grad_norm": 0.030444171279668808, | |
| "kl": 3.5978853702545166e-05, | |
| "learning_rate": 2.729523361034538e-07, | |
| "loss": 0.056, | |
| "reward": 0.6807443965226412, | |
| "reward_std": 0.9815046414732933, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2225.520866394043, | |
| "dapo/avg_reward_std": 0.19231303450134066, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2777777844005161, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 32.18749999999999, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17142857142857143, | |
| "grad_norm": 0.03868250176310539, | |
| "kl": 4.6514905989170074e-05, | |
| "learning_rate": 2.6680582402757324e-07, | |
| "loss": -0.037, | |
| "reward": 0.6887061549350619, | |
| "reward_std": 0.9610730484127998, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3103.3784790039062, | |
| "dapo/avg_reward_std": 0.20304633464132035, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31547619295971735, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 40.32738095238095, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17257142857142857, | |
| "grad_norm": 0.03259337320923805, | |
| "kl": 7.005780935287476e-05, | |
| "learning_rate": 2.6074557564105724e-07, | |
| "loss": 0.0659, | |
| "reward": 0.5518668536096811, | |
| "reward_std": 0.9462934136390686, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2488.499984741211, | |
| "dapo/avg_reward_std": 0.20882706064730883, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3177083367481828, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 39.409722222222214, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1737142857142857, | |
| "grad_norm": 0.030666321516036987, | |
| "kl": 3.533810377120972e-05, | |
| "learning_rate": 2.547734369542718e-07, | |
| "loss": 0.0437, | |
| "reward": 0.5291262120008469, | |
| "reward_std": 0.981982946395874, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2514.8507080078125, | |
| "dapo/avg_reward_std": 0.20546393813910308, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3209876600239012, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 35.93749999999999, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17485714285714285, | |
| "grad_norm": 0.028674930334091187, | |
| "kl": 7.952749729156494e-05, | |
| "learning_rate": 2.488912271385139e-07, | |
| "loss": -0.0145, | |
| "reward": 0.5828098729252815, | |
| "reward_std": 0.9706256464123726, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2717.2847290039062, | |
| "dapo/avg_reward_std": 0.25499844749768574, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36666667511065804, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 34.791666666666664, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.176, | |
| "grad_norm": 0.030772393569350243, | |
| "kl": 4.854763392359018e-05, | |
| "learning_rate": 2.4310073797187573e-07, | |
| "loss": 0.0426, | |
| "reward": 0.45278373593464494, | |
| "reward_std": 0.9311749711632729, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2762.3055725097656, | |
| "dapo/avg_reward_std": 0.29779375117758045, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3985507280930229, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 46.25, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17714285714285713, | |
| "grad_norm": 0.02795676700770855, | |
| "kl": 6.116554141044617e-05, | |
| "learning_rate": 2.374037332934512e-07, | |
| "loss": -0.017, | |
| "reward": 0.5571175646036863, | |
| "reward_std": 0.951450802385807, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2260.506950378418, | |
| "dapo/avg_reward_std": 0.19260793987740862, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.20921986375717408, | |
| "dapo/num_sampling_attempts": 5.875, | |
| "dapo/sampling_efficiency": 20.416666666666664, | |
| "dapo/total_prompts_processed": 35.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1782857142857143, | |
| "grad_norm": 0.03577401861548424, | |
| "kl": 4.409998655319214e-05, | |
| "learning_rate": 2.3180194846605364e-07, | |
| "loss": 0.0769, | |
| "reward": 0.6440617088228464, | |
| "reward_std": 0.9337564334273338, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2340.84725189209, | |
| "dapo/avg_reward_std": 0.27447891732056934, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40972222946584225, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 46.87499999999999, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17942857142857144, | |
| "grad_norm": 0.045233093202114105, | |
| "kl": 6.485730409622192e-05, | |
| "learning_rate": 2.2629708984760706e-07, | |
| "loss": 0.0363, | |
| "reward": 0.7273098900914192, | |
| "reward_std": 0.9823846518993378, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2282.3819580078125, | |
| "dapo/avg_reward_std": 0.20623917956101268, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31140351334684774, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 23.680555555555557, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18057142857142858, | |
| "grad_norm": 0.02890234813094139, | |
| "kl": 5.996227264404297e-05, | |
| "learning_rate": 2.2089083427137329e-07, | |
| "loss": 0.0031, | |
| "reward": 0.6950137317180634, | |
| "reward_std": 0.9464666321873665, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2021.6284866333008, | |
| "dapo/avg_reward_std": 0.23576846316054062, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3198198257265864, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 26.96969696969697, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18171428571428572, | |
| "grad_norm": 0.03477742150425911, | |
| "kl": 6.712228059768677e-05, | |
| "learning_rate": 2.1558482853517253e-07, | |
| "loss": 0.0402, | |
| "reward": 0.5178025495260954, | |
| "reward_std": 0.9177478551864624, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2372.9931030273438, | |
| "dapo/avg_reward_std": 0.1955654670794805, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.24206349643922986, | |
| "dapo/num_sampling_attempts": 5.25, | |
| "dapo/sampling_efficiency": 22.916666666666664, | |
| "dapo/total_prompts_processed": 31.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18285714285714286, | |
| "grad_norm": 0.03023899346590042, | |
| "kl": 0.00011706352233886719, | |
| "learning_rate": 2.1038068889975259e-07, | |
| "loss": -0.023, | |
| "reward": 0.5155377965420485, | |
| "reward_std": 0.9538168758153915, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2786.184097290039, | |
| "dapo/avg_reward_std": 0.22358988050152273, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3039215772467501, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 36.354166666666664, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.184, | |
| "grad_norm": 0.029065359383821487, | |
| "kl": 7.36340880393982e-05, | |
| "learning_rate": 2.0528000059645995e-07, | |
| "loss": 0.0183, | |
| "reward": 0.5675038225017488, | |
| "reward_std": 0.9294460043311119, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2661.7986183166504, | |
| "dapo/avg_reward_std": 0.23443660909129727, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3494623740834574, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 39.166666666666664, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18514285714285714, | |
| "grad_norm": 0.03428042680025101, | |
| "kl": 7.835030555725098e-05, | |
| "learning_rate": 2.0028431734436308e-07, | |
| "loss": 0.0077, | |
| "reward": 0.6459280159324408, | |
| "reward_std": 0.961892195045948, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2645.9305725097656, | |
| "dapo/avg_reward_std": 0.2903378981611003, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.39855073133240576, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 54.166666666666664, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18628571428571428, | |
| "grad_norm": 0.026776015758514404, | |
| "kl": 6.175786256790161e-05, | |
| "learning_rate": 1.9539516087697517e-07, | |
| "loss": 0.0499, | |
| "reward": 0.834372952580452, | |
| "reward_std": 0.9364972710609436, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2940.7604370117188, | |
| "dapo/avg_reward_std": 0.28692422310511273, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35555556217829387, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 32.708333333333336, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18742857142857142, | |
| "grad_norm": 0.03140675649046898, | |
| "kl": 6.527453660964966e-05, | |
| "learning_rate": 1.9061402047871833e-07, | |
| "loss": 0.074, | |
| "reward": 0.41690353071317077, | |
| "reward_std": 0.9491114094853401, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2281.6284675598145, | |
| "dapo/avg_reward_std": 0.19226541501634262, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.28921569007284503, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 34.285714285714285, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18857142857142858, | |
| "grad_norm": 0.044475626200437546, | |
| "kl": 6.622821092605591e-05, | |
| "learning_rate": 1.8594235253127372e-07, | |
| "loss": 0.0216, | |
| "reward": 0.5352295860648155, | |
| "reward_std": 0.9716188460588455, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2246.774314880371, | |
| "dapo/avg_reward_std": 0.21395914729048565, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2642276446993758, | |
| "dapo/num_sampling_attempts": 5.125, | |
| "dapo/sampling_efficiency": 32.51488095238095, | |
| "dapo/total_prompts_processed": 30.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18971428571428572, | |
| "grad_norm": 0.03659826144576073, | |
| "kl": 7.368624210357666e-05, | |
| "learning_rate": 1.8138158006995363e-07, | |
| "loss": 0.0485, | |
| "reward": 0.5606641564518213, | |
| "reward_std": 0.9496459811925888, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2340.156265258789, | |
| "dapo/avg_reward_std": 0.2663822333017985, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3888888974984487, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 30.32738095238095, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19085714285714286, | |
| "grad_norm": 0.03370486944913864, | |
| "kl": 0.00011890754103660583, | |
| "learning_rate": 1.7693309235023127e-07, | |
| "loss": 0.0107, | |
| "reward": 0.615155003964901, | |
| "reward_std": 0.981718622148037, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1600.381950378418, | |
| "dapo/avg_reward_std": 0.2149174999859598, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31481481964389485, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 39.30555555555556, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.192, | |
| "grad_norm": 0.040477264672517776, | |
| "kl": 4.4405460357666016e-05, | |
| "learning_rate": 1.7259824442455923e-07, | |
| "loss": 0.0183, | |
| "reward": 0.7775004804134369, | |
| "reward_std": 0.9218784719705582, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2663.3229370117188, | |
| "dapo/avg_reward_std": 0.29243687472560187, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4242424314672297, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 45.20833333333333, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19314285714285714, | |
| "grad_norm": 0.033447615802288055, | |
| "kl": 6.474554538726807e-05, | |
| "learning_rate": 1.6837835672960831e-07, | |
| "loss": 0.0604, | |
| "reward": 0.6684309486299753, | |
| "reward_std": 0.9398416355252266, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1823.3020782470703, | |
| "dapo/avg_reward_std": 0.19836447931624748, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.24324324847878637, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 33.229166666666664, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19428571428571428, | |
| "grad_norm": 0.050460852682590485, | |
| "kl": 8.266419172286987e-05, | |
| "learning_rate": 1.6427471468404952e-07, | |
| "loss": 0.0797, | |
| "reward": 0.6385768353939056, | |
| "reward_std": 0.9705075472593307, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2620.312515258789, | |
| "dapo/avg_reward_std": 0.2494219935992185, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.29901961412499933, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 30.3125, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19542857142857142, | |
| "grad_norm": 0.030058681964874268, | |
| "kl": 5.9291720390319824e-05, | |
| "learning_rate": 1.6028856829700258e-07, | |
| "loss": 0.04, | |
| "reward": 0.5667276866734028, | |
| "reward_std": 0.9310731589794159, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2728.118064880371, | |
| "dapo/avg_reward_std": 0.3154246766458858, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.47727273540063336, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 52.82738095238095, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19657142857142856, | |
| "grad_norm": 0.02854626253247261, | |
| "kl": 4.601478576660156e-05, | |
| "learning_rate": 1.5642113178727193e-07, | |
| "loss": -0.0071, | |
| "reward": 0.5269420258700848, | |
| "reward_std": 0.9420886114239693, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2000.6111297607422, | |
| "dapo/avg_reward_std": 0.1943835632221119, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2657657728807346, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 38.02083333333333, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1977142857142857, | |
| "grad_norm": 0.033435527235269547, | |
| "kl": 6.041303277015686e-05, | |
| "learning_rate": 1.5267358321348285e-07, | |
| "loss": -0.0116, | |
| "reward": 0.6523085497319698, | |
| "reward_std": 0.9166425243020058, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2643.138916015625, | |
| "dapo/avg_reward_std": 0.31710357325417654, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46031746977851506, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 51.45833333333333, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19885714285714284, | |
| "grad_norm": 0.02673209458589554, | |
| "kl": 0.00010142475366592407, | |
| "learning_rate": 1.4904706411523448e-07, | |
| "loss": 0.0252, | |
| "reward": 0.5322555489838123, | |
| "reward_std": 0.9057421013712883, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2441.3437576293945, | |
| "dapo/avg_reward_std": 0.30628569194903743, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.38461538977347887, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 41.041666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2, | |
| "grad_norm": 0.04055117443203926, | |
| "kl": 4.247203469276428e-05, | |
| "learning_rate": 1.4554267916537495e-07, | |
| "loss": 0.0974, | |
| "reward": 0.6256343480199575, | |
| "reward_std": 0.9141717404127121, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2001.5173797607422, | |
| "dapo/avg_reward_std": 0.28915207616744504, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3817204381189039, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 29.285714285714285, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20114285714285715, | |
| "grad_norm": 0.03139885142445564, | |
| "kl": 8.495151996612549e-05, | |
| "learning_rate": 1.4216149583350755e-07, | |
| "loss": 0.0178, | |
| "reward": 0.5467482833191752, | |
| "reward_std": 0.9077746942639351, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2707.1875, | |
| "dapo/avg_reward_std": 0.2716821462943636, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3620689732247385, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 37.5, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2022857142857143, | |
| "grad_norm": 0.027195578441023827, | |
| "kl": 3.4984201192855835e-05, | |
| "learning_rate": 1.3890454406082956e-07, | |
| "loss": 0.0243, | |
| "reward": 0.4738291520625353, | |
| "reward_std": 0.9582962840795517, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2927.2534790039062, | |
| "dapo/avg_reward_std": 0.2845180779695511, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3750000127724239, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 34.49404761904761, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20342857142857143, | |
| "grad_norm": 0.0315893292427063, | |
| "kl": 9.309500455856323e-05, | |
| "learning_rate": 1.3577281594640182e-07, | |
| "loss": 0.067, | |
| "reward": 0.52550208568573, | |
| "reward_std": 0.9910342618823051, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2337.701400756836, | |
| "dapo/avg_reward_std": 0.18291032314300537, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.25438597092502996, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 32.81249999999999, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20457142857142857, | |
| "grad_norm": 0.031005509197711945, | |
| "kl": 9.676814079284668e-05, | |
| "learning_rate": 1.3276726544494571e-07, | |
| "loss": 0.0165, | |
| "reward": 0.6187671273946762, | |
| "reward_std": 0.9665273353457451, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2257.3958892822266, | |
| "dapo/avg_reward_std": 0.20009312199221718, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30092593158284825, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 40.95238095238095, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2057142857142857, | |
| "grad_norm": 0.0394003801047802, | |
| "kl": 6.996467709541321e-05, | |
| "learning_rate": 1.2988880807625927e-07, | |
| "loss": 0.0627, | |
| "reward": 0.7572303153574467, | |
| "reward_std": 0.9510952234268188, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2533.9375534057617, | |
| "dapo/avg_reward_std": 0.37206994990507763, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5740740895271301, | |
| "dapo/num_sampling_attempts": 2.25, | |
| "dapo/sampling_efficiency": 51.041666666666664, | |
| "dapo/total_prompts_processed": 13.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20685714285714285, | |
| "grad_norm": 0.03264293819665909, | |
| "kl": 4.2844563722610474e-05, | |
| "learning_rate": 1.2713832064634125e-07, | |
| "loss": 0.0513, | |
| "reward": 0.7092031128704548, | |
| "reward_std": 1.0104939341545105, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2425.8055572509766, | |
| "dapo/avg_reward_std": 0.275991202547, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.384615390919722, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 41.041666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.208, | |
| "grad_norm": 0.033197954297065735, | |
| "kl": 6.585032679140568e-05, | |
| "learning_rate": 1.2451664098030743e-07, | |
| "loss": 0.0327, | |
| "reward": 0.5725661776959896, | |
| "reward_std": 0.9082557633519173, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2288.0799255371094, | |
| "dapo/avg_reward_std": 0.31956043162129144, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4318181872367859, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 52.083333333333336, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20914285714285713, | |
| "grad_norm": 0.0300610288977623, | |
| "kl": 9.128451347351074e-05, | |
| "learning_rate": 1.220245676671809e-07, | |
| "loss": 0.0567, | |
| "reward": 0.7111962893977761, | |
| "reward_std": 0.9172193482518196, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2212.138900756836, | |
| "dapo/avg_reward_std": 0.31106447339057924, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4066666769981384, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 40.97222222222222, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2102857142857143, | |
| "grad_norm": 0.03711786866188049, | |
| "kl": 9.056925773620605e-05, | |
| "learning_rate": 1.1966285981663407e-07, | |
| "loss": 0.0405, | |
| "reward": 0.505124656483531, | |
| "reward_std": 0.9274496361613274, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2350.8820037841797, | |
| "dapo/avg_reward_std": 0.21689824704770688, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3209876600239012, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 47.22222222222222, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21142857142857144, | |
| "grad_norm": 0.03581295162439346, | |
| "kl": 0.00011820532381534576, | |
| "learning_rate": 1.1743223682775649e-07, | |
| "loss": 0.0582, | |
| "reward": 0.6189532484859228, | |
| "reward_std": 0.92426348477602, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2414.6770629882812, | |
| "dapo/avg_reward_std": 0.26570350316263014, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333333749924937, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 41.785714285714285, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21257142857142858, | |
| "grad_norm": 0.04000677913427353, | |
| "kl": 5.166977643966675e-05, | |
| "learning_rate": 1.1533337816991931e-07, | |
| "loss": 0.0842, | |
| "reward": 0.6384202986955643, | |
| "reward_std": 0.9535242542624474, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2179.180564880371, | |
| "dapo/avg_reward_std": 0.267340756695846, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.362068974766238, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 31.25, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21371428571428572, | |
| "grad_norm": 0.03956381976604462, | |
| "kl": 7.00727105140686e-05, | |
| "learning_rate": 1.1336692317580158e-07, | |
| "loss": 0.0838, | |
| "reward": 0.6583898914977908, | |
| "reward_std": 0.9566742405295372, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2340.65975189209, | |
| "dapo/avg_reward_std": 0.19622711837291718, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.31770833721384406, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 55.51136363636363, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21485714285714286, | |
| "grad_norm": 0.03709344565868378, | |
| "kl": 9.210407733917236e-05, | |
| "learning_rate": 1.1153347084664419e-07, | |
| "loss": 0.0542, | |
| "reward": 0.5126780550926924, | |
| "reward_std": 0.9266727864742279, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3183.7395782470703, | |
| "dapo/avg_reward_std": 0.19985724004303537, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.23577236293292628, | |
| "dapo/num_sampling_attempts": 5.125, | |
| "dapo/sampling_efficiency": 23.1547619047619, | |
| "dapo/total_prompts_processed": 30.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.216, | |
| "grad_norm": 0.025569448247551918, | |
| "kl": 3.505079075694084e-05, | |
| "learning_rate": 1.0983357966978745e-07, | |
| "loss": 0.0446, | |
| "reward": 0.524140851572156, | |
| "reward_std": 0.9313696026802063, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2137.0764083862305, | |
| "dapo/avg_reward_std": 0.2310014808177948, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33333334028720857, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 49.479166666666664, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21714285714285714, | |
| "grad_norm": 0.049144402146339417, | |
| "kl": 0.00011414289474487305, | |
| "learning_rate": 1.0826776744855121e-07, | |
| "loss": 0.0597, | |
| "reward": 0.6003488898277283, | |
| "reward_std": 0.9967769384384155, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2711.965301513672, | |
| "dapo/avg_reward_std": 0.27090639670689903, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3388888930281003, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 42.604166666666664, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21828571428571428, | |
| "grad_norm": 0.03207146376371384, | |
| "kl": 7.285922765731812e-05, | |
| "learning_rate": 1.068365111445064e-07, | |
| "loss": 0.0774, | |
| "reward": 0.5157463289797306, | |
| "reward_std": 0.9445067569613457, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2634.809066772461, | |
| "dapo/avg_reward_std": 0.23276896492854968, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.29729730414377675, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 31.38888888888889, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21942857142857142, | |
| "grad_norm": 0.026157336309552193, | |
| "kl": 4.951097071170807e-05, | |
| "learning_rate": 1.0554024673218806e-07, | |
| "loss": 0.0183, | |
| "reward": 0.4917615167796612, | |
| "reward_std": 0.932147391140461, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2687.6562423706055, | |
| "dapo/avg_reward_std": 0.1842694640159607, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3000000034769376, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 37.20238095238095, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22057142857142858, | |
| "grad_norm": 0.036305345594882965, | |
| "kl": 5.197897553443909e-05, | |
| "learning_rate": 1.0437936906629334e-07, | |
| "loss": 0.0737, | |
| "reward": 0.8177419528365135, | |
| "reward_std": 0.9367102533578873, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2567.093780517578, | |
| "dapo/avg_reward_std": 0.2292217422615398, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30808081364992895, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 36.284722222222214, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22171428571428572, | |
| "grad_norm": 0.03788081929087639, | |
| "kl": 8.841603994369507e-05, | |
| "learning_rate": 1.0335423176140511e-07, | |
| "loss": 0.0745, | |
| "reward": 0.4994155182503164, | |
| "reward_std": 0.9395617768168449, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2132.22225189209, | |
| "dapo/avg_reward_std": 0.23152823698136113, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34408602791447795, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 46.800595238095234, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22285714285714286, | |
| "grad_norm": 0.03888849914073944, | |
| "kl": 7.880479097366333e-05, | |
| "learning_rate": 1.0246514708427701e-07, | |
| "loss": 0.0078, | |
| "reward": 0.4982965085655451, | |
| "reward_std": 0.9277759939432144, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2242.8437881469727, | |
| "dapo/avg_reward_std": 0.2252171416031687, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.27192983148913635, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 42.49999999999999, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.224, | |
| "grad_norm": 0.03532218188047409, | |
| "kl": 8.079037070274353e-05, | |
| "learning_rate": 1.017123858587145e-07, | |
| "loss": -0.0036, | |
| "reward": 0.6249313289299607, | |
| "reward_std": 0.9415610581636429, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2186.913246154785, | |
| "dapo/avg_reward_std": 0.2062954322287911, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26754386566187205, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 27.549603174603174, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22514285714285714, | |
| "grad_norm": 0.05644107237458229, | |
| "kl": 0.00012712180614471436, | |
| "learning_rate": 1.0109617738307911e-07, | |
| "loss": 0.0266, | |
| "reward": 0.6248354203999043, | |
| "reward_std": 0.9687103852629662, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2853.7430725097656, | |
| "dapo/avg_reward_std": 0.2791443226429132, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41666667277996355, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 45.3125, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22628571428571428, | |
| "grad_norm": 0.025631451979279518, | |
| "kl": 7.095187902450562e-05, | |
| "learning_rate": 1.0061670936044178e-07, | |
| "loss": 0.0195, | |
| "reward": 0.683892990462482, | |
| "reward_std": 0.9487637504935265, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2660.218780517578, | |
| "dapo/avg_reward_std": 0.24377418825259575, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3910256469478974, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 56.597222222222214, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22742857142857142, | |
| "grad_norm": 0.034018680453300476, | |
| "kl": 6.149709224700928e-05, | |
| "learning_rate": 1.002741278414069e-07, | |
| "loss": 0.0404, | |
| "reward": 0.565577644854784, | |
| "reward_std": 0.9079905152320862, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2421.875015258789, | |
| "dapo/avg_reward_std": 0.3100067762037118, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4027777823309104, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 45.83333333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 0.030885452404618263, | |
| "kl": 7.659196853637695e-05, | |
| "learning_rate": 1.0006853717962393e-07, | |
| "loss": 0.0132, | |
| "reward": 0.5110834892839193, | |
| "reward_std": 0.8930082246661186, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "step": 200, | |
| "total_flos": 0.0, | |
| "train_loss": 0.009447227440541611, | |
| "train_runtime": 101500.2967, | |
| "train_samples_per_second": 0.095, | |
| "train_steps_per_second": 0.002 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |