DAPO-8B / trainer_state.json
kangdawei's picture
Model save
adc2c55 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.22857142857142856,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_fraction": 0.0,
"completion_length": 2124.791679382324,
"dapo/avg_reward_std": 0.28261276125907897,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.42666667342185977,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 45.83333333333333,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.001142857142857143,
"grad_norm": 0.03718917816877365,
"kl": 0.0,
"learning_rate": 0.0,
"loss": -0.0465,
"reward": 0.6372265852987766,
"reward_std": 0.9629172012209892,
"step": 1
},
{
"clip_fraction": 0.0,
"completion_length": 2559.6631774902344,
"dapo/avg_reward_std": 0.2737089714833668,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.39285715403301374,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 32.291666666666664,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.002285714285714286,
"grad_norm": 0.031548872590065,
"kl": 0.0,
"learning_rate": 1e-07,
"loss": 0.0292,
"reward": 0.2883484517224133,
"reward_std": 0.9225177392363548,
"step": 2
},
{
"clip_fraction": 0.0,
"completion_length": 2259.0243072509766,
"dapo/avg_reward_std": 0.30627372419392623,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40740741734151487,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 38.33333333333333,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.0034285714285714284,
"grad_norm": 0.028476394712924957,
"kl": 3.738701343536377e-05,
"learning_rate": 2e-07,
"loss": 0.0118,
"reward": 0.5692771524190903,
"reward_std": 0.9722258150577545,
"step": 3
},
{
"clip_fraction": 0.0,
"completion_length": 2388.763916015625,
"dapo/avg_reward_std": 0.2417103610932827,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34895834093913436,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 29.479166666666664,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.004571428571428572,
"grad_norm": 0.03074878267943859,
"kl": 3.4555792808532715e-05,
"learning_rate": 3e-07,
"loss": 0.0428,
"reward": 0.5176859218627214,
"reward_std": 0.9351213574409485,
"step": 4
},
{
"clip_fraction": 0.0,
"completion_length": 2228.9131927490234,
"dapo/avg_reward_std": 0.24784977205338016,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3494623731220922,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 34.375,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.005714285714285714,
"grad_norm": 0.03052515536546707,
"kl": 4.2438507080078125e-05,
"learning_rate": 4e-07,
"loss": 0.0573,
"reward": 0.5747799873352051,
"reward_std": 0.9150463417172432,
"step": 5
},
{
"clip_fraction": 0.0,
"completion_length": 2526.2743377685547,
"dapo/avg_reward_std": 0.31032066589052026,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4772727367552844,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 39.58333333333333,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.006857142857142857,
"grad_norm": 0.031065233051776886,
"kl": 6.331503391265869e-05,
"learning_rate": 5e-07,
"loss": 0.068,
"reward": 0.49577395524829626,
"reward_std": 0.9604900777339935,
"step": 6
},
{
"clip_fraction": 0.0,
"completion_length": 2096.857650756836,
"dapo/avg_reward_std": 0.30248596491637053,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.43827161303272955,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 33.33333333333333,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.008,
"grad_norm": 0.03395611792802811,
"kl": 3.603100776672363e-05,
"learning_rate": 6e-07,
"loss": 0.0104,
"reward": 0.6337036956101656,
"reward_std": 0.9339632987976074,
"step": 7
},
{
"clip_fraction": 0.0,
"completion_length": 2080.482681274414,
"dapo/avg_reward_std": 0.2619025791063905,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3489583395421505,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 27.82738095238095,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.009142857142857144,
"grad_norm": 0.030713744461536407,
"kl": 3.699958324432373e-05,
"learning_rate": 7e-07,
"loss": 0.0191,
"reward": 0.5047293808311224,
"reward_std": 0.9456561654806137,
"step": 8
},
{
"clip_fraction": 0.0,
"completion_length": 2575.715316772461,
"dapo/avg_reward_std": 0.26183396059533826,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4275362387947414,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 56.25,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.010285714285714285,
"grad_norm": 0.02862783893942833,
"kl": 3.787875175476074e-05,
"learning_rate": 8e-07,
"loss": 0.0251,
"reward": 0.49641977716237307,
"reward_std": 0.9346907436847687,
"step": 9
},
{
"clip_fraction": 0.0,
"completion_length": 2574.7951431274414,
"dapo/avg_reward_std": 0.2888991279261453,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46031746694019865,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 61.875,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.011428571428571429,
"grad_norm": 0.03313002362847328,
"kl": 2.9653310775756836e-05,
"learning_rate": 9e-07,
"loss": 0.0131,
"reward": 0.6514056231826544,
"reward_std": 0.9486276879906654,
"step": 10
},
{
"clip_fraction": 0.0,
"completion_length": 2648.3541870117188,
"dapo/avg_reward_std": 0.1985154973136054,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.23333333631356556,
"dapo/num_sampling_attempts": 5.625,
"dapo/sampling_efficiency": 22.747252747252745,
"dapo/total_prompts_processed": 33.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.012571428571428572,
"grad_norm": 0.02842891961336136,
"kl": 4.6372413635253906e-05,
"learning_rate": 1e-06,
"loss": 0.0228,
"reward": 0.3831507060676813,
"reward_std": 0.9138674512505531,
"step": 11
},
{
"clip_fraction": 0.0,
"completion_length": 2340.7708435058594,
"dapo/avg_reward_std": 0.21896107792854308,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.25000000558793545,
"dapo/num_sampling_attempts": 5.0,
"dapo/sampling_efficiency": 29.791666666666664,
"dapo/total_prompts_processed": 30.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.013714285714285714,
"grad_norm": 0.02896970883011818,
"kl": 3.764033317565918e-05,
"learning_rate": 9.997258721585931e-07,
"loss": 0.0141,
"reward": 0.3742078524082899,
"reward_std": 0.9111683145165443,
"step": 12
},
{
"clip_fraction": 0.0,
"completion_length": 2731.9687576293945,
"dapo/avg_reward_std": 0.2593883651274222,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.39506174016881873,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 43.95833333333333,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.014857142857142857,
"grad_norm": 0.028494343161582947,
"kl": 4.1812658309936523e-05,
"learning_rate": 9.989038226169207e-07,
"loss": 0.0482,
"reward": 0.37119605229236186,
"reward_std": 0.9484475553035736,
"step": 13
},
{
"clip_fraction": 0.0,
"completion_length": 2346.684066772461,
"dapo/avg_reward_std": 0.2633256334247011,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3787878860126842,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 40.416666666666664,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.016,
"grad_norm": 0.03419339284300804,
"kl": 3.219395875930786e-05,
"learning_rate": 9.975348529157229e-07,
"loss": 0.0443,
"reward": 0.5307169873267412,
"reward_std": 0.8819384500384331,
"step": 14
},
{
"clip_fraction": 0.0,
"completion_length": 2438.8437881469727,
"dapo/avg_reward_std": 0.31698794450078693,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.48412699571677614,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 49.99999999999999,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.017142857142857144,
"grad_norm": 0.03230522945523262,
"kl": 3.4749507904052734e-05,
"learning_rate": 9.956206309337066e-07,
"loss": 0.0519,
"reward": 0.6968788839876652,
"reward_std": 0.9826493486762047,
"step": 15
},
{
"clip_fraction": 0.0,
"completion_length": 2835.3125076293945,
"dapo/avg_reward_std": 0.2820873036980629,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36111111876865226,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 49.375,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.018285714285714287,
"grad_norm": 0.026719439774751663,
"kl": 3.375113010406494e-05,
"learning_rate": 9.931634888554935e-07,
"loss": 0.0158,
"reward": 0.4585288055241108,
"reward_std": 0.9621468484401703,
"step": 16
},
{
"clip_fraction": 0.0,
"completion_length": 2489.513870239258,
"dapo/avg_reward_std": 0.24821309347947437,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35000000447034835,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 51.25,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.019428571428571427,
"grad_norm": 0.030841730535030365,
"kl": 3.2588839530944824e-05,
"learning_rate": 9.901664203302124e-07,
"loss": 0.0342,
"reward": 0.4615583084523678,
"reward_std": 0.8882262408733368,
"step": 17
},
{
"clip_fraction": 0.0,
"completion_length": 2291.8854217529297,
"dapo/avg_reward_std": 0.3492339625954628,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4000000149011612,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 46.87499999999999,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.02057142857142857,
"grad_norm": 0.4981432557106018,
"kl": 4.331767559051514e-05,
"learning_rate": 9.866330768241983e-07,
"loss": 0.0782,
"reward": 0.5650830613449216,
"reward_std": 0.960162565112114,
"step": 18
},
{
"clip_fraction": 0.0,
"completion_length": 1727.9479217529297,
"dapo/avg_reward_std": 0.2201171379822951,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2863247940937678,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 27.01388888888889,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.021714285714285714,
"grad_norm": 0.034473638981580734,
"kl": 2.7894973754882812e-05,
"learning_rate": 9.825677631722435e-07,
"loss": -0.0027,
"reward": 0.5283844769001007,
"reward_std": 0.9302913695573807,
"step": 19
},
{
"clip_fraction": 0.0,
"completion_length": 1848.9062576293945,
"dapo/avg_reward_std": 0.2080523163983316,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3030303070942561,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 40.74404761904762,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.022857142857142857,
"grad_norm": 0.03650596737861633,
"kl": 2.997368574142456e-05,
"learning_rate": 9.779754323328192e-07,
"loss": 0.0066,
"reward": 0.47246094793081284,
"reward_std": 0.925552561879158,
"step": 20
},
{
"clip_fraction": 0.0,
"completion_length": 2310.6354370117188,
"dapo/avg_reward_std": 0.18431008011102676,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26250000260770323,
"dapo/num_sampling_attempts": 5.0,
"dapo/sampling_efficiency": 32.53472222222222,
"dapo/total_prompts_processed": 30.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.024,
"grad_norm": 0.02872428111732006,
"kl": 3.707408905029297e-05,
"learning_rate": 9.728616793536587e-07,
"loss": 0.0041,
"reward": 0.5466808546334505,
"reward_std": 0.9614025354385376,
"step": 21
},
{
"clip_fraction": 0.0,
"completion_length": 2628.4618072509766,
"dapo/avg_reward_std": 0.27239492272629456,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3235294157091309,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 26.875,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.025142857142857144,
"grad_norm": 0.03156612813472748,
"kl": 4.024803638458252e-05,
"learning_rate": 9.672327345550543e-07,
"loss": 0.0396,
"reward": 0.4231120813637972,
"reward_std": 0.9312948659062386,
"step": 22
},
{
"clip_fraction": 0.0,
"completion_length": 2495.7673873901367,
"dapo/avg_reward_std": 0.30711027341229574,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3988095335662365,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 31.249999999999993,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.026285714285714287,
"grad_norm": 0.028224533423781395,
"kl": 3.413856029510498e-05,
"learning_rate": 9.610954559391704e-07,
"loss": 0.0195,
"reward": 0.5285261562094092,
"reward_std": 0.9373103529214859,
"step": 23
},
{
"clip_fraction": 0.0,
"completion_length": 1944.9201278686523,
"dapo/avg_reward_std": 0.29968351125717163,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4533333480358124,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 44.27083333333333,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.027428571428571427,
"grad_norm": 0.03633056953549385,
"kl": 3.1538307666778564e-05,
"learning_rate": 9.54457320834625e-07,
"loss": 0.0693,
"reward": 0.5397752095013857,
"reward_std": 0.9495814517140388,
"step": 24
},
{
"clip_fraction": 0.0,
"completion_length": 2616.593780517578,
"dapo/avg_reward_std": 0.16712580593127124,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.19811321232678755,
"dapo/num_sampling_attempts": 6.625,
"dapo/sampling_efficiency": 19.166666666666664,
"dapo/total_prompts_processed": 39.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.02857142857142857,
"grad_norm": 0.024344539269804955,
"kl": 3.676116466522217e-05,
"learning_rate": 9.473264167865171e-07,
"loss": 0.0139,
"reward": 0.3185653127729893,
"reward_std": 0.9151088818907738,
"step": 25
},
{
"clip_fraction": 0.0,
"completion_length": 2116.7257232666016,
"dapo/avg_reward_std": 0.27600910129218265,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33908046319566926,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 44.6875,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.029714285714285714,
"grad_norm": 0.031155193224549294,
"kl": 3.579258918762207e-05,
"learning_rate": 9.397114317029974e-07,
"loss": 0.0725,
"reward": 0.5197067707777023,
"reward_std": 0.8911866471171379,
"step": 26
},
{
"clip_fraction": 0.0,
"completion_length": 2148.781265258789,
"dapo/avg_reward_std": 0.24896243140101432,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31666667349636557,
"dapo/num_sampling_attempts": 5.0,
"dapo/sampling_efficiency": 22.63888888888889,
"dapo/total_prompts_processed": 30.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.030857142857142857,
"grad_norm": 0.03762076795101166,
"kl": 3.104656934738159e-05,
"learning_rate": 9.316216432703916e-07,
"loss": -0.0333,
"reward": 0.5081147998571396,
"reward_std": 0.9414060413837433,
"step": 27
},
{
"clip_fraction": 0.0,
"completion_length": 2357.4062881469727,
"dapo/avg_reward_std": 0.22747237629750194,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2990196110571132,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 34.49404761904761,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.032,
"grad_norm": 0.02982812374830246,
"kl": 2.621859312057495e-05,
"learning_rate": 9.230669076497687e-07,
"loss": 0.0231,
"reward": 0.7687274925410748,
"reward_std": 0.9382865354418755,
"step": 28
},
{
"clip_fraction": 0.0,
"completion_length": 2772.941047668457,
"dapo/avg_reward_std": 0.2300749086972439,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.28282828854792047,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 48.482142857142854,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.03314285714285714,
"grad_norm": 0.030160676687955856,
"kl": 2.812594175338745e-05,
"learning_rate": 9.140576474687263e-07,
"loss": 0.0019,
"reward": 0.41888202354311943,
"reward_std": 0.9044449031352997,
"step": 29
},
{
"clip_fraction": 0.0,
"completion_length": 2038.208366394043,
"dapo/avg_reward_std": 0.1657373425437183,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.21544715943859843,
"dapo/num_sampling_attempts": 5.125,
"dapo/sampling_efficiency": 45.71969696969697,
"dapo/total_prompts_processed": 30.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.03428571428571429,
"grad_norm": 0.040263354778289795,
"kl": 3.8951635360717773e-05,
"learning_rate": 9.046048391230247e-07,
"loss": 0.0158,
"reward": 0.6328074131160975,
"reward_std": 0.913766622543335,
"step": 30
},
{
"clip_fraction": 0.0,
"completion_length": 2610.149299621582,
"dapo/avg_reward_std": 0.24689391613006592,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.39333333909511564,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 50.74404761904762,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.03542857142857143,
"grad_norm": 0.03027450665831566,
"kl": 3.1307339668273926e-05,
"learning_rate": 8.9471999940354e-07,
"loss": 0.0264,
"reward": 0.6263847425580025,
"reward_std": 0.9919310808181763,
"step": 31
},
{
"clip_fraction": 0.0,
"completion_length": 2505.697952270508,
"dapo/avg_reward_std": 0.26817766793312564,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34946237216072695,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 33.68055555555555,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.036571428571428574,
"grad_norm": 0.02961750328540802,
"kl": 2.7127563953399658e-05,
"learning_rate": 8.844151714648274e-07,
"loss": 0.0166,
"reward": 0.6057538501918316,
"reward_std": 0.9584499895572662,
"step": 32
},
{
"clip_fraction": 0.0,
"completion_length": 2879.420181274414,
"dapo/avg_reward_std": 0.24957223816050422,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2824074120985137,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 35.51136363636363,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.037714285714285714,
"grad_norm": 0.028292173519730568,
"kl": 2.950429916381836e-05,
"learning_rate": 8.737029101523929e-07,
"loss": 0.032,
"reward": 0.4974850555881858,
"reward_std": 0.9284666180610657,
"step": 33
},
{
"clip_fraction": 0.0,
"completion_length": 2605.826400756836,
"dapo/avg_reward_std": 0.27582160755991936,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41666667101283866,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 42.70833333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.038857142857142854,
"grad_norm": 0.028110038489103317,
"kl": 3.172457218170166e-05,
"learning_rate": 8.625962667065487e-07,
"loss": 0.0358,
"reward": 0.5906332535669208,
"reward_std": 0.8970795348286629,
"step": 34
},
{
"clip_fraction": 0.0,
"completion_length": 2197.09033203125,
"dapo/avg_reward_std": 0.2899627904097239,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3722222303350767,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 33.035714285714285,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04,
"grad_norm": 0.03307325020432472,
"kl": 3.203749656677246e-05,
"learning_rate": 8.511087728614862e-07,
"loss": 0.024,
"reward": 0.6485824584960938,
"reward_std": 0.9721796959638596,
"step": 35
},
{
"clip_fraction": 0.0,
"completion_length": 2999.3507080078125,
"dapo/avg_reward_std": 0.20956570729613305,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26250000707805154,
"dapo/num_sampling_attempts": 5.0,
"dapo/sampling_efficiency": 22.51488095238095,
"dapo/total_prompts_processed": 30.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04114285714285714,
"grad_norm": 0.028769005089998245,
"kl": 3.2588839530944824e-05,
"learning_rate": 8.392544243589427e-07,
"loss": 0.0619,
"reward": 0.48274967167526484,
"reward_std": 0.8917501345276833,
"step": 36
},
{
"clip_fraction": 0.0,
"completion_length": 2790.3020935058594,
"dapo/avg_reward_std": 0.30638546783190507,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.42307692995438206,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 35.20833333333333,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04228571428571429,
"grad_norm": 0.026894288137555122,
"kl": 3.5509467124938965e-05,
"learning_rate": 8.270476638965461e-07,
"loss": 0.0283,
"reward": 0.5098943561315536,
"reward_std": 0.9712026715278625,
"step": 37
},
{
"clip_fraction": 0.0,
"completion_length": 2677.1493530273438,
"dapo/avg_reward_std": 0.18201035128699408,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2481481538878547,
"dapo/num_sampling_attempts": 5.625,
"dapo/sampling_efficiency": 25.416666666666664,
"dapo/total_prompts_processed": 33.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04342857142857143,
"grad_norm": 0.027049226686358452,
"kl": 2.641230821609497e-05,
"learning_rate": 8.145033635316128e-07,
"loss": 0.0457,
"reward": 0.507211847230792,
"reward_std": 0.9677048400044441,
"step": 38
},
{
"clip_fraction": 0.0,
"completion_length": 3130.437530517578,
"dapo/avg_reward_std": 0.2055508976473528,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3137254956014016,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 26.160714285714278,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.044571428571428574,
"grad_norm": 0.027378324419260025,
"kl": 4.1447579860687256e-05,
"learning_rate": 8.01636806561836e-07,
"loss": 0.0522,
"reward": 0.5557294674217701,
"reward_std": 0.9394431114196777,
"step": 39
},
{
"clip_fraction": 0.0,
"completion_length": 2026.0486297607422,
"dapo/avg_reward_std": 0.20257248067193562,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2916666749450896,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 29.86111111111111,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.045714285714285714,
"grad_norm": 0.032405752688646317,
"kl": 1.9609928131103516e-05,
"learning_rate": 7.884636689049422e-07,
"loss": 0.0336,
"reward": 0.5694049745798111,
"reward_std": 0.9232507050037384,
"step": 40
},
{
"clip_fraction": 0.0,
"completion_length": 2640.326416015625,
"dapo/avg_reward_std": 0.21237638321789828,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34343435231483344,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 29.791666666666664,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.046857142857142854,
"grad_norm": 0.027951980009675026,
"kl": 2.6788562536239624e-05,
"learning_rate": 7.75e-07,
"loss": 0.0234,
"reward": 0.5206635389477015,
"reward_std": 0.9366661533713341,
"step": 41
},
{
"clip_fraction": 0.0,
"completion_length": 2681.18058013916,
"dapo/avg_reward_std": 0.24859387196343521,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3218390854268238,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 35.416666666666664,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.048,
"grad_norm": 0.03045503795146942,
"kl": 3.679096698760986e-05,
"learning_rate": 7.612622032536507e-07,
"loss": 0.0237,
"reward": 0.4700614605098963,
"reward_std": 0.9389084428548813,
"step": 42
},
{
"clip_fraction": 0.0,
"completion_length": 2398.7118072509766,
"dapo/avg_reward_std": 0.2748411413161985,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.322580651890847,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 29.999999999999996,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04914285714285714,
"grad_norm": 0.02945004403591156,
"kl": 2.7336180210113525e-05,
"learning_rate": 7.472670160550848e-07,
"loss": -0.0567,
"reward": 0.6530590765178204,
"reward_std": 0.929742157459259,
"step": 43
},
{
"clip_fraction": 0.0,
"completion_length": 1968.3437805175781,
"dapo/avg_reward_std": 0.20995861871374977,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2685185232096248,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 42.410714285714285,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05028571428571429,
"grad_norm": 0.0354490801692009,
"kl": 1.671910285949707e-05,
"learning_rate": 7.330314893841101e-07,
"loss": 0.0869,
"reward": 0.6298563629388809,
"reward_std": 0.9230287447571754,
"step": 44
},
{
"clip_fraction": 0.0,
"completion_length": 2218.2743225097656,
"dapo/avg_reward_std": 0.260509067773819,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36666667262713115,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 33.229166666666664,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05142857142857143,
"grad_norm": 0.02954520471394062,
"kl": 2.514384686946869e-05,
"learning_rate": 7.185729670371604e-07,
"loss": 0.0031,
"reward": 0.6325996220111847,
"reward_std": 0.9546400979161263,
"step": 45
},
{
"clip_fraction": 0.0,
"completion_length": 2081.1458587646484,
"dapo/avg_reward_std": 0.2187695243666249,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2849462402443732,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 37.22222222222222,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.052571428571428575,
"grad_norm": 0.033979643136262894,
"kl": 2.872943878173828e-05,
"learning_rate": 7.039090644965509e-07,
"loss": -0.0104,
"reward": 0.5167231820523739,
"reward_std": 0.9025325626134872,
"step": 46
},
{
"clip_fraction": 0.0,
"completion_length": 2117.541702270508,
"dapo/avg_reward_std": 0.18839570879936218,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26811594580826553,
"dapo/num_sampling_attempts": 5.75,
"dapo/sampling_efficiency": 20.441919191919194,
"dapo/total_prompts_processed": 34.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.053714285714285714,
"grad_norm": 0.03177877888083458,
"kl": 3.078579902648926e-05,
"learning_rate": 6.890576474687263e-07,
"loss": 0.0077,
"reward": 0.3684711689129472,
"reward_std": 0.8811993673443794,
"step": 47
},
{
"clip_fraction": 0.0,
"completion_length": 2177.4444885253906,
"dapo/avg_reward_std": 0.19605370469995448,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2702702763112816,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 39.40972222222222,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.054857142857142854,
"grad_norm": 0.04067355766892433,
"kl": 2.4996697902679443e-05,
"learning_rate": 6.740368101176495e-07,
"loss": 0.0053,
"reward": 0.5635924749076366,
"reward_std": 0.9323460608720779,
"step": 48
},
{
"clip_fraction": 0.0,
"completion_length": 3022.513885498047,
"dapo/avg_reward_std": 0.22437315998655377,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30808081003752624,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 51.880411255411246,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.056,
"grad_norm": 0.028243908658623695,
"kl": 3.2588839530944824e-05,
"learning_rate": 6.588648530198504e-07,
"loss": 0.0463,
"reward": 0.5983518976718187,
"reward_std": 0.97667645663023,
"step": 49
},
{
"clip_fraction": 0.0,
"completion_length": 2369.423614501953,
"dapo/avg_reward_std": 0.25065614397709185,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36538461996958804,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 51.666666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05714285714285714,
"grad_norm": 0.03361990302801132,
"kl": 2.4838373064994812e-05,
"learning_rate": 6.435602608679916e-07,
"loss": -0.0041,
"reward": 0.6849855165928602,
"reward_std": 0.9522178247570992,
"step": 50
},
{
"clip_fraction": 0.0,
"completion_length": 2274.833396911621,
"dapo/avg_reward_std": 0.22345838612980312,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2916666745311684,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 27.132936507936506,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05828571428571429,
"grad_norm": 0.031927697360515594,
"kl": 1.7890706658363342e-05,
"learning_rate": 6.281416799501187e-07,
"loss": 0.0196,
"reward": 0.8541890066117048,
"reward_std": 0.9146186113357544,
"step": 51
},
{
"clip_fraction": 0.0,
"completion_length": 2918.0799102783203,
"dapo/avg_reward_std": 0.28684074508732765,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333386429425,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 41.36904761904762,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05942857142857143,
"grad_norm": 0.026396779343485832,
"kl": 2.3087020963430405e-05,
"learning_rate": 6.126278954320294e-07,
"loss": 0.0343,
"reward": 0.44786757230758667,
"reward_std": 0.9706326127052307,
"step": 52
},
{
"clip_fraction": 0.0,
"completion_length": 2045.833339691162,
"dapo/avg_reward_std": 0.2355064716604021,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2870370431078805,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 31.354166666666664,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.060571428571428575,
"grad_norm": 0.04913632944226265,
"kl": 2.1755695343017578e-05,
"learning_rate": 5.97037808470444e-07,
"loss": 0.0387,
"reward": 0.6510349959135056,
"reward_std": 0.9507962614297867,
"step": 53
},
{
"clip_fraction": 0.0,
"completion_length": 1948.9444427490234,
"dapo/avg_reward_std": 0.243668794631958,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.366666671037674,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 56.5625,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.061714285714285715,
"grad_norm": 0.040572620928287506,
"kl": 2.1360814571380615e-05,
"learning_rate": 5.813904131848564e-07,
"loss": 0.0417,
"reward": 0.5514028863981366,
"reward_std": 0.9589040726423264,
"step": 54
},
{
"clip_fraction": 0.0,
"completion_length": 2484.541648864746,
"dapo/avg_reward_std": 0.30484401606596434,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.42307693224686843,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 42.18749999999999,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06285714285714286,
"grad_norm": 0.0297782514244318,
"kl": 2.2893771529197693e-05,
"learning_rate": 5.657047735161255e-07,
"loss": -0.0009,
"reward": 0.4546010522171855,
"reward_std": 0.9696914628148079,
"step": 55
},
{
"clip_fraction": 0.0,
"completion_length": 1533.7361297607422,
"dapo/avg_reward_std": 0.2159253837484302,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.29797980415098596,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 34.722222222222214,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.064,
"grad_norm": 0.03312206640839577,
"kl": 7.178634405136108e-06,
"learning_rate": 5.5e-07,
"loss": 0.0108,
"reward": 0.7257717102766037,
"reward_std": 0.9033158496022224,
"step": 56
},
{
"clip_fraction": 0.0,
"completion_length": 2934.4409942626953,
"dapo/avg_reward_std": 0.2505974847337474,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36956522192644037,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 41.66666666666666,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06514285714285714,
"grad_norm": 0.02451159618794918,
"kl": 1.9356608390808105e-05,
"learning_rate": 5.342952264838747e-07,
"loss": 0.0483,
"reward": 0.5572653282433748,
"reward_std": 0.9176028743386269,
"step": 57
},
{
"clip_fraction": 0.0,
"completion_length": 1933.5243377685547,
"dapo/avg_reward_std": 0.20699472725391388,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3235294174622087,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 43.50198412698413,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06628571428571428,
"grad_norm": 0.04205997660756111,
"kl": 2.446398138999939e-05,
"learning_rate": 5.186095868151436e-07,
"loss": 0.035,
"reward": 0.5425214860588312,
"reward_std": 0.9688811302185059,
"step": 58
},
{
"clip_fraction": 0.0,
"completion_length": 2404.819435119629,
"dapo/avg_reward_std": 0.21416518474236512,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2649572701790394,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 28.070436507936506,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06742857142857143,
"grad_norm": 0.032379262149333954,
"kl": 2.0030885934829712e-05,
"learning_rate": 5.02962191529556e-07,
"loss": -0.0022,
"reward": 0.5781768467277288,
"reward_std": 0.9525356665253639,
"step": 59
},
{
"clip_fraction": 0.0,
"completion_length": 2963.888931274414,
"dapo/avg_reward_std": 0.32426256509054274,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.42857143637679873,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 58.035714285714285,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06857142857142857,
"grad_norm": 0.027211569249629974,
"kl": 1.7156358808279037e-05,
"learning_rate": 4.873721045679706e-07,
"loss": 0.0068,
"reward": 0.44747511111199856,
"reward_std": 0.9607158154249191,
"step": 60
},
{
"clip_fraction": 0.0,
"completion_length": 2205.2465591430664,
"dapo/avg_reward_std": 0.203433408588171,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2500000063329935,
"dapo/num_sampling_attempts": 5.0,
"dapo/sampling_efficiency": 38.46153846153846,
"dapo/total_prompts_processed": 30.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06971428571428571,
"grad_norm": 0.035166963934898376,
"kl": 1.146271824836731e-05,
"learning_rate": 4.7185832004988133e-07,
"loss": 0.0016,
"reward": 0.7233948148787022,
"reward_std": 0.9537224471569061,
"step": 61
},
{
"clip_fraction": 0.0,
"completion_length": 2170.302101135254,
"dapo/avg_reward_std": 0.3071755821054632,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46212121776559134,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 52.5,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07085714285714285,
"grad_norm": 0.032445963472127914,
"kl": 1.7118407413363457e-05,
"learning_rate": 4.5643973913200837e-07,
"loss": 0.0133,
"reward": 0.5614959334488958,
"reward_std": 0.9226407110691071,
"step": 62
},
{
"clip_fraction": 0.0,
"completion_length": 2304.038215637207,
"dapo/avg_reward_std": 0.3201758420025861,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3827160596847534,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 33.33333333333333,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.072,
"grad_norm": 0.03544686362147331,
"kl": 1.1014439223799855e-05,
"learning_rate": 4.4113514698014953e-07,
"loss": 0.0809,
"reward": 0.6520206034183502,
"reward_std": 0.9506091177463531,
"step": 63
},
{
"clip_fraction": 0.0,
"completion_length": 1901.3506965637207,
"dapo/avg_reward_std": 0.2710137654233862,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33950617964620944,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 38.541666666666664,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07314285714285715,
"grad_norm": 0.044119708240032196,
"kl": 2.606213092803955e-05,
"learning_rate": 4.2596318988235037e-07,
"loss": 0.0059,
"reward": 0.6546321045607328,
"reward_std": 0.9510733336210251,
"step": 64
},
{
"clip_fraction": 0.0,
"completion_length": 2792.0382232666016,
"dapo/avg_reward_std": 0.2836403740303857,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36904762951391085,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 39.58333333333333,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07428571428571429,
"grad_norm": 0.04388947784900665,
"kl": 1.2818491086363792e-05,
"learning_rate": 4.1094235253127374e-07,
"loss": 0.0675,
"reward": 0.5376700833439827,
"reward_std": 0.9546815231442451,
"step": 65
},
{
"clip_fraction": 0.0,
"completion_length": 3018.1111450195312,
"dapo/avg_reward_std": 0.2566617141167323,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35000000993410746,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 29.583333333333325,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07542857142857143,
"grad_norm": 0.030510403215885162,
"kl": 2.337433397769928e-05,
"learning_rate": 3.9609093550344907e-07,
"loss": 0.067,
"reward": 0.45654861629009247,
"reward_std": 0.9348908290266991,
"step": 66
},
{
"clip_fraction": 0.0,
"completion_length": 2246.7361183166504,
"dapo/avg_reward_std": 0.17681238457963272,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2657657684506597,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 39.75198412698412,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07657142857142857,
"grad_norm": 0.039485227316617966,
"kl": 3.0115246772766113e-05,
"learning_rate": 3.8142703296283953e-07,
"loss": -0.0103,
"reward": 0.559457328170538,
"reward_std": 0.9844456240534782,
"step": 67
},
{
"clip_fraction": 0.0,
"completion_length": 1877.3090591430664,
"dapo/avg_reward_std": 0.21082516993795122,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2809523867709296,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 40.13888888888889,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07771428571428571,
"grad_norm": 0.04208315163850784,
"kl": 1.7916783690452576e-05,
"learning_rate": 3.6696851061588994e-07,
"loss": 0.0055,
"reward": 0.71805115416646,
"reward_std": 0.9486410617828369,
"step": 68
},
{
"clip_fraction": 0.0,
"completion_length": 2743.187484741211,
"dapo/avg_reward_std": 0.3629622704842511,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5882353020064971,
"dapo/num_sampling_attempts": 2.125,
"dapo/sampling_efficiency": 57.291666666666664,
"dapo/total_prompts_processed": 12.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07885714285714286,
"grad_norm": 0.046305615454912186,
"kl": 1.8481165170669556e-05,
"learning_rate": 3.5273298394491515e-07,
"loss": 0.0753,
"reward": 0.5533816255629063,
"reward_std": 0.9835677221417427,
"step": 69
},
{
"clip_fraction": 0.0,
"completion_length": 1971.8750610351562,
"dapo/avg_reward_std": 0.290031298995018,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3958333432674408,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 50.11904761904761,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08,
"grad_norm": 0.03249451890587807,
"kl": 1.0361894965171814e-05,
"learning_rate": 3.387377967463493e-07,
"loss": 0.0123,
"reward": 0.7815902195870876,
"reward_std": 0.9491127580404282,
"step": 70
},
{
"clip_fraction": 0.0,
"completion_length": 2149.5729370117188,
"dapo/avg_reward_std": 0.30720199798715525,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37931035356274967,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 31.666666666666664,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08114285714285714,
"grad_norm": 0.02995998226106167,
"kl": 2.8252601623535156e-05,
"learning_rate": 3.250000000000001e-07,
"loss": 0.0769,
"reward": 0.5328625496476889,
"reward_std": 0.9026356488466263,
"step": 71
},
{
"clip_fraction": 0.0,
"completion_length": 1963.1562538146973,
"dapo/avg_reward_std": 0.27671699684399825,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4551282163995963,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 46.041666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08228571428571428,
"grad_norm": 0.046918418258428574,
"kl": 3.359094262123108e-05,
"learning_rate": 3.115363310950578e-07,
"loss": 0.0368,
"reward": 0.32596728252246976,
"reward_std": 0.917833186686039,
"step": 72
},
{
"clip_fraction": 0.0,
"completion_length": 2666.1666717529297,
"dapo/avg_reward_std": 0.2536189202219248,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34895834140479565,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 37.84722222222222,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08342857142857144,
"grad_norm": 0.0253219623118639,
"kl": 3.542192280292511e-05,
"learning_rate": 2.9836319343816397e-07,
"loss": 0.0107,
"reward": 0.6293175183236599,
"reward_std": 0.935965321958065,
"step": 73
},
{
"clip_fraction": 0.0,
"completion_length": 2119.447982788086,
"dapo/avg_reward_std": 0.26048696994781495,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4200000029802322,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 47.291666666666664,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08457142857142858,
"grad_norm": 0.034480538219213486,
"kl": 1.7508864402770996e-05,
"learning_rate": 2.854966364683872e-07,
"loss": 0.0483,
"reward": 0.7494360618293285,
"reward_std": 0.9492424502968788,
"step": 74
},
{
"clip_fraction": 0.0,
"completion_length": 2078.9375,
"dapo/avg_reward_std": 0.2828026126932215,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3580246976128331,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 35.11904761904762,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08571428571428572,
"grad_norm": 0.03545458987355232,
"kl": 1.3923272490501404e-05,
"learning_rate": 2.729523361034538e-07,
"loss": 0.0531,
"reward": 0.5464182365685701,
"reward_std": 0.9530047550797462,
"step": 75
},
{
"clip_fraction": 0.0,
"completion_length": 2342.5416564941406,
"dapo/avg_reward_std": 0.21854268149896103,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3080808154561303,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 32.341269841269835,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08685714285714285,
"grad_norm": 0.02881987765431404,
"kl": 1.169554889202118e-05,
"learning_rate": 2.6074557564105724e-07,
"loss": 0.0077,
"reward": 0.5642017107456923,
"reward_std": 0.9335212334990501,
"step": 76
},
{
"clip_fraction": 0.0,
"completion_length": 3205.104217529297,
"dapo/avg_reward_std": 0.2153491945493789,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2777777835726738,
"dapo/num_sampling_attempts": 5.25,
"dapo/sampling_efficiency": 23.45238095238095,
"dapo/total_prompts_processed": 31.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.088,
"grad_norm": 0.024909108877182007,
"kl": 2.2567808628082275e-05,
"learning_rate": 2.488912271385139e-07,
"loss": 0.0436,
"reward": 0.4511043671518564,
"reward_std": 0.9582105726003647,
"step": 77
},
{
"clip_fraction": 0.0,
"completion_length": 1984.7881927490234,
"dapo/avg_reward_std": 0.2325562967194451,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3703703780968984,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 46.354166666666664,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08914285714285715,
"grad_norm": 0.04120900481939316,
"kl": 2.2590160369873047e-05,
"learning_rate": 2.374037332934512e-07,
"loss": 0.0514,
"reward": 0.46765367314219475,
"reward_std": 0.9171552434563637,
"step": 78
},
{
"clip_fraction": 0.0,
"completion_length": 2322.930576324463,
"dapo/avg_reward_std": 0.24565138667821884,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35416666977107525,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 49.375,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09028571428571429,
"grad_norm": 0.03351881355047226,
"kl": 1.6979873180389404e-05,
"learning_rate": 2.2629708984760706e-07,
"loss": 0.0813,
"reward": 0.4460947550833225,
"reward_std": 0.9485716819763184,
"step": 79
},
{
"clip_fraction": 0.0,
"completion_length": 2418.187545776367,
"dapo/avg_reward_std": 0.23119631229024945,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2929292975953131,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 37.013888888888886,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09142857142857143,
"grad_norm": 0.03444164991378784,
"kl": 1.9297003746032715e-05,
"learning_rate": 2.1558482853517253e-07,
"loss": -0.0123,
"reward": 0.47735430393368006,
"reward_std": 0.9275016784667969,
"step": 80
},
{
"clip_fraction": 0.0,
"completion_length": 2673.1666870117188,
"dapo/avg_reward_std": 0.29530651973826544,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.39285714977553915,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 40.52083333333333,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09257142857142857,
"grad_norm": 0.02858138270676136,
"kl": 1.998385414481163e-05,
"learning_rate": 2.0528000059645995e-07,
"loss": 0.034,
"reward": 0.41152474470436573,
"reward_std": 0.9514285027980804,
"step": 81
},
{
"clip_fraction": 0.0,
"completion_length": 2257.954864501953,
"dapo/avg_reward_std": 0.23162428935368856,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3277777835726738,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 39.72222222222222,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09371428571428571,
"grad_norm": 0.034180980175733566,
"kl": 1.03069469332695e-05,
"learning_rate": 7.681643291108517e-07,
"loss": 0.0478,
"reward": 0.6525773257017136,
"reward_std": 0.9826234132051468,
"step": 82
},
{
"clip_fraction": 0.0,
"completion_length": 2630.8507080078125,
"dapo/avg_reward_std": 0.25974711243595394,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3511904797383717,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 49.166666666666664,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09485714285714286,
"grad_norm": 0.03644736111164093,
"kl": 1.800060272216797e-05,
"learning_rate": 7.612622032536507e-07,
"loss": 0.0921,
"reward": 0.4112757742404938,
"reward_std": 0.9365755990147591,
"step": 83
},
{
"clip_fraction": 0.0,
"completion_length": 2569.4896087646484,
"dapo/avg_reward_std": 0.20397330891518367,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.22619048080274037,
"dapo/num_sampling_attempts": 5.25,
"dapo/sampling_efficiency": 33.541666666666664,
"dapo/total_prompts_processed": 31.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.096,
"grad_norm": 0.027630111202597618,
"kl": 9.745359420776367e-06,
"learning_rate": 7.54295724882796e-07,
"loss": 0.0357,
"reward": 0.41497555933892727,
"reward_std": 0.9506618455052376,
"step": 84
},
{
"clip_fraction": 0.0,
"completion_length": 2213.0660400390625,
"dapo/avg_reward_std": 0.2754218357224618,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333334038334506,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 36.354166666666664,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09714285714285714,
"grad_norm": 0.035216327756643295,
"kl": 1.6536563634872437e-05,
"learning_rate": 7.472670160550848e-07,
"loss": 0.0527,
"reward": 0.632079154253006,
"reward_std": 0.9386599361896515,
"step": 85
},
{
"clip_fraction": 0.0,
"completion_length": 2339.1215209960938,
"dapo/avg_reward_std": 0.24339192857344946,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.291666673289405,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 35.3125,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09828571428571428,
"grad_norm": 0.03125083073973656,
"kl": 1.6085803508758545e-05,
"learning_rate": 7.401782177833147e-07,
"loss": -0.0221,
"reward": 0.4631906310096383,
"reward_std": 0.9198382347822189,
"step": 86
},
{
"clip_fraction": 0.0,
"completion_length": 1837.8993301391602,
"dapo/avg_reward_std": 0.22774873872598012,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3777777845660845,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 46.87499999999999,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09942857142857142,
"grad_norm": 0.04138842225074768,
"kl": 1.7467886209487915e-05,
"learning_rate": 7.330314893841101e-07,
"loss": 0.0024,
"reward": 0.7271542213857174,
"reward_std": 0.905590832233429,
"step": 87
},
{
"clip_fraction": 0.0,
"completion_length": 2786.0416564941406,
"dapo/avg_reward_std": 0.2095056755202157,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2952381019081388,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 35.65972222222222,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10057142857142858,
"grad_norm": 0.025848887860774994,
"kl": 7.427297532558441e-06,
"learning_rate": 7.258290078201731e-07,
"loss": 0.002,
"reward": 0.43730420619249344,
"reward_std": 0.9195110127329826,
"step": 88
},
{
"clip_fraction": 0.0,
"completion_length": 2346.68754196167,
"dapo/avg_reward_std": 0.19395678072440914,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2560975657003682,
"dapo/num_sampling_attempts": 5.125,
"dapo/sampling_efficiency": 35.01488095238095,
"dapo/total_prompts_processed": 30.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10171428571428572,
"grad_norm": 0.040970027446746826,
"kl": 1.3796612620353699e-05,
"learning_rate": 7.185729670371604e-07,
"loss": 0.0476,
"reward": 0.6351554682478309,
"reward_std": 0.8568265736103058,
"step": 89
},
{
"clip_fraction": 0.0,
"completion_length": 2486.21875,
"dapo/avg_reward_std": 0.2474305311153675,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3735632284961898,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 37.61904761904762,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10285714285714286,
"grad_norm": 0.030587567016482353,
"kl": 1.4983117580413818e-05,
"learning_rate": 7.11265577295385e-07,
"loss": 0.0254,
"reward": 0.6515812119469047,
"reward_std": 0.9235646799206734,
"step": 90
},
{
"clip_fraction": 0.0,
"completion_length": 2515.017402648926,
"dapo/avg_reward_std": 0.25874078144197876,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3913043562484824,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 51.56249999999999,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.104,
"grad_norm": 0.031289275735616684,
"kl": 6.1551108956336975e-06,
"learning_rate": 7.039090644965509e-07,
"loss": 0.0328,
"reward": 0.6403396036475897,
"reward_std": 0.9428967460989952,
"step": 91
},
{
"clip_fraction": 0.0,
"completion_length": 2979.027801513672,
"dapo/avg_reward_std": 0.2504267347486396,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2543859713171658,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 35.63041125541125,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10514285714285715,
"grad_norm": 0.029049718752503395,
"kl": -1.2740492820739746e-06,
"learning_rate": 6.965056695057204e-07,
"loss": 0.0314,
"reward": 0.535519327968359,
"reward_std": 0.8926167041063309,
"step": 92
},
{
"clip_fraction": 0.0,
"completion_length": 2552.562515258789,
"dapo/avg_reward_std": 0.2413217886801689,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333334038334506,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 34.791666666666664,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10628571428571429,
"grad_norm": 0.03139115869998932,
"kl": 1.3202428817749023e-05,
"learning_rate": 6.890576474687263e-07,
"loss": 0.067,
"reward": 0.6561751328408718,
"reward_std": 0.9787176623940468,
"step": 93
},
{
"clip_fraction": 0.0,
"completion_length": 2403.184051513672,
"dapo/avg_reward_std": 0.29813223962600416,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40384616129673445,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 40.416666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10742857142857143,
"grad_norm": 0.032709378749132156,
"kl": 2.093333750963211e-05,
"learning_rate": 6.815672671252315e-07,
"loss": 0.0328,
"reward": 0.556912356056273,
"reward_std": 0.9464646279811859,
"step": 94
},
{
"clip_fraction": 0.0,
"completion_length": 2963.795181274414,
"dapo/avg_reward_std": 0.2564438986472594,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26068376577817476,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 24.07738095238095,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10857142857142857,
"grad_norm": 0.023549171164631844,
"kl": 9.554903954267502e-06,
"learning_rate": 6.740368101176495e-07,
"loss": 0.0142,
"reward": 0.3492610058747232,
"reward_std": 0.8781530037522316,
"step": 95
},
{
"clip_fraction": 0.0,
"completion_length": 2655.21875,
"dapo/avg_reward_std": 0.31138683449138294,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46969697827642615,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 43.74999999999999,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10971428571428571,
"grad_norm": 0.03213554993271828,
"kl": 1.945020630955696e-05,
"learning_rate": 6.664685702961344e-07,
"loss": 0.0357,
"reward": 0.4872458651661873,
"reward_std": 0.9538498669862747,
"step": 96
},
{
"clip_fraction": 0.0,
"completion_length": 2325.888900756836,
"dapo/avg_reward_std": 0.18781672976911068,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2968750069849193,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 38.263888888888886,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11085714285714286,
"grad_norm": 0.03308973088860512,
"kl": 1.2524658814072609e-05,
"learning_rate": 6.588648530198504e-07,
"loss": 0.0332,
"reward": 0.5582090672105551,
"reward_std": 0.9704806208610535,
"step": 97
},
{
"clip_fraction": 0.0,
"completion_length": 2980.78125,
"dapo/avg_reward_std": 0.22120360245830134,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.29824561900214147,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 34.717261904761905,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.112,
"grad_norm": 0.02593560516834259,
"kl": 9.87970270216465e-06,
"learning_rate": 6.512279744547392e-07,
"loss": 0.0537,
"reward": 0.5110117536969483,
"reward_std": 0.9140844419598579,
"step": 98
},
{
"clip_fraction": 0.0,
"completion_length": 2679.701400756836,
"dapo/avg_reward_std": 0.22513854503631592,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.388888892200258,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 40.104166666666664,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11314285714285714,
"grad_norm": 0.028198201209306717,
"kl": -2.773245796561241e-06,
"learning_rate": 6.435602608679916e-07,
"loss": 0.0223,
"reward": 0.5703150723129511,
"reward_std": 0.9169064536690712,
"step": 99
},
{
"clip_fraction": 0.0,
"completion_length": 2113.7396087646484,
"dapo/avg_reward_std": 0.2158526074555185,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2916666724615627,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 30.823863636363633,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11428571428571428,
"grad_norm": 0.032321903854608536,
"kl": 2.765655517578125e-05,
"learning_rate": 6.358640479194451e-07,
"loss": 0.037,
"reward": 0.552736995741725,
"reward_std": 0.929665133357048,
"step": 100
},
{
"clip_fraction": 0.0,
"completion_length": 2397.545135498047,
"dapo/avg_reward_std": 0.2640196681022644,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41304348603538843,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 43.75,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11542857142857142,
"grad_norm": 0.030507881194353104,
"kl": 1.4653429388999939e-05,
"learning_rate": 6.281416799501187e-07,
"loss": 0.0216,
"reward": 0.7607237044721842,
"reward_std": 0.9413916915655136,
"step": 101
},
{
"clip_fraction": 0.0,
"completion_length": 2775.312515258789,
"dapo/avg_reward_std": 0.26319959415839267,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3910256509597485,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 38.95833333333333,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11657142857142858,
"grad_norm": 0.028825754299759865,
"kl": 1.7821788787841797e-05,
"learning_rate": 6.203955092681039e-07,
"loss": -0.0059,
"reward": 0.4367541056126356,
"reward_std": 0.9408165961503983,
"step": 102
},
{
"clip_fraction": 0.0,
"completion_length": 2606.3194580078125,
"dapo/avg_reward_std": 0.22601407093386497,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.295698931620967,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 30.624999999999993,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11771428571428572,
"grad_norm": 0.029979709535837173,
"kl": 2.3851171135902405e-06,
"learning_rate": 6.126278954320294e-07,
"loss": 0.0463,
"reward": 0.6886496935039759,
"reward_std": 0.9053627252578735,
"step": 103
},
{
"clip_fraction": 0.0,
"completion_length": 2084.829849243164,
"dapo/avg_reward_std": 0.22010741523794225,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2702702747003452,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 32.51488095238095,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11885714285714286,
"grad_norm": 0.04769710823893547,
"kl": 2.0613893866539e-05,
"learning_rate": 6.048412045323164e-07,
"loss": 0.1162,
"reward": 0.684872523881495,
"reward_std": 0.9595381543040276,
"step": 104
},
{
"clip_fraction": 0.0,
"completion_length": 1955.1354484558105,
"dapo/avg_reward_std": 0.2937169720729192,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.42361111504336196,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 49.166666666666664,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12,
"grad_norm": 0.04352044314146042,
"kl": 2.0936131477355957e-05,
"learning_rate": 5.97037808470444e-07,
"loss": -0.0017,
"reward": 0.6524754576385021,
"reward_std": 0.9669848829507828,
"step": 105
},
{
"clip_fraction": 0.0,
"completion_length": 2316.0486221313477,
"dapo/avg_reward_std": 0.2529407059773803,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3020833423361182,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 30.729166666666664,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12114285714285715,
"grad_norm": 0.03129468858242035,
"kl": 1.8656253814697266e-05,
"learning_rate": 5.892200842364462e-07,
"loss": -0.0284,
"reward": 0.6108895651996136,
"reward_std": 0.9319325312972069,
"step": 106
},
{
"clip_fraction": 0.0,
"completion_length": 2094.6909942626953,
"dapo/avg_reward_std": 0.2037892586655087,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2629629688130485,
"dapo/num_sampling_attempts": 5.625,
"dapo/sampling_efficiency": 21.066919191919194,
"dapo/total_prompts_processed": 33.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12228571428571429,
"grad_norm": 0.038948290050029755,
"kl": 2.824072726070881e-05,
"learning_rate": 5.813904131848564e-07,
"loss": 0.0748,
"reward": 0.48047966323792934,
"reward_std": 0.9251860752701759,
"step": 107
},
{
"clip_fraction": 0.0,
"completion_length": 2482.6146240234375,
"dapo/avg_reward_std": 0.19606016278266908,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.22592593100335862,
"dapo/num_sampling_attempts": 5.625,
"dapo/sampling_efficiency": 21.577380952380953,
"dapo/total_prompts_processed": 33.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12342857142857143,
"grad_norm": 0.027610260993242264,
"kl": 1.3685785233974457e-05,
"learning_rate": 5.735511803093248e-07,
"loss": 0.0016,
"reward": 0.46788009256124496,
"reward_std": 0.9522990807890892,
"step": 108
},
{
"clip_fraction": 0.0,
"completion_length": 3010.541717529297,
"dapo/avg_reward_std": 0.23601235449314117,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.38461538977347887,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 61.5530303030303,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12457142857142857,
"grad_norm": 0.031469572335481644,
"kl": 2.0675361156463623e-05,
"learning_rate": 5.657047735161255e-07,
"loss": 0.0491,
"reward": 0.6003496535122395,
"reward_std": 0.9582010880112648,
"step": 109
},
{
"clip_fraction": 0.0,
"completion_length": 2550.388931274414,
"dapo/avg_reward_std": 0.24275302588939668,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3222222273548444,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 41.666666666666664,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12571428571428572,
"grad_norm": 0.03043791465461254,
"kl": 1.619383692741394e-05,
"learning_rate": 5.578535828967777e-07,
"loss": 0.0395,
"reward": 0.6210233392193913,
"reward_std": 0.9545274153351784,
"step": 110
},
{
"clip_fraction": 0.0,
"completion_length": 2248.6771240234375,
"dapo/avg_reward_std": 0.2556017003953457,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.32291667349636555,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 40.451388888888886,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12685714285714286,
"grad_norm": 0.029558613896369934,
"kl": 1.7130747437477112e-05,
"learning_rate": 5.5e-07,
"loss": 0.0156,
"reward": 0.8898655958473682,
"reward_std": 0.8961458280682564,
"step": 111
},
{
"clip_fraction": 0.0,
"completion_length": 2790.4132537841797,
"dapo/avg_reward_std": 0.2798377914088113,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35714286299688475,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 32.291666666666664,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.128,
"grad_norm": 0.02665926143527031,
"kl": 2.7702553779818118e-05,
"learning_rate": 5.421464171032224e-07,
"loss": 0.0375,
"reward": 0.4765107296407223,
"reward_std": 0.9586756750941277,
"step": 112
},
{
"clip_fraction": 0.0,
"completion_length": 2058.163261413574,
"dapo/avg_reward_std": 0.21719616024117722,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2850877270102501,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 36.13636363636364,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12914285714285714,
"grad_norm": 0.03724399581551552,
"kl": 9.129568934440613e-05,
"learning_rate": 5.342952264838747e-07,
"loss": 0.0308,
"reward": 0.5965504869818687,
"reward_std": 0.9517285376787186,
"step": 113
},
{
"clip_fraction": 0.0,
"completion_length": 1804.7569427490234,
"dapo/avg_reward_std": 0.22654692203767837,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30645161819073463,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 46.800595238095234,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13028571428571428,
"grad_norm": 0.0444670133292675,
"kl": 3.589317202568054e-05,
"learning_rate": 5.264488196906752e-07,
"loss": 0.0217,
"reward": 0.4887783471494913,
"reward_std": 0.9572358801960945,
"step": 114
},
{
"clip_fraction": 0.0,
"completion_length": 2705.472236633301,
"dapo/avg_reward_std": 0.24942583271435328,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4285714335384823,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 51.45833333333333,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13142857142857142,
"grad_norm": 0.027661452069878578,
"kl": 1.307763159275055e-05,
"learning_rate": 5.186095868151436e-07,
"loss": -0.022,
"reward": 0.5754544343799353,
"reward_std": 0.9811793565750122,
"step": 115
},
{
"clip_fraction": 0.0,
"completion_length": 1660.2222213745117,
"dapo/avg_reward_std": 0.20845345951415398,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30630631100487066,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 32.013888888888886,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13257142857142856,
"grad_norm": 0.03922427445650101,
"kl": 7.28946179151535e-06,
"learning_rate": 5.107799157635538e-07,
"loss": 0.0279,
"reward": 0.8034113459289074,
"reward_std": 0.9163173362612724,
"step": 116
},
{
"clip_fraction": 0.0,
"completion_length": 2143.3368377685547,
"dapo/avg_reward_std": 0.25861393963849105,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3456790193363472,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 38.95833333333333,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1337142857142857,
"grad_norm": 0.0386907123029232,
"kl": 2.8124195523560047e-05,
"learning_rate": 5.02962191529556e-07,
"loss": 0.0157,
"reward": 0.5698221866041422,
"reward_std": 0.9738077968358994,
"step": 117
},
{
"clip_fraction": 0.0,
"completion_length": 2709.371551513672,
"dapo/avg_reward_std": 0.17381487890731456,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26356589343658715,
"dapo/num_sampling_attempts": 5.375,
"dapo/sampling_efficiency": 31.522817460317455,
"dapo/total_prompts_processed": 32.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13485714285714287,
"grad_norm": 0.03524978086352348,
"kl": 2.0368024706840515e-05,
"learning_rate": 4.951587954676837e-07,
"loss": 0.073,
"reward": 0.5433152373880148,
"reward_std": 0.9576972275972366,
"step": 118
},
{
"clip_fraction": 0.0,
"completion_length": 2729.6458129882812,
"dapo/avg_reward_std": 0.2853468172252178,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31770834140479565,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 38.13988095238095,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.136,
"grad_norm": 0.035877469927072525,
"kl": 9.79006290435791e-06,
"learning_rate": 4.873721045679706e-07,
"loss": 0.0223,
"reward": 0.4996686838567257,
"reward_std": 0.9503490626811981,
"step": 119
},
{
"clip_fraction": 0.0,
"completion_length": 2456.458351135254,
"dapo/avg_reward_std": 0.3290893492244539,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5000000127724239,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 40.62499999999999,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13714285714285715,
"grad_norm": 0.03583266958594322,
"kl": 9.331852197647095e-06,
"learning_rate": 4.79604490731896e-07,
"loss": 0.0363,
"reward": 0.8003920987248421,
"reward_std": 0.955727644264698,
"step": 120
},
{
"clip_fraction": 0.0,
"completion_length": 2489.1875,
"dapo/avg_reward_std": 0.1615937834694272,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.22222222600664412,
"dapo/num_sampling_attempts": 5.25,
"dapo/sampling_efficiency": 37.41987179487179,
"dapo/total_prompts_processed": 31.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1382857142857143,
"grad_norm": 0.027044769376516342,
"kl": 2.0619481801986694e-05,
"learning_rate": 4.7185832004988133e-07,
"loss": 0.0123,
"reward": 0.5692465994507074,
"reward_std": 0.9356264397501945,
"step": 121
},
{
"clip_fraction": 0.0,
"completion_length": 2946.687530517578,
"dapo/avg_reward_std": 0.26767816713878084,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3452381023338863,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 33.75,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13942857142857143,
"grad_norm": 0.03187067061662674,
"kl": 2.1383166313171387e-05,
"learning_rate": 4.641359520805548e-07,
"loss": 0.0722,
"reward": 0.42231168132275343,
"reward_std": 0.9001481607556343,
"step": 122
},
{
"clip_fraction": 0.0,
"completion_length": 1841.1458206176758,
"dapo/avg_reward_std": 0.32384763956069945,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4000000065565109,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 41.041666666666664,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14057142857142857,
"grad_norm": 0.03784916177392006,
"kl": 4.2632222175598145e-05,
"learning_rate": 4.5643973913200837e-07,
"loss": 0.0367,
"reward": 0.6476083844900131,
"reward_std": 0.908843033015728,
"step": 123
},
{
"clip_fraction": 0.0,
"completion_length": 2392.166702270508,
"dapo/avg_reward_std": 0.26674444922085466,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3218390869683233,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 31.666666666666664,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1417142857142857,
"grad_norm": 0.02941369265317917,
"kl": 2.299714833498001e-05,
"learning_rate": 4.4877202554526084e-07,
"loss": 0.0152,
"reward": 0.5824479665607214,
"reward_std": 0.9478363320231438,
"step": 124
},
{
"clip_fraction": 0.0,
"completion_length": 3125.159713745117,
"dapo/avg_reward_std": 0.29309388995170593,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5000000049670538,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 51.45833333333333,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14285714285714285,
"grad_norm": 0.030095171183347702,
"kl": 3.2413750886917114e-05,
"learning_rate": 4.4113514698014953e-07,
"loss": 0.0534,
"reward": 0.5003506469074637,
"reward_std": 0.8919698372483253,
"step": 125
},
{
"clip_fraction": 0.0,
"completion_length": 2462.8368377685547,
"dapo/avg_reward_std": 0.2680182981491089,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3466666728258133,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 46.87499999999999,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.144,
"grad_norm": 0.04286734014749527,
"kl": 5.683675408363342e-05,
"learning_rate": 4.3353142970386557e-07,
"loss": 0.0028,
"reward": 0.5951744802296162,
"reward_std": 0.9584252312779427,
"step": 126
},
{
"clip_fraction": 0.0,
"completion_length": 2443.4618225097656,
"dapo/avg_reward_std": 0.19895405417833573,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2820512862541737,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 33.90376984126984,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14514285714285713,
"grad_norm": 0.03486345708370209,
"kl": 2.958625555038452e-05,
"learning_rate": 4.2596318988235037e-07,
"loss": -0.0055,
"reward": 0.7111770529299974,
"reward_std": 0.9570346251130104,
"step": 127
},
{
"clip_fraction": 0.0,
"completion_length": 2227.385452270508,
"dapo/avg_reward_std": 0.22934340153421676,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333333688122885,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 52.291666666666664,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1462857142857143,
"grad_norm": 0.04721139743924141,
"kl": 3.547314554452896e-05,
"learning_rate": 4.1843273287476854e-07,
"loss": 0.1085,
"reward": 0.4447980001568794,
"reward_std": 0.951726958155632,
"step": 128
},
{
"clip_fraction": 0.0,
"completion_length": 2883.357681274414,
"dapo/avg_reward_std": 0.4109063148498535,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.6777777880430221,
"dapo/num_sampling_attempts": 1.875,
"dapo/sampling_efficiency": 65.625,
"dapo/total_prompts_processed": 11.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14742857142857144,
"grad_norm": 0.02544778771698475,
"kl": 9.082257747650146e-06,
"learning_rate": 4.1094235253127374e-07,
"loss": 0.046,
"reward": 0.6885830331593752,
"reward_std": 0.9739237055182457,
"step": 129
},
{
"clip_fraction": 0.0,
"completion_length": 2122.795181274414,
"dapo/avg_reward_std": 0.2591241377371329,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3641975356472863,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 39.70238095238095,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14857142857142858,
"grad_norm": 0.03150525689125061,
"kl": 3.223586827516556e-05,
"learning_rate": 4.034943304942796e-07,
"loss": 0.0306,
"reward": 0.5525269485078752,
"reward_std": 0.9417792037129402,
"step": 130
},
{
"clip_fraction": 0.0,
"completion_length": 2306.8611450195312,
"dapo/avg_reward_std": 0.3414611066209859,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3908046078065346,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 32.410714285714285,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14971428571428572,
"grad_norm": 0.036385975778102875,
"kl": 4.038959741592407e-05,
"learning_rate": 3.9609093550344907e-07,
"loss": 0.0679,
"reward": 0.5595943983644247,
"reward_std": 0.9294908344745636,
"step": 131
},
{
"clip_fraction": 0.0,
"completion_length": 2100.4444694519043,
"dapo/avg_reward_std": 0.22894747753938038,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34444445222616193,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 38.541666666666664,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15085714285714286,
"grad_norm": 0.05820675194263458,
"kl": 7.29486346244812e-05,
"learning_rate": 3.8873442270461485e-07,
"loss": 0.0548,
"reward": 0.5259249797090888,
"reward_std": 0.9095494002103806,
"step": 132
},
{
"clip_fraction": 0.0,
"completion_length": 2399.0555725097656,
"dapo/avg_reward_std": 0.2968884447346563,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4057971057684525,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 48.33333333333333,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.152,
"grad_norm": 0.03143748641014099,
"kl": 1.6003847122192383e-05,
"learning_rate": 3.8142703296283953e-07,
"loss": 0.0154,
"reward": 0.6293735019862652,
"reward_std": 0.9267243668437004,
"step": 133
},
{
"clip_fraction": 0.0,
"completion_length": 2028.9653091430664,
"dapo/avg_reward_std": 0.24916886538267136,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4097222276031971,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 45.83333333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15314285714285714,
"grad_norm": 0.03667714074254036,
"kl": 2.6845373213291168e-05,
"learning_rate": 3.7417099217982686e-07,
"loss": 0.0108,
"reward": 0.6901863785460591,
"reward_std": 0.9471788480877876,
"step": 134
},
{
"clip_fraction": 0.0,
"completion_length": 2116.6493225097656,
"dapo/avg_reward_std": 0.3074521411742483,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37500000638621195,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 33.035714285714285,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15428571428571428,
"grad_norm": 0.04016295075416565,
"kl": 4.020519554615021e-05,
"learning_rate": 3.6696851061588994e-07,
"loss": 0.081,
"reward": 0.6064621905097738,
"reward_std": 0.9165264815092087,
"step": 135
},
{
"clip_fraction": 0.0,
"completion_length": 2051.2812728881836,
"dapo/avg_reward_std": 0.20643932349754102,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2979798059571873,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 49.26136363636363,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15542857142857142,
"grad_norm": 0.03907117620110512,
"kl": 4.081428050994873e-05,
"learning_rate": 3.5982178221668533e-07,
"loss": 0.0631,
"reward": 0.6007686145603657,
"reward_std": 0.946811780333519,
"step": 136
},
{
"clip_fraction": 0.0,
"completion_length": 2981.6145935058594,
"dapo/avg_reward_std": 0.17673770231860025,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26190476829097387,
"dapo/num_sampling_attempts": 5.25,
"dapo/sampling_efficiency": 33.19444444444444,
"dapo/total_prompts_processed": 31.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15657142857142858,
"grad_norm": 0.026764124631881714,
"kl": 2.1813437342643738e-05,
"learning_rate": 3.5273298394491515e-07,
"loss": 0.0296,
"reward": 0.5422612819820642,
"reward_std": 0.9660339280962944,
"step": 137
},
{
"clip_fraction": 0.0,
"completion_length": 1996.4930725097656,
"dapo/avg_reward_std": 0.2211539367834727,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35000000447034835,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 41.666666666666664,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15771428571428572,
"grad_norm": 0.036459192633628845,
"kl": 6.0535967350006104e-05,
"learning_rate": 3.45704275117204e-07,
"loss": 0.0473,
"reward": 0.6352426074445248,
"reward_std": 1.0075769945979118,
"step": 138
},
{
"clip_fraction": 0.0,
"completion_length": 2673.013931274414,
"dapo/avg_reward_std": 0.21187836019431844,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.28431372738936367,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 40.347222222222214,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15885714285714286,
"grad_norm": 0.027443382889032364,
"kl": 4.770606756210327e-05,
"learning_rate": 3.387377967463493e-07,
"loss": 0.0398,
"reward": 0.53852697648108,
"reward_std": 0.9717471078038216,
"step": 139
},
{
"clip_fraction": 0.0,
"completion_length": 2352.944465637207,
"dapo/avg_reward_std": 0.28073156496574136,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33908046936166697,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 31.666666666666664,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16,
"grad_norm": 0.03219648823142052,
"kl": 1.9827857613563538e-05,
"learning_rate": 3.3183567088914833e-07,
"loss": 0.0502,
"reward": 0.5767329391092062,
"reward_std": 0.920682892203331,
"step": 140
},
{
"clip_fraction": 0.0,
"completion_length": 2714.9097595214844,
"dapo/avg_reward_std": 0.17997434735298157,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26495726979695833,
"dapo/num_sampling_attempts": 4.875,
"dapo/sampling_efficiency": 24.82142857142857,
"dapo/total_prompts_processed": 29.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16114285714285714,
"grad_norm": 0.03654953092336655,
"kl": 2.0893290638923645e-05,
"learning_rate": 3.250000000000001e-07,
"loss": 0.0808,
"reward": 0.7222395315766335,
"reward_std": 0.9689760208129883,
"step": 141
},
{
"clip_fraction": 0.0,
"completion_length": 1895.9965209960938,
"dapo/avg_reward_std": 0.24079040033476692,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30476190788405283,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 36.67207792207792,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16228571428571428,
"grad_norm": 0.05263448879122734,
"kl": 8.018314838409424e-05,
"learning_rate": 3.182328662904756e-07,
"loss": 0.0952,
"reward": 0.5266689900308847,
"reward_std": 0.9142153859138489,
"step": 142
},
{
"clip_fraction": 0.0,
"completion_length": 2619.2291717529297,
"dapo/avg_reward_std": 0.2643248688790106,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34408602887584316,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 32.410714285714285,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16342857142857142,
"grad_norm": 0.029158689081668854,
"kl": 3.154575824737549e-05,
"learning_rate": 3.115363310950578e-07,
"loss": 0.0032,
"reward": 0.5475870370864868,
"reward_std": 0.8940814658999443,
"step": 143
},
{
"clip_fraction": 0.0,
"completion_length": 2439.340316772461,
"dapo/avg_reward_std": 0.25194550690979794,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33908046576483497,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 48.86904761904761,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16457142857142856,
"grad_norm": 0.027842765673995018,
"kl": 4.0609389543533325e-05,
"learning_rate": 3.0491243424323783e-07,
"loss": 0.0,
"reward": 0.6661859937012196,
"reward_std": 0.9778606072068214,
"step": 144
},
{
"clip_fraction": 0.0,
"completion_length": 2299.4166870117188,
"dapo/avg_reward_std": 0.19899881369358785,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2567567603813635,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 27.96626984126984,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1657142857142857,
"grad_norm": 0.041895266622304916,
"kl": 6.861239671707153e-05,
"learning_rate": 2.9836319343816397e-07,
"loss": 0.1109,
"reward": 0.6072739865630865,
"reward_std": 0.9706787243485451,
"step": 145
},
{
"clip_fraction": 0.0,
"completion_length": 2448.3993225097656,
"dapo/avg_reward_std": 0.26682727987116034,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4015151573853059,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 61.25,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16685714285714287,
"grad_norm": 0.033113960176706314,
"kl": 6.478279829025269e-05,
"learning_rate": 2.918906036420294e-07,
"loss": -0.0725,
"reward": 0.7111451979726553,
"reward_std": 0.9747665524482727,
"step": 146
},
{
"clip_fraction": 0.0,
"completion_length": 2499.4132080078125,
"dapo/avg_reward_std": 0.23725970940930502,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36904762791735785,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 40.972222222222214,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.168,
"grad_norm": 0.03699960932135582,
"kl": 5.050189793109894e-05,
"learning_rate": 2.854966364683872e-07,
"loss": 0.0512,
"reward": 0.5902281412854791,
"reward_std": 0.9745439067482948,
"step": 147
},
{
"clip_fraction": 0.0,
"completion_length": 2606.902816772461,
"dapo/avg_reward_std": 0.3174622275612571,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46212122250686993,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 50.416666666666664,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16914285714285715,
"grad_norm": 0.032203614711761475,
"kl": 3.288034349679947e-05,
"learning_rate": 2.791832395815782e-07,
"loss": 0.0183,
"reward": 0.4769565463066101,
"reward_std": 0.9322275221347809,
"step": 148
},
{
"clip_fraction": 0.0,
"completion_length": 2815.8160247802734,
"dapo/avg_reward_std": 0.2469456638350631,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2979798046025363,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 35.11904761904762,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1702857142857143,
"grad_norm": 0.030444171279668808,
"kl": 3.5978853702545166e-05,
"learning_rate": 2.729523361034538e-07,
"loss": 0.056,
"reward": 0.6807443965226412,
"reward_std": 0.9815046414732933,
"step": 149
},
{
"clip_fraction": 0.0,
"completion_length": 2225.520866394043,
"dapo/avg_reward_std": 0.19231303450134066,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2777777844005161,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 32.18749999999999,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17142857142857143,
"grad_norm": 0.03868250176310539,
"kl": 4.6514905989170074e-05,
"learning_rate": 2.6680582402757324e-07,
"loss": -0.037,
"reward": 0.6887061549350619,
"reward_std": 0.9610730484127998,
"step": 150
},
{
"clip_fraction": 0.0,
"completion_length": 3103.3784790039062,
"dapo/avg_reward_std": 0.20304633464132035,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31547619295971735,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 40.32738095238095,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17257142857142857,
"grad_norm": 0.03259337320923805,
"kl": 7.005780935287476e-05,
"learning_rate": 2.6074557564105724e-07,
"loss": 0.0659,
"reward": 0.5518668536096811,
"reward_std": 0.9462934136390686,
"step": 151
},
{
"clip_fraction": 0.0,
"completion_length": 2488.499984741211,
"dapo/avg_reward_std": 0.20882706064730883,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3177083367481828,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 39.409722222222214,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1737142857142857,
"grad_norm": 0.030666321516036987,
"kl": 3.533810377120972e-05,
"learning_rate": 2.547734369542718e-07,
"loss": 0.0437,
"reward": 0.5291262120008469,
"reward_std": 0.981982946395874,
"step": 152
},
{
"clip_fraction": 0.0,
"completion_length": 2514.8507080078125,
"dapo/avg_reward_std": 0.20546393813910308,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3209876600239012,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 35.93749999999999,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17485714285714285,
"grad_norm": 0.028674930334091187,
"kl": 7.952749729156494e-05,
"learning_rate": 2.488912271385139e-07,
"loss": -0.0145,
"reward": 0.5828098729252815,
"reward_std": 0.9706256464123726,
"step": 153
},
{
"clip_fraction": 0.0,
"completion_length": 2717.2847290039062,
"dapo/avg_reward_std": 0.25499844749768574,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36666667511065804,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 34.791666666666664,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.176,
"grad_norm": 0.030772393569350243,
"kl": 4.854763392359018e-05,
"learning_rate": 2.4310073797187573e-07,
"loss": 0.0426,
"reward": 0.45278373593464494,
"reward_std": 0.9311749711632729,
"step": 154
},
{
"clip_fraction": 0.0,
"completion_length": 2762.3055725097656,
"dapo/avg_reward_std": 0.29779375117758045,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3985507280930229,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 46.25,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17714285714285713,
"grad_norm": 0.02795676700770855,
"kl": 6.116554141044617e-05,
"learning_rate": 2.374037332934512e-07,
"loss": -0.017,
"reward": 0.5571175646036863,
"reward_std": 0.951450802385807,
"step": 155
},
{
"clip_fraction": 0.0,
"completion_length": 2260.506950378418,
"dapo/avg_reward_std": 0.19260793987740862,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.20921986375717408,
"dapo/num_sampling_attempts": 5.875,
"dapo/sampling_efficiency": 20.416666666666664,
"dapo/total_prompts_processed": 35.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1782857142857143,
"grad_norm": 0.03577401861548424,
"kl": 4.409998655319214e-05,
"learning_rate": 2.3180194846605364e-07,
"loss": 0.0769,
"reward": 0.6440617088228464,
"reward_std": 0.9337564334273338,
"step": 156
},
{
"clip_fraction": 0.0,
"completion_length": 2340.84725189209,
"dapo/avg_reward_std": 0.27447891732056934,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40972222946584225,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 46.87499999999999,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17942857142857144,
"grad_norm": 0.045233093202114105,
"kl": 6.485730409622192e-05,
"learning_rate": 2.2629708984760706e-07,
"loss": 0.0363,
"reward": 0.7273098900914192,
"reward_std": 0.9823846518993378,
"step": 157
},
{
"clip_fraction": 0.0,
"completion_length": 2282.3819580078125,
"dapo/avg_reward_std": 0.20623917956101268,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31140351334684774,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 23.680555555555557,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18057142857142858,
"grad_norm": 0.02890234813094139,
"kl": 5.996227264404297e-05,
"learning_rate": 2.2089083427137329e-07,
"loss": 0.0031,
"reward": 0.6950137317180634,
"reward_std": 0.9464666321873665,
"step": 158
},
{
"clip_fraction": 0.0,
"completion_length": 2021.6284866333008,
"dapo/avg_reward_std": 0.23576846316054062,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3198198257265864,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 26.96969696969697,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18171428571428572,
"grad_norm": 0.03477742150425911,
"kl": 6.712228059768677e-05,
"learning_rate": 2.1558482853517253e-07,
"loss": 0.0402,
"reward": 0.5178025495260954,
"reward_std": 0.9177478551864624,
"step": 159
},
{
"clip_fraction": 0.0,
"completion_length": 2372.9931030273438,
"dapo/avg_reward_std": 0.1955654670794805,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.24206349643922986,
"dapo/num_sampling_attempts": 5.25,
"dapo/sampling_efficiency": 22.916666666666664,
"dapo/total_prompts_processed": 31.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18285714285714286,
"grad_norm": 0.03023899346590042,
"kl": 0.00011706352233886719,
"learning_rate": 2.1038068889975259e-07,
"loss": -0.023,
"reward": 0.5155377965420485,
"reward_std": 0.9538168758153915,
"step": 160
},
{
"clip_fraction": 0.0,
"completion_length": 2786.184097290039,
"dapo/avg_reward_std": 0.22358988050152273,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3039215772467501,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 36.354166666666664,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.184,
"grad_norm": 0.029065359383821487,
"kl": 7.36340880393982e-05,
"learning_rate": 2.0528000059645995e-07,
"loss": 0.0183,
"reward": 0.5675038225017488,
"reward_std": 0.9294460043311119,
"step": 161
},
{
"clip_fraction": 0.0,
"completion_length": 2661.7986183166504,
"dapo/avg_reward_std": 0.23443660909129727,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3494623740834574,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 39.166666666666664,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18514285714285714,
"grad_norm": 0.03428042680025101,
"kl": 7.835030555725098e-05,
"learning_rate": 2.0028431734436308e-07,
"loss": 0.0077,
"reward": 0.6459280159324408,
"reward_std": 0.961892195045948,
"step": 162
},
{
"clip_fraction": 0.0,
"completion_length": 2645.9305725097656,
"dapo/avg_reward_std": 0.2903378981611003,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.39855073133240576,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 54.166666666666664,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18628571428571428,
"grad_norm": 0.026776015758514404,
"kl": 6.175786256790161e-05,
"learning_rate": 1.9539516087697517e-07,
"loss": 0.0499,
"reward": 0.834372952580452,
"reward_std": 0.9364972710609436,
"step": 163
},
{
"clip_fraction": 0.0,
"completion_length": 2940.7604370117188,
"dapo/avg_reward_std": 0.28692422310511273,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35555556217829387,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 32.708333333333336,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18742857142857142,
"grad_norm": 0.03140675649046898,
"kl": 6.527453660964966e-05,
"learning_rate": 1.9061402047871833e-07,
"loss": 0.074,
"reward": 0.41690353071317077,
"reward_std": 0.9491114094853401,
"step": 164
},
{
"clip_fraction": 0.0,
"completion_length": 2281.6284675598145,
"dapo/avg_reward_std": 0.19226541501634262,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.28921569007284503,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 34.285714285714285,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18857142857142858,
"grad_norm": 0.044475626200437546,
"kl": 6.622821092605591e-05,
"learning_rate": 1.8594235253127372e-07,
"loss": 0.0216,
"reward": 0.5352295860648155,
"reward_std": 0.9716188460588455,
"step": 165
},
{
"clip_fraction": 0.0,
"completion_length": 2246.774314880371,
"dapo/avg_reward_std": 0.21395914729048565,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2642276446993758,
"dapo/num_sampling_attempts": 5.125,
"dapo/sampling_efficiency": 32.51488095238095,
"dapo/total_prompts_processed": 30.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18971428571428572,
"grad_norm": 0.03659826144576073,
"kl": 7.368624210357666e-05,
"learning_rate": 1.8138158006995363e-07,
"loss": 0.0485,
"reward": 0.5606641564518213,
"reward_std": 0.9496459811925888,
"step": 166
},
{
"clip_fraction": 0.0,
"completion_length": 2340.156265258789,
"dapo/avg_reward_std": 0.2663822333017985,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3888888974984487,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 30.32738095238095,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19085714285714286,
"grad_norm": 0.03370486944913864,
"kl": 0.00011890754103660583,
"learning_rate": 1.7693309235023127e-07,
"loss": 0.0107,
"reward": 0.615155003964901,
"reward_std": 0.981718622148037,
"step": 167
},
{
"clip_fraction": 0.0,
"completion_length": 1600.381950378418,
"dapo/avg_reward_std": 0.2149174999859598,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31481481964389485,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 39.30555555555556,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.192,
"grad_norm": 0.040477264672517776,
"kl": 4.4405460357666016e-05,
"learning_rate": 1.7259824442455923e-07,
"loss": 0.0183,
"reward": 0.7775004804134369,
"reward_std": 0.9218784719705582,
"step": 168
},
{
"clip_fraction": 0.0,
"completion_length": 2663.3229370117188,
"dapo/avg_reward_std": 0.29243687472560187,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4242424314672297,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 45.20833333333333,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19314285714285714,
"grad_norm": 0.033447615802288055,
"kl": 6.474554538726807e-05,
"learning_rate": 1.6837835672960831e-07,
"loss": 0.0604,
"reward": 0.6684309486299753,
"reward_std": 0.9398416355252266,
"step": 169
},
{
"clip_fraction": 0.0,
"completion_length": 1823.3020782470703,
"dapo/avg_reward_std": 0.19836447931624748,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.24324324847878637,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 33.229166666666664,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19428571428571428,
"grad_norm": 0.050460852682590485,
"kl": 8.266419172286987e-05,
"learning_rate": 1.6427471468404952e-07,
"loss": 0.0797,
"reward": 0.6385768353939056,
"reward_std": 0.9705075472593307,
"step": 170
},
{
"clip_fraction": 0.0,
"completion_length": 2620.312515258789,
"dapo/avg_reward_std": 0.2494219935992185,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.29901961412499933,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 30.3125,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19542857142857142,
"grad_norm": 0.030058681964874268,
"kl": 5.9291720390319824e-05,
"learning_rate": 1.6028856829700258e-07,
"loss": 0.04,
"reward": 0.5667276866734028,
"reward_std": 0.9310731589794159,
"step": 171
},
{
"clip_fraction": 0.0,
"completion_length": 2728.118064880371,
"dapo/avg_reward_std": 0.3154246766458858,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.47727273540063336,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 52.82738095238095,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19657142857142856,
"grad_norm": 0.02854626253247261,
"kl": 4.601478576660156e-05,
"learning_rate": 1.5642113178727193e-07,
"loss": -0.0071,
"reward": 0.5269420258700848,
"reward_std": 0.9420886114239693,
"step": 172
},
{
"clip_fraction": 0.0,
"completion_length": 2000.6111297607422,
"dapo/avg_reward_std": 0.1943835632221119,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2657657728807346,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 38.02083333333333,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1977142857142857,
"grad_norm": 0.033435527235269547,
"kl": 6.041303277015686e-05,
"learning_rate": 1.5267358321348285e-07,
"loss": -0.0116,
"reward": 0.6523085497319698,
"reward_std": 0.9166425243020058,
"step": 173
},
{
"clip_fraction": 0.0,
"completion_length": 2643.138916015625,
"dapo/avg_reward_std": 0.31710357325417654,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46031746977851506,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 51.45833333333333,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19885714285714284,
"grad_norm": 0.02673209458589554,
"kl": 0.00010142475366592407,
"learning_rate": 1.4904706411523448e-07,
"loss": 0.0252,
"reward": 0.5322555489838123,
"reward_std": 0.9057421013712883,
"step": 174
},
{
"clip_fraction": 0.0,
"completion_length": 2441.3437576293945,
"dapo/avg_reward_std": 0.30628569194903743,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.38461538977347887,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 41.041666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2,
"grad_norm": 0.04055117443203926,
"kl": 4.247203469276428e-05,
"learning_rate": 1.4554267916537495e-07,
"loss": 0.0974,
"reward": 0.6256343480199575,
"reward_std": 0.9141717404127121,
"step": 175
},
{
"clip_fraction": 0.0,
"completion_length": 2001.5173797607422,
"dapo/avg_reward_std": 0.28915207616744504,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3817204381189039,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 29.285714285714285,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20114285714285715,
"grad_norm": 0.03139885142445564,
"kl": 8.495151996612549e-05,
"learning_rate": 1.4216149583350755e-07,
"loss": 0.0178,
"reward": 0.5467482833191752,
"reward_std": 0.9077746942639351,
"step": 176
},
{
"clip_fraction": 0.0,
"completion_length": 2707.1875,
"dapo/avg_reward_std": 0.2716821462943636,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3620689732247385,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 37.5,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2022857142857143,
"grad_norm": 0.027195578441023827,
"kl": 3.4984201192855835e-05,
"learning_rate": 1.3890454406082956e-07,
"loss": 0.0243,
"reward": 0.4738291520625353,
"reward_std": 0.9582962840795517,
"step": 177
},
{
"clip_fraction": 0.0,
"completion_length": 2927.2534790039062,
"dapo/avg_reward_std": 0.2845180779695511,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3750000127724239,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 34.49404761904761,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20342857142857143,
"grad_norm": 0.0315893292427063,
"kl": 9.309500455856323e-05,
"learning_rate": 1.3577281594640182e-07,
"loss": 0.067,
"reward": 0.52550208568573,
"reward_std": 0.9910342618823051,
"step": 178
},
{
"clip_fraction": 0.0,
"completion_length": 2337.701400756836,
"dapo/avg_reward_std": 0.18291032314300537,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.25438597092502996,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 32.81249999999999,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20457142857142857,
"grad_norm": 0.031005509197711945,
"kl": 9.676814079284668e-05,
"learning_rate": 1.3276726544494571e-07,
"loss": 0.0165,
"reward": 0.6187671273946762,
"reward_std": 0.9665273353457451,
"step": 179
},
{
"clip_fraction": 0.0,
"completion_length": 2257.3958892822266,
"dapo/avg_reward_std": 0.20009312199221718,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30092593158284825,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 40.95238095238095,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2057142857142857,
"grad_norm": 0.0394003801047802,
"kl": 6.996467709541321e-05,
"learning_rate": 1.2988880807625927e-07,
"loss": 0.0627,
"reward": 0.7572303153574467,
"reward_std": 0.9510952234268188,
"step": 180
},
{
"clip_fraction": 0.0,
"completion_length": 2533.9375534057617,
"dapo/avg_reward_std": 0.37206994990507763,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5740740895271301,
"dapo/num_sampling_attempts": 2.25,
"dapo/sampling_efficiency": 51.041666666666664,
"dapo/total_prompts_processed": 13.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20685714285714285,
"grad_norm": 0.03264293819665909,
"kl": 4.2844563722610474e-05,
"learning_rate": 1.2713832064634125e-07,
"loss": 0.0513,
"reward": 0.7092031128704548,
"reward_std": 1.0104939341545105,
"step": 181
},
{
"clip_fraction": 0.0,
"completion_length": 2425.8055572509766,
"dapo/avg_reward_std": 0.275991202547,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.384615390919722,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 41.041666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.208,
"grad_norm": 0.033197954297065735,
"kl": 6.585032679140568e-05,
"learning_rate": 1.2451664098030743e-07,
"loss": 0.0327,
"reward": 0.5725661776959896,
"reward_std": 0.9082557633519173,
"step": 182
},
{
"clip_fraction": 0.0,
"completion_length": 2288.0799255371094,
"dapo/avg_reward_std": 0.31956043162129144,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4318181872367859,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 52.083333333333336,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20914285714285713,
"grad_norm": 0.0300610288977623,
"kl": 9.128451347351074e-05,
"learning_rate": 1.220245676671809e-07,
"loss": 0.0567,
"reward": 0.7111962893977761,
"reward_std": 0.9172193482518196,
"step": 183
},
{
"clip_fraction": 0.0,
"completion_length": 2212.138900756836,
"dapo/avg_reward_std": 0.31106447339057924,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4066666769981384,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 40.97222222222222,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2102857142857143,
"grad_norm": 0.03711786866188049,
"kl": 9.056925773620605e-05,
"learning_rate": 1.1966285981663407e-07,
"loss": 0.0405,
"reward": 0.505124656483531,
"reward_std": 0.9274496361613274,
"step": 184
},
{
"clip_fraction": 0.0,
"completion_length": 2350.8820037841797,
"dapo/avg_reward_std": 0.21689824704770688,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3209876600239012,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 47.22222222222222,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21142857142857144,
"grad_norm": 0.03581295162439346,
"kl": 0.00011820532381534576,
"learning_rate": 1.1743223682775649e-07,
"loss": 0.0582,
"reward": 0.6189532484859228,
"reward_std": 0.92426348477602,
"step": 185
},
{
"clip_fraction": 0.0,
"completion_length": 2414.6770629882812,
"dapo/avg_reward_std": 0.26570350316263014,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333333749924937,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 41.785714285714285,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21257142857142858,
"grad_norm": 0.04000677913427353,
"kl": 5.166977643966675e-05,
"learning_rate": 1.1533337816991931e-07,
"loss": 0.0842,
"reward": 0.6384202986955643,
"reward_std": 0.9535242542624474,
"step": 186
},
{
"clip_fraction": 0.0,
"completion_length": 2179.180564880371,
"dapo/avg_reward_std": 0.267340756695846,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.362068974766238,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 31.25,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21371428571428572,
"grad_norm": 0.03956381976604462,
"kl": 7.00727105140686e-05,
"learning_rate": 1.1336692317580158e-07,
"loss": 0.0838,
"reward": 0.6583898914977908,
"reward_std": 0.9566742405295372,
"step": 187
},
{
"clip_fraction": 0.0,
"completion_length": 2340.65975189209,
"dapo/avg_reward_std": 0.19622711837291718,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.31770833721384406,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 55.51136363636363,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21485714285714286,
"grad_norm": 0.03709344565868378,
"kl": 9.210407733917236e-05,
"learning_rate": 1.1153347084664419e-07,
"loss": 0.0542,
"reward": 0.5126780550926924,
"reward_std": 0.9266727864742279,
"step": 188
},
{
"clip_fraction": 0.0,
"completion_length": 3183.7395782470703,
"dapo/avg_reward_std": 0.19985724004303537,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.23577236293292628,
"dapo/num_sampling_attempts": 5.125,
"dapo/sampling_efficiency": 23.1547619047619,
"dapo/total_prompts_processed": 30.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.216,
"grad_norm": 0.025569448247551918,
"kl": 3.505079075694084e-05,
"learning_rate": 1.0983357966978745e-07,
"loss": 0.0446,
"reward": 0.524140851572156,
"reward_std": 0.9313696026802063,
"step": 189
},
{
"clip_fraction": 0.0,
"completion_length": 2137.0764083862305,
"dapo/avg_reward_std": 0.2310014808177948,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33333334028720857,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 49.479166666666664,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21714285714285714,
"grad_norm": 0.049144402146339417,
"kl": 0.00011414289474487305,
"learning_rate": 1.0826776744855121e-07,
"loss": 0.0597,
"reward": 0.6003488898277283,
"reward_std": 0.9967769384384155,
"step": 190
},
{
"clip_fraction": 0.0,
"completion_length": 2711.965301513672,
"dapo/avg_reward_std": 0.27090639670689903,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3388888930281003,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 42.604166666666664,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21828571428571428,
"grad_norm": 0.03207146376371384,
"kl": 7.285922765731812e-05,
"learning_rate": 1.068365111445064e-07,
"loss": 0.0774,
"reward": 0.5157463289797306,
"reward_std": 0.9445067569613457,
"step": 191
},
{
"clip_fraction": 0.0,
"completion_length": 2634.809066772461,
"dapo/avg_reward_std": 0.23276896492854968,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.29729730414377675,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 31.38888888888889,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21942857142857142,
"grad_norm": 0.026157336309552193,
"kl": 4.951097071170807e-05,
"learning_rate": 1.0554024673218806e-07,
"loss": 0.0183,
"reward": 0.4917615167796612,
"reward_std": 0.932147391140461,
"step": 192
},
{
"clip_fraction": 0.0,
"completion_length": 2687.6562423706055,
"dapo/avg_reward_std": 0.1842694640159607,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3000000034769376,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 37.20238095238095,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22057142857142858,
"grad_norm": 0.036305345594882965,
"kl": 5.197897553443909e-05,
"learning_rate": 1.0437936906629334e-07,
"loss": 0.0737,
"reward": 0.8177419528365135,
"reward_std": 0.9367102533578873,
"step": 193
},
{
"clip_fraction": 0.0,
"completion_length": 2567.093780517578,
"dapo/avg_reward_std": 0.2292217422615398,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30808081364992895,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 36.284722222222214,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22171428571428572,
"grad_norm": 0.03788081929087639,
"kl": 8.841603994369507e-05,
"learning_rate": 1.0335423176140511e-07,
"loss": 0.0745,
"reward": 0.4994155182503164,
"reward_std": 0.9395617768168449,
"step": 194
},
{
"clip_fraction": 0.0,
"completion_length": 2132.22225189209,
"dapo/avg_reward_std": 0.23152823698136113,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34408602791447795,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 46.800595238095234,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22285714285714286,
"grad_norm": 0.03888849914073944,
"kl": 7.880479097366333e-05,
"learning_rate": 1.0246514708427701e-07,
"loss": 0.0078,
"reward": 0.4982965085655451,
"reward_std": 0.9277759939432144,
"step": 195
},
{
"clip_fraction": 0.0,
"completion_length": 2242.8437881469727,
"dapo/avg_reward_std": 0.2252171416031687,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.27192983148913635,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 42.49999999999999,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.224,
"grad_norm": 0.03532218188047409,
"kl": 8.079037070274353e-05,
"learning_rate": 1.017123858587145e-07,
"loss": -0.0036,
"reward": 0.6249313289299607,
"reward_std": 0.9415610581636429,
"step": 196
},
{
"clip_fraction": 0.0,
"completion_length": 2186.913246154785,
"dapo/avg_reward_std": 0.2062954322287911,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26754386566187205,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 27.549603174603174,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22514285714285714,
"grad_norm": 0.05644107237458229,
"kl": 0.00012712180614471436,
"learning_rate": 1.0109617738307911e-07,
"loss": 0.0266,
"reward": 0.6248354203999043,
"reward_std": 0.9687103852629662,
"step": 197
},
{
"clip_fraction": 0.0,
"completion_length": 2853.7430725097656,
"dapo/avg_reward_std": 0.2791443226429132,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41666667277996355,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 45.3125,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22628571428571428,
"grad_norm": 0.025631451979279518,
"kl": 7.095187902450562e-05,
"learning_rate": 1.0061670936044178e-07,
"loss": 0.0195,
"reward": 0.683892990462482,
"reward_std": 0.9487637504935265,
"step": 198
},
{
"clip_fraction": 0.0,
"completion_length": 2660.218780517578,
"dapo/avg_reward_std": 0.24377418825259575,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3910256469478974,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 56.597222222222214,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22742857142857142,
"grad_norm": 0.034018680453300476,
"kl": 6.149709224700928e-05,
"learning_rate": 1.002741278414069e-07,
"loss": 0.0404,
"reward": 0.565577644854784,
"reward_std": 0.9079905152320862,
"step": 199
},
{
"clip_fraction": 0.0,
"completion_length": 2421.875015258789,
"dapo/avg_reward_std": 0.3100067762037118,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4027777823309104,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 45.83333333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22857142857142856,
"grad_norm": 0.030885452404618263,
"kl": 7.659196853637695e-05,
"learning_rate": 1.0006853717962393e-07,
"loss": 0.0132,
"reward": 0.5110834892839193,
"reward_std": 0.8930082246661186,
"step": 200
},
{
"epoch": 0.22857142857142856,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.009447227440541611,
"train_runtime": 101500.2967,
"train_samples_per_second": 0.095,
"train_steps_per_second": 0.002
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}