DAPO-7B / trainer_state.json
kangdawei's picture
Model save
9d331ab verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.22857142857142856,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_fraction": 0.0,
"completion_length": 1681.8854370117188,
"dapo/avg_reward_std": 0.3420590679896505,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.48245614610220255,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 54.58333333333333,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.001142857142857143,
"grad_norm": 0.011931957677006721,
"kl": 0.0,
"learning_rate": 0.0,
"loss": 0.0219,
"reward": 0.8671084493398666,
"reward_std": 0.964848667383194,
"step": 1
},
{
"clip_fraction": 0.0,
"completion_length": 2172.913185119629,
"dapo/avg_reward_std": 0.27327019289920207,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4824561500235608,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 67.41071428571428,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.002285714285714286,
"grad_norm": 0.014162006787955761,
"kl": 0.0,
"learning_rate": 1e-07,
"loss": 0.0232,
"reward": 0.932205643504858,
"reward_std": 0.9607091471552849,
"step": 2
},
{
"clip_fraction": 0.0,
"completion_length": 2418.3611373901367,
"dapo/avg_reward_std": 0.3202404692769051,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.45833334177732465,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 51.04166666666666,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.0034285714285714284,
"grad_norm": 0.011303936131298542,
"kl": 0.0001301020383834839,
"learning_rate": 2e-07,
"loss": 0.0371,
"reward": 0.5818949677050114,
"reward_std": 0.928392305970192,
"step": 3
},
{
"clip_fraction": 0.0,
"completion_length": 2080.6250228881836,
"dapo/avg_reward_std": 0.3523675338788466,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4545454586094076,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 45.20833333333333,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.004571428571428572,
"grad_norm": 0.010935964062809944,
"kl": 8.246302604675293e-05,
"learning_rate": 3e-07,
"loss": 0.007,
"reward": 0.6902085058391094,
"reward_std": 0.9576746746897697,
"step": 4
},
{
"clip_fraction": 0.0,
"completion_length": 2208.1910247802734,
"dapo/avg_reward_std": 0.33842799224351583,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4912280746196446,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 54.166666666666664,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.005714285714285714,
"grad_norm": 0.01424587145447731,
"kl": 0.00011987239122390747,
"learning_rate": 4e-07,
"loss": 0.0916,
"reward": 0.5482002776116133,
"reward_std": 0.9192102774977684,
"step": 5
},
{
"clip_fraction": 0.0,
"completion_length": 2428.8646087646484,
"dapo/avg_reward_std": 0.2724780907233556,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37222223381201425,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 37.39583333333333,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.006857142857142857,
"grad_norm": 0.012209060601890087,
"kl": 0.00013336539268493652,
"learning_rate": 5e-07,
"loss": 0.063,
"reward": 0.6304261162877083,
"reward_std": 0.947055421769619,
"step": 6
},
{
"clip_fraction": 0.0,
"completion_length": 2028.1111297607422,
"dapo/avg_reward_std": 0.35396890342235565,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5151515284722502,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 48.95833333333333,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.008,
"grad_norm": 0.01456605363637209,
"kl": 0.00010842084884643555,
"learning_rate": 6e-07,
"loss": 0.0863,
"reward": 0.7125897314399481,
"reward_std": 0.938522607088089,
"step": 7
},
{
"clip_fraction": 0.0,
"completion_length": 1825.9792022705078,
"dapo/avg_reward_std": 0.3198123288154602,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.45333334505558015,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 36.45833333333333,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.009142857142857144,
"grad_norm": 0.014117815531790257,
"kl": 8.45193862915039e-05,
"learning_rate": 7e-07,
"loss": 0.024,
"reward": 0.7728112610056996,
"reward_std": 0.953309640288353,
"step": 8
},
{
"clip_fraction": 0.0,
"completion_length": 2424.159713745117,
"dapo/avg_reward_std": 0.4454919546842575,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.6785714392151151,
"dapo/num_sampling_attempts": 1.75,
"dapo/sampling_efficiency": 70.83333333333333,
"dapo/total_prompts_processed": 10.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.010285714285714285,
"grad_norm": 0.008895393460988998,
"kl": 0.00011056661605834961,
"learning_rate": 8e-07,
"loss": 0.013,
"reward": 0.6077092736959457,
"reward_std": 0.994397833943367,
"step": 9
},
{
"clip_fraction": 0.0,
"completion_length": 1959.0763702392578,
"dapo/avg_reward_std": 0.25889470875263215,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.350000007947286,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 40.20833333333333,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.011428571428571429,
"grad_norm": 0.011032010428607464,
"kl": 8.809566497802734e-05,
"learning_rate": 9e-07,
"loss": 0.018,
"reward": 0.7773313578218222,
"reward_std": 0.9549762830138206,
"step": 10
},
{
"clip_fraction": 0.0,
"completion_length": 2597.6979217529297,
"dapo/avg_reward_std": 0.3167818512605584,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.44202899284984754,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 42.70833333333333,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.012571428571428572,
"grad_norm": 0.010659257881343365,
"kl": 0.00013309717178344727,
"learning_rate": 1e-06,
"loss": 0.0026,
"reward": 0.5649524200707674,
"reward_std": 0.9257139712572098,
"step": 11
},
{
"clip_fraction": 0.0,
"completion_length": 2214.9444580078125,
"dapo/avg_reward_std": 0.33351172175672317,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5648148208856583,
"dapo/num_sampling_attempts": 2.25,
"dapo/sampling_efficiency": 49.99999999999999,
"dapo/total_prompts_processed": 13.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.013714285714285714,
"grad_norm": 0.010501649230718613,
"kl": 9.53376293182373e-05,
"learning_rate": 9.997258721585931e-07,
"loss": 0.0287,
"reward": 0.7854772098362446,
"reward_std": 0.9361946359276772,
"step": 12
},
{
"clip_fraction": 0.0,
"completion_length": 1984.5416717529297,
"dapo/avg_reward_std": 0.3313978049490187,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5925926052861743,
"dapo/num_sampling_attempts": 2.25,
"dapo/sampling_efficiency": 56.666666666666664,
"dapo/total_prompts_processed": 13.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.014857142857142857,
"grad_norm": 0.012102734297513962,
"kl": 9.861588478088379e-05,
"learning_rate": 9.989038226169207e-07,
"loss": 0.0277,
"reward": 0.9007548745721579,
"reward_std": 0.9196444824337959,
"step": 13
},
{
"clip_fraction": 0.0,
"completion_length": 2267.5069885253906,
"dapo/avg_reward_std": 0.21889745750847986,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3186274560935357,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 40.63988095238095,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.016,
"grad_norm": 0.01004031766206026,
"kl": 0.00010375678539276123,
"learning_rate": 9.975348529157229e-07,
"loss": 0.0342,
"reward": 0.5439228732138872,
"reward_std": 0.9444419518113136,
"step": 14
},
{
"clip_fraction": 0.0,
"completion_length": 2403.170135498047,
"dapo/avg_reward_std": 0.24896668710491873,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4242424321445552,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 58.45238095238095,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.017142857142857144,
"grad_norm": 0.013138854876160622,
"kl": 0.00011286139488220215,
"learning_rate": 9.956206309337066e-07,
"loss": 0.0341,
"reward": 0.6446905825287104,
"reward_std": 0.9305006489157677,
"step": 15
},
{
"clip_fraction": 0.0,
"completion_length": 2368.579849243164,
"dapo/avg_reward_std": 0.32238917201757433,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4416666716337204,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 53.125,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.018285714285714287,
"grad_norm": 0.009644324891269207,
"kl": 0.00011764466762542725,
"learning_rate": 9.931634888554935e-07,
"loss": 0.0184,
"reward": 0.6319684982299805,
"reward_std": 0.9385868087410927,
"step": 16
},
{
"clip_fraction": 0.0,
"completion_length": 2354.590286254883,
"dapo/avg_reward_std": 0.2929895012466996,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41358025482407323,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 43.95833333333333,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.019428571428571427,
"grad_norm": 0.010750290006399155,
"kl": 0.00012104213237762451,
"learning_rate": 9.901664203302124e-07,
"loss": 0.0512,
"reward": 0.7495243214070797,
"reward_std": 0.9604936093091965,
"step": 17
},
{
"clip_fraction": 0.0,
"completion_length": 2353.548599243164,
"dapo/avg_reward_std": 0.3144007975404913,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46212122250686993,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 52.5,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.02057142857142857,
"grad_norm": 0.0106205390766263,
"kl": 0.0001283884048461914,
"learning_rate": 9.866330768241983e-07,
"loss": 0.0356,
"reward": 0.7090531028807163,
"reward_std": 0.927816279232502,
"step": 18
},
{
"clip_fraction": 0.0,
"completion_length": 2599.90283203125,
"dapo/avg_reward_std": 0.31102153037985164,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46527778667708236,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 43.125,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.021714285714285714,
"grad_norm": 0.00998625811189413,
"kl": 0.00011986494064331055,
"learning_rate": 9.825677631722435e-07,
"loss": 0.0501,
"reward": 0.8357332646846771,
"reward_std": 0.9608008861541748,
"step": 19
},
{
"clip_fraction": 0.0,
"completion_length": 2307.482650756836,
"dapo/avg_reward_std": 0.3105274804613807,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4545454633506862,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 45.83333333333333,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.022857142857142857,
"grad_norm": 0.010738078504800797,
"kl": 9.399652481079102e-05,
"learning_rate": 9.779754323328192e-07,
"loss": 0.0104,
"reward": 0.7927055042237043,
"reward_std": 0.9697678238153458,
"step": 20
},
{
"clip_fraction": 0.0,
"completion_length": 1943.2500457763672,
"dapo/avg_reward_std": 0.3021106570959091,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.384615390919722,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 41.78571428571428,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.024,
"grad_norm": 0.01025764923542738,
"kl": 6.92903995513916e-05,
"learning_rate": 9.728616793536587e-07,
"loss": 0.0005,
"reward": 0.7050843685865402,
"reward_std": 0.9542289972305298,
"step": 21
},
{
"clip_fraction": 0.0,
"completion_length": 2265.222198486328,
"dapo/avg_reward_std": 0.2858178478020888,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4102564144593019,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 36.160714285714285,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.025142857142857144,
"grad_norm": 0.015554007142782211,
"kl": 0.00011515617370605469,
"learning_rate": 9.672327345550543e-07,
"loss": 0.1143,
"reward": 0.7392658032476902,
"reward_std": 0.9592578783631325,
"step": 22
},
{
"clip_fraction": 0.0,
"completion_length": 2213.857635498047,
"dapo/avg_reward_std": 0.28609917419297354,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.410714291036129,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 38.66071428571428,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.026285714285714287,
"grad_norm": 0.00819400418549776,
"kl": 7.683038711547852e-05,
"learning_rate": 9.610954559391704e-07,
"loss": 0.018,
"reward": 0.6645980039611459,
"reward_std": 0.919261984527111,
"step": 23
},
{
"clip_fraction": 0.0,
"completion_length": 1544.9930610656738,
"dapo/avg_reward_std": 0.27062960465749103,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.38888889948527017,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 37.20238095238095,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.027428571428571427,
"grad_norm": 0.013472510501742363,
"kl": 6.948411464691162e-05,
"learning_rate": 9.54457320834625e-07,
"loss": 0.0006,
"reward": 0.6155341246630996,
"reward_std": 0.9053066149353981,
"step": 24
},
{
"clip_fraction": 0.0,
"completion_length": 2005.5104598999023,
"dapo/avg_reward_std": 0.2877837224253293,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.38505747760164327,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 38.75,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.02857142857142857,
"grad_norm": 0.011138558387756348,
"kl": 8.162856101989746e-05,
"learning_rate": 9.473264167865171e-07,
"loss": 0.0493,
"reward": 0.6912501659244299,
"reward_std": 0.9633006453514099,
"step": 25
},
{
"clip_fraction": 0.0,
"completion_length": 2387.5555725097656,
"dapo/avg_reward_std": 0.19959817528724672,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3055555591980616,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 44.49404761904761,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.029714285714285714,
"grad_norm": 0.011900709010660648,
"kl": 9.435415267944336e-05,
"learning_rate": 9.397114317029974e-07,
"loss": 0.0815,
"reward": 0.5562675036489964,
"reward_std": 0.9110650941729546,
"step": 26
},
{
"clip_fraction": 0.0,
"completion_length": 2044.7292137145996,
"dapo/avg_reward_std": 0.3619746658951044,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.6354166744276881,
"dapo/num_sampling_attempts": 2.0,
"dapo/sampling_efficiency": 69.16666666666666,
"dapo/total_prompts_processed": 12.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.030857142857142857,
"grad_norm": 0.01303341705352068,
"kl": 8.736550807952881e-05,
"learning_rate": 9.316216432703916e-07,
"loss": 0.0141,
"reward": 0.7769045419991016,
"reward_std": 0.9760870188474655,
"step": 27
},
{
"clip_fraction": 0.0,
"completion_length": 2458.9305572509766,
"dapo/avg_reward_std": 0.2839898039465365,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.427536239442618,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 42.08333333333333,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.032,
"grad_norm": 0.013889433816075325,
"kl": 0.00014150142669677734,
"learning_rate": 9.230669076497687e-07,
"loss": 0.0479,
"reward": 0.5980293937027454,
"reward_std": 0.9796791076660156,
"step": 28
},
{
"clip_fraction": 0.0,
"completion_length": 2496.451416015625,
"dapo/avg_reward_std": 0.35542283952236176,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5648148175742891,
"dapo/num_sampling_attempts": 2.25,
"dapo/sampling_efficiency": 67.5,
"dapo/total_prompts_processed": 13.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.03314285714285714,
"grad_norm": 0.011365516111254692,
"kl": 0.00010502338409423828,
"learning_rate": 9.140576474687263e-07,
"loss": 0.0278,
"reward": 0.6495406329631805,
"reward_std": 0.9649527370929718,
"step": 29
},
{
"clip_fraction": 0.0,
"completion_length": 1831.333351135254,
"dapo/avg_reward_std": 0.2628121712933416,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41304348279600556,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 60.625,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.03428571428571429,
"grad_norm": 0.012428080663084984,
"kl": 8.240342140197754e-05,
"learning_rate": 9.046048391230247e-07,
"loss": 0.0408,
"reward": 0.7913381233811378,
"reward_std": 0.9801043272018433,
"step": 30
},
{
"clip_fraction": 0.0,
"completion_length": 2105.7118225097656,
"dapo/avg_reward_std": 0.2843361473083496,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4266666781902313,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 53.75,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.03542857142857143,
"grad_norm": 0.016210218891501427,
"kl": 0.0001112520694732666,
"learning_rate": 8.9471999940354e-07,
"loss": 0.1052,
"reward": 0.5814057979732752,
"reward_std": 0.9699539840221405,
"step": 31
},
{
"clip_fraction": 0.0,
"completion_length": 2366.718818664551,
"dapo/avg_reward_std": 0.2371666719173563,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34482759648355943,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 38.4375,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.036571428571428574,
"grad_norm": 0.01111757755279541,
"kl": 0.00011564791202545166,
"learning_rate": 8.844151714648274e-07,
"loss": 0.0379,
"reward": 0.6102676652371883,
"reward_std": 0.9229060783982277,
"step": 32
},
{
"clip_fraction": 0.0,
"completion_length": 2388.1909942626953,
"dapo/avg_reward_std": 0.29336222237156284,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3118279624369837,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 42.1875,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.037714285714285714,
"grad_norm": 0.01051933504641056,
"kl": 9.141862392425537e-05,
"learning_rate": 8.737029101523929e-07,
"loss": 0.041,
"reward": 0.6971308812499046,
"reward_std": 0.9577681049704552,
"step": 33
},
{
"clip_fraction": 0.0,
"completion_length": 2259.065963745117,
"dapo/avg_reward_std": 0.3195795826613903,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5833333367481828,
"dapo/num_sampling_attempts": 2.0,
"dapo/sampling_efficiency": 62.49999999999999,
"dapo/total_prompts_processed": 12.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.038857142857142854,
"grad_norm": 0.010114133358001709,
"kl": 9.936094284057617e-05,
"learning_rate": 8.625962667065487e-07,
"loss": 0.0019,
"reward": 0.706351961940527,
"reward_std": 0.9608398601412773,
"step": 34
},
{
"clip_fraction": 0.0,
"completion_length": 2236.6563262939453,
"dapo/avg_reward_std": 0.2805841226002266,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33908046679250126,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 30.952380952380942,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04,
"grad_norm": 0.01071652490645647,
"kl": 0.00013333559036254883,
"learning_rate": 8.511087728614862e-07,
"loss": 0.0108,
"reward": 0.6857370678335428,
"reward_std": 0.9366307482123375,
"step": 35
},
{
"clip_fraction": 0.0,
"completion_length": 1998.9166717529297,
"dapo/avg_reward_std": 0.30676539919593115,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4772727360779589,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 44.791666666666664,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04114285714285714,
"grad_norm": 0.011716869659721851,
"kl": 0.00010579824447631836,
"learning_rate": 8.392544243589427e-07,
"loss": 0.0577,
"reward": 0.8430320359766483,
"reward_std": 0.8613111302256584,
"step": 36
},
{
"clip_fraction": 0.0,
"completion_length": 2699.8819580078125,
"dapo/avg_reward_std": 0.280869146873211,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36781610034663104,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 36.45833333333333,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04228571428571429,
"grad_norm": 0.011984186246991158,
"kl": 0.00011450052261352539,
"learning_rate": 8.270476638965461e-07,
"loss": 0.0641,
"reward": 0.6952194459736347,
"reward_std": 0.9531055390834808,
"step": 37
},
{
"clip_fraction": 0.0,
"completion_length": 2508.343765258789,
"dapo/avg_reward_std": 0.3086147890204475,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.44444445485160466,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 51.45833333333333,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04342857142857143,
"grad_norm": 0.014813189394772053,
"kl": 0.00013363361358642578,
"learning_rate": 8.145033635316128e-07,
"loss": 0.0815,
"reward": 0.6981049925088882,
"reward_std": 0.9795023873448372,
"step": 38
},
{
"clip_fraction": 0.0,
"completion_length": 2568.090286254883,
"dapo/avg_reward_std": 0.2281228665149573,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30303030799735675,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 35.3125,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.044571428571428574,
"grad_norm": 0.010284055955708027,
"kl": 0.0001270771026611328,
"learning_rate": 8.01636806561836e-07,
"loss": 0.0129,
"reward": 0.5480891708284616,
"reward_std": 0.9542658925056458,
"step": 39
},
{
"clip_fraction": 0.0,
"completion_length": 2255.0798721313477,
"dapo/avg_reward_std": 0.3315709355202588,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46969698437235574,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 45.20833333333333,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.045714285714285714,
"grad_norm": 0.01235182024538517,
"kl": 0.00011420249938964844,
"learning_rate": 7.884636689049422e-07,
"loss": 0.0472,
"reward": 0.8707308620214462,
"reward_std": 0.9157829731702805,
"step": 40
},
{
"clip_fraction": 0.0,
"completion_length": 2417.9444427490234,
"dapo/avg_reward_std": 0.2831250044607347,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3655914020153784,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 37.723214285714285,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.046857142857142854,
"grad_norm": 0.010439831763505936,
"kl": 0.00012230873107910156,
"learning_rate": 7.75e-07,
"loss": 0.0395,
"reward": 0.7518008537590504,
"reward_std": 0.9689745083451271,
"step": 41
},
{
"clip_fraction": 0.0,
"completion_length": 2325.5937881469727,
"dapo/avg_reward_std": 0.28424168271677835,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3869047707745007,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 33.75,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.048,
"grad_norm": 0.010445328429341316,
"kl": 8.326023817062378e-05,
"learning_rate": 7.612622032536507e-07,
"loss": 0.0004,
"reward": 0.6408937154337764,
"reward_std": 0.9007892906665802,
"step": 42
},
{
"clip_fraction": 0.0,
"completion_length": 2423.9617919921875,
"dapo/avg_reward_std": 0.28680659715945905,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4038461624429776,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 46.041666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.04914285714285714,
"grad_norm": 0.010229532606899738,
"kl": 0.00013530254364013672,
"learning_rate": 7.472670160550848e-07,
"loss": 0.0104,
"reward": 0.6538480781018734,
"reward_std": 0.9688718169927597,
"step": 43
},
{
"clip_fraction": 0.0,
"completion_length": 2088.677085876465,
"dapo/avg_reward_std": 0.3208466252455345,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4423077031970024,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 41.041666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05028571428571429,
"grad_norm": 0.011106742545962334,
"kl": 0.00012566149234771729,
"learning_rate": 7.330314893841101e-07,
"loss": 0.0239,
"reward": 0.8764502704143524,
"reward_std": 0.9285347983241081,
"step": 44
},
{
"clip_fraction": 0.0,
"completion_length": 1721.781234741211,
"dapo/avg_reward_std": 0.3683280497789383,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5083333447575569,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 47.916666666666664,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05142857142857143,
"grad_norm": 0.01152133010327816,
"kl": 7.429718971252441e-05,
"learning_rate": 7.185729670371604e-07,
"loss": 0.0259,
"reward": 0.8203496672213078,
"reward_std": 0.9882074818015099,
"step": 45
},
{
"clip_fraction": 0.0,
"completion_length": 3020.9757232666016,
"dapo/avg_reward_std": 0.294668085873127,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37500000691839624,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 38.660714285714285,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.052571428571428575,
"grad_norm": 0.009526599198579788,
"kl": 0.00014853477478027344,
"learning_rate": 7.039090644965509e-07,
"loss": 0.0314,
"reward": 0.6035567373037338,
"reward_std": 0.9617942646145821,
"step": 46
},
{
"clip_fraction": 0.0,
"completion_length": 2869.8958892822266,
"dapo/avg_reward_std": 0.37419558623257804,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5196078481043086,
"dapo/num_sampling_attempts": 2.125,
"dapo/sampling_efficiency": 66.66666666666666,
"dapo/total_prompts_processed": 12.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.053714285714285714,
"grad_norm": 0.008854555897414684,
"kl": 0.00012740492820739746,
"learning_rate": 6.890576474687263e-07,
"loss": 0.0266,
"reward": 0.5126286232843995,
"reward_std": 0.9323688969016075,
"step": 47
},
{
"clip_fraction": 0.0,
"completion_length": 1974.5069999694824,
"dapo/avg_reward_std": 0.31826632221539813,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.42361111628512543,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 43.541666666666664,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.054857142857142854,
"grad_norm": 0.012630482204258442,
"kl": 0.00011485815048217773,
"learning_rate": 6.740368101176495e-07,
"loss": 0.0259,
"reward": 0.7998449765145779,
"reward_std": 0.9614248275756836,
"step": 48
},
{
"clip_fraction": 0.0,
"completion_length": 2775.854164123535,
"dapo/avg_reward_std": 0.24803236694563002,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41269841435409726,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 65.97222222222223,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.056,
"grad_norm": 0.0115203270688653,
"kl": 0.00010813772678375244,
"learning_rate": 6.588648530198504e-07,
"loss": 0.0626,
"reward": 0.5735284592956305,
"reward_std": 0.9657324403524399,
"step": 49
},
{
"clip_fraction": 0.0,
"completion_length": 2555.2743377685547,
"dapo/avg_reward_std": 0.3077625359098117,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.423611119389534,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 48.33333333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05714285714285714,
"grad_norm": 0.012258801609277725,
"kl": 0.00013893842697143555,
"learning_rate": 6.435602608679916e-07,
"loss": 0.0575,
"reward": 0.8288873583078384,
"reward_std": 0.950613297522068,
"step": 50
},
{
"clip_fraction": 0.0,
"completion_length": 2645.576400756836,
"dapo/avg_reward_std": 0.3462034153441588,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4236111169060071,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 39.99999999999999,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05828571428571429,
"grad_norm": 0.01161988079547882,
"kl": 0.0001646280288696289,
"learning_rate": 6.281416799501187e-07,
"loss": 0.046,
"reward": 0.46879277005791664,
"reward_std": 0.9387945607304573,
"step": 51
},
{
"clip_fraction": 0.0,
"completion_length": 2043.677101135254,
"dapo/avg_reward_std": 0.3387378570826157,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4347826171180476,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 45.83333333333333,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.05942857142857143,
"grad_norm": 0.011719447560608387,
"kl": 0.00012214481830596924,
"learning_rate": 6.126278954320294e-07,
"loss": 0.0093,
"reward": 0.7487262971699238,
"reward_std": 0.9444489181041718,
"step": 52
},
{
"clip_fraction": 0.0,
"completion_length": 2277.902801513672,
"dapo/avg_reward_std": 0.269059170936716,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37356322695469035,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 41.88988095238095,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.060571428571428575,
"grad_norm": 0.012477328069508076,
"kl": 0.00015044212341308594,
"learning_rate": 5.97037808470444e-07,
"loss": 0.048,
"reward": 0.6608240492641926,
"reward_std": 0.9770755022764206,
"step": 53
},
{
"clip_fraction": 0.0,
"completion_length": 2374.232635498047,
"dapo/avg_reward_std": 0.34054997433786804,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.500000013605408,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 37.916666666666664,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.061714285714285715,
"grad_norm": 0.013303548097610474,
"kl": 0.0001438036561012268,
"learning_rate": 5.813904131848564e-07,
"loss": 0.0614,
"reward": 0.75572844222188,
"reward_std": 0.9565529599785805,
"step": 54
},
{
"clip_fraction": 0.0,
"completion_length": 2442.232666015625,
"dapo/avg_reward_std": 0.27056889484326047,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4097222263614337,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 45.83333333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06285714285714286,
"grad_norm": 0.011922283098101616,
"kl": 0.00014710426330566406,
"learning_rate": 5.657047735161255e-07,
"loss": 0.0447,
"reward": 0.6145301992073655,
"reward_std": 0.9308876842260361,
"step": 55
},
{
"clip_fraction": 0.0,
"completion_length": 2163.7604064941406,
"dapo/avg_reward_std": 0.306766193537485,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.47619048612458365,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 57.291666666666664,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.064,
"grad_norm": 0.009786682203412056,
"kl": 0.00011900067329406738,
"learning_rate": 5.5e-07,
"loss": 0.0353,
"reward": 0.7467220462858677,
"reward_std": 0.9404179230332375,
"step": 56
},
{
"clip_fraction": 0.0,
"completion_length": 1992.7430953979492,
"dapo/avg_reward_std": 0.21240893006324768,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2631579006188794,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 27.708333333333332,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06514285714285714,
"grad_norm": 0.015636112540960312,
"kl": 0.00013278424739837646,
"learning_rate": 5.342952264838747e-07,
"loss": 0.0652,
"reward": 0.5448480695486069,
"reward_std": 0.8946049734950066,
"step": 57
},
{
"clip_fraction": 0.0,
"completion_length": 1786.927101135254,
"dapo/avg_reward_std": 0.27395731459061307,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.479166679084301,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 51.979166666666664,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06628571428571428,
"grad_norm": 0.012302345596253872,
"kl": 0.00010266900062561035,
"learning_rate": 5.186095868151436e-07,
"loss": 0.0222,
"reward": 0.7567729391157627,
"reward_std": 0.9539604857563972,
"step": 58
},
{
"clip_fraction": 0.0,
"completion_length": 1871.125015258789,
"dapo/avg_reward_std": 0.26716366639504063,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3461538547506699,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 51.25,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06742857142857143,
"grad_norm": 0.012423303909599781,
"kl": 0.00013174861669540405,
"learning_rate": 5.02962191529556e-07,
"loss": 0.0051,
"reward": 0.5472707431763411,
"reward_std": 0.9848242700099945,
"step": 59
},
{
"clip_fraction": 0.0,
"completion_length": 2110.0104446411133,
"dapo/avg_reward_std": 0.27772934675216676,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3933333379030228,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 55.416666666666664,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06857142857142857,
"grad_norm": 0.010305487550795078,
"kl": 0.00013266503810882568,
"learning_rate": 4.873721045679706e-07,
"loss": -0.0051,
"reward": 0.5918029174208641,
"reward_std": 0.9419775605201721,
"step": 60
},
{
"clip_fraction": 0.0,
"completion_length": 1820.1597595214844,
"dapo/avg_reward_std": 0.2844862639904022,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.351190483463662,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 39.28571428571428,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.06971428571428571,
"grad_norm": 0.01057644933462143,
"kl": 9.304285049438477e-05,
"learning_rate": 4.7185832004988133e-07,
"loss": 0.0019,
"reward": 0.5361353289335966,
"reward_std": 0.9243106096982956,
"step": 61
},
{
"clip_fraction": 0.0,
"completion_length": 2268.913215637207,
"dapo/avg_reward_std": 0.2805037432246738,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3456790220958215,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 39.791666666666664,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07085714285714285,
"grad_norm": 0.010327951982617378,
"kl": 0.00013640522956848145,
"learning_rate": 4.5643973913200837e-07,
"loss": 0.011,
"reward": 0.5703515652567148,
"reward_std": 0.9485230222344398,
"step": 62
},
{
"clip_fraction": 0.0,
"completion_length": 2150.541679382324,
"dapo/avg_reward_std": 0.3610766388868031,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5000000164697045,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 48.95833333333333,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.072,
"grad_norm": 0.01420843880623579,
"kl": 0.00017371773719787598,
"learning_rate": 4.4113514698014953e-07,
"loss": 0.027,
"reward": 0.8152667284011841,
"reward_std": 0.9553957208991051,
"step": 63
},
{
"clip_fraction": 0.0,
"completion_length": 2542.954879760742,
"dapo/avg_reward_std": 0.25789711397627124,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4057971077120822,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 55.0,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07314285714285715,
"grad_norm": 0.010388275608420372,
"kl": 0.00016424059867858887,
"learning_rate": 4.2596318988235037e-07,
"loss": 0.0153,
"reward": 0.8328269198536873,
"reward_std": 0.946412943303585,
"step": 64
},
{
"clip_fraction": 0.0,
"completion_length": 2573.9132385253906,
"dapo/avg_reward_std": 0.27658049833206905,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4682539779515493,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 67.01388888888889,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07428571428571429,
"grad_norm": 0.016587890684604645,
"kl": 0.0002205371856689453,
"learning_rate": 4.1094235253127374e-07,
"loss": 0.071,
"reward": 0.8272522762417793,
"reward_std": 0.9939362108707428,
"step": 65
},
{
"clip_fraction": 0.0,
"completion_length": 2272.4132080078125,
"dapo/avg_reward_std": 0.28441278512279194,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.38888889737427235,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 49.375,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07542857142857143,
"grad_norm": 0.01080800499767065,
"kl": 0.00015676021575927734,
"learning_rate": 3.9609093550344907e-07,
"loss": -0.0104,
"reward": 0.7243790216743946,
"reward_std": 1.0099836066365242,
"step": 66
},
{
"clip_fraction": 0.0,
"completion_length": 2551.920150756836,
"dapo/avg_reward_std": 0.29605763202363794,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4242424314672297,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 50.416666666666664,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07657142857142857,
"grad_norm": 0.01253009494394064,
"kl": 0.0001944899559020996,
"learning_rate": 3.8142703296283953e-07,
"loss": 0.0544,
"reward": 0.7982187271118164,
"reward_std": 0.9796509444713593,
"step": 67
},
{
"clip_fraction": 0.0,
"completion_length": 2039.6910400390625,
"dapo/avg_reward_std": 0.3305485857029756,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4305555634200573,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 41.041666666666664,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07771428571428571,
"grad_norm": 0.013196859508752823,
"kl": 0.00021713972091674805,
"learning_rate": 3.6696851061588994e-07,
"loss": 0.0185,
"reward": 0.8682084418833256,
"reward_std": 0.9861341118812561,
"step": 68
},
{
"clip_fraction": 0.0,
"completion_length": 2549.642364501953,
"dapo/avg_reward_std": 0.28639274001121523,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4133333384990692,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 38.95833333333333,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.07885714285714286,
"grad_norm": 0.010159006342291832,
"kl": 0.00016075372695922852,
"learning_rate": 3.5273298394491515e-07,
"loss": -0.0284,
"reward": 0.5912708025425673,
"reward_std": 0.9797485172748566,
"step": 69
},
{
"clip_fraction": 0.0,
"completion_length": 2719.5382232666016,
"dapo/avg_reward_std": 0.28611900960957565,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.351851859026485,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 40.625,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08,
"grad_norm": 0.011270755901932716,
"kl": 0.00022423267364501953,
"learning_rate": 3.387377967463493e-07,
"loss": 0.0265,
"reward": 0.5740308649837971,
"reward_std": 0.8749020621180534,
"step": 70
},
{
"clip_fraction": 0.0,
"completion_length": 2073.2916946411133,
"dapo/avg_reward_std": 0.28938476492961246,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.45833334264655906,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 42.49999999999999,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08114285714285714,
"grad_norm": 0.011867412365972996,
"kl": 0.0001347661018371582,
"learning_rate": 3.250000000000001e-07,
"loss": -0.0577,
"reward": 0.5955507848411798,
"reward_std": 0.9116542786359787,
"step": 71
},
{
"clip_fraction": 0.0,
"completion_length": 2239.322914123535,
"dapo/avg_reward_std": 0.30952110344713385,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4469697041945024,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 44.166666666666664,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08228571428571428,
"grad_norm": 0.011070906184613705,
"kl": 0.000155717134475708,
"learning_rate": 3.115363310950578e-07,
"loss": 0.0339,
"reward": 0.7990612685680389,
"reward_std": 0.9683424234390259,
"step": 72
},
{
"clip_fraction": 0.0,
"completion_length": 2044.489601135254,
"dapo/avg_reward_std": 0.21984713185917248,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3131313206571521,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 36.77083333333333,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08342857142857144,
"grad_norm": 0.014109701849520206,
"kl": 0.0001436173915863037,
"learning_rate": 2.9836319343816397e-07,
"loss": 0.085,
"reward": 0.8676656074821949,
"reward_std": 0.9657078757882118,
"step": 73
},
{
"clip_fraction": 0.0,
"completion_length": 1958.7361068725586,
"dapo/avg_reward_std": 0.30799518460812775,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4927536339863487,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 40.625,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08457142857142858,
"grad_norm": 0.013041837140917778,
"kl": 0.0001519918441772461,
"learning_rate": 2.854966364683872e-07,
"loss": 0.0492,
"reward": 0.6045123310759664,
"reward_std": 0.9384523630142212,
"step": 74
},
{
"clip_fraction": 0.0,
"completion_length": 1523.1284942626953,
"dapo/avg_reward_std": 0.31539708146682155,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.391025647521019,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 36.875,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08571428571428572,
"grad_norm": 0.014472462236881256,
"kl": 0.0001392364501953125,
"learning_rate": 2.729523361034538e-07,
"loss": 0.0358,
"reward": 0.7163376174867153,
"reward_std": 0.9508332461118698,
"step": 75
},
{
"clip_fraction": 0.0,
"completion_length": 2640.7813110351562,
"dapo/avg_reward_std": 0.3144421911239624,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.44000000655651095,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 43.75,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08685714285714285,
"grad_norm": 0.011127221398055553,
"kl": 0.0002060532569885254,
"learning_rate": 2.6074557564105724e-07,
"loss": 0.0604,
"reward": 0.6046733632683754,
"reward_std": 0.9528723284602165,
"step": 76
},
{
"clip_fraction": 0.0,
"completion_length": 2088.7292098999023,
"dapo/avg_reward_std": 0.3257487453520298,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4444444552063942,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 45.31249999999999,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.088,
"grad_norm": 0.013021063059568405,
"kl": 0.00017440319061279297,
"learning_rate": 2.488912271385139e-07,
"loss": 0.0353,
"reward": 0.5843205824494362,
"reward_std": 0.9498706609010696,
"step": 77
},
{
"clip_fraction": 0.0,
"completion_length": 2710.0069580078125,
"dapo/avg_reward_std": 0.4117408903206096,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5784313836518455,
"dapo/num_sampling_attempts": 2.125,
"dapo/sampling_efficiency": 52.08333333333333,
"dapo/total_prompts_processed": 12.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.08914285714285715,
"grad_norm": 0.00956858042627573,
"kl": 0.00020110607147216797,
"learning_rate": 2.374037332934512e-07,
"loss": -0.0019,
"reward": 0.7558267749845982,
"reward_std": 0.9872319549322128,
"step": 78
},
{
"clip_fraction": 0.0,
"completion_length": 2532.888916015625,
"dapo/avg_reward_std": 0.29725510747201983,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34408603031789103,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 31.696428571428562,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09028571428571429,
"grad_norm": 0.010455719195306301,
"kl": 0.00019878149032592773,
"learning_rate": 2.2629708984760706e-07,
"loss": 0.0433,
"reward": 0.7071553282439709,
"reward_std": 0.936428040266037,
"step": 79
},
{
"clip_fraction": 0.0,
"completion_length": 2045.3507232666016,
"dapo/avg_reward_std": 0.24797727167606354,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4318181872367859,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 56.24999999999999,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09142857142857143,
"grad_norm": 0.011657273396849632,
"kl": 0.00015923380851745605,
"learning_rate": 2.1558482853517253e-07,
"loss": 0.0016,
"reward": 0.8354307417757809,
"reward_std": 0.9478549808263779,
"step": 80
},
{
"clip_fraction": 0.0,
"completion_length": 2517.621482849121,
"dapo/avg_reward_std": 0.3837103931342854,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5882353055126527,
"dapo/num_sampling_attempts": 2.125,
"dapo/sampling_efficiency": 56.24999999999999,
"dapo/total_prompts_processed": 12.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09257142857142857,
"grad_norm": 0.011230596341192722,
"kl": 0.00020751357078552246,
"learning_rate": 2.0528000059645995e-07,
"loss": 0.0523,
"reward": 0.6180859599262476,
"reward_std": 0.9601781144738197,
"step": 81
},
{
"clip_fraction": 0.0,
"completion_length": 2189.6805725097656,
"dapo/avg_reward_std": 0.33485331758856773,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.47916667970518273,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 42.70833333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09371428571428571,
"grad_norm": 0.01085925567895174,
"kl": 0.00018781423568725586,
"learning_rate": 1.9539516087697517e-07,
"loss": 0.0277,
"reward": 0.7506253309547901,
"reward_std": 0.9654112830758095,
"step": 82
},
{
"clip_fraction": 0.0,
"completion_length": 2063.197952270508,
"dapo/avg_reward_std": 0.3108914480322883,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.45238096444379716,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 47.291666666666664,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09485714285714286,
"grad_norm": 0.01137411966919899,
"kl": 0.00018197298049926758,
"learning_rate": 1.8594235253127372e-07,
"loss": 0.0165,
"reward": 0.6088770590722561,
"reward_std": 0.9752795398235321,
"step": 83
},
{
"clip_fraction": 0.0,
"completion_length": 2032.7708587646484,
"dapo/avg_reward_std": 0.35138528971444993,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5000000070957911,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 40.62499999999999,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.096,
"grad_norm": 0.009788557887077332,
"kl": 0.0001645982265472412,
"learning_rate": 1.7693309235023127e-07,
"loss": -0.0005,
"reward": 0.6485470458865166,
"reward_std": 0.8980466201901436,
"step": 84
},
{
"clip_fraction": 0.0,
"completion_length": 2723.2083892822266,
"dapo/avg_reward_std": 0.35491983592510223,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5500000104308128,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 46.87499999999999,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09714285714285714,
"grad_norm": 0.012261813506484032,
"kl": 0.0002092123031616211,
"learning_rate": 1.6837835672960831e-07,
"loss": 0.0428,
"reward": 0.769347533583641,
"reward_std": 0.9622702524065971,
"step": 85
},
{
"clip_fraction": 0.0,
"completion_length": 2813.6979370117188,
"dapo/avg_reward_std": 0.31041908973739263,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46825397582281203,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 53.125,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09828571428571428,
"grad_norm": 0.013307915069162846,
"kl": 0.00022363662719726562,
"learning_rate": 1.6028856829700258e-07,
"loss": 0.0893,
"reward": 0.7634551003575325,
"reward_std": 0.9385863840579987,
"step": 86
},
{
"clip_fraction": 0.0,
"completion_length": 2645.5486907958984,
"dapo/avg_reward_std": 0.29486309762658747,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37931035099358396,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 38.95833333333333,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.09942857142857142,
"grad_norm": 0.009606744162738323,
"kl": 0.00017684698104858398,
"learning_rate": 1.5267358321348285e-07,
"loss": 0.0337,
"reward": 0.6225443221628666,
"reward_std": 0.9135682806372643,
"step": 87
},
{
"clip_fraction": 0.0,
"completion_length": 2211.1111221313477,
"dapo/avg_reward_std": 0.2131810395254029,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30092593075500595,
"dapo/num_sampling_attempts": 4.5,
"dapo/sampling_efficiency": 25.535714285714285,
"dapo/total_prompts_processed": 27.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10057142857142858,
"grad_norm": 0.011731365695595741,
"kl": 0.00017218291759490967,
"learning_rate": 1.4554267916537495e-07,
"loss": 0.0114,
"reward": 0.574246758595109,
"reward_std": 0.9149169996380806,
"step": 88
},
{
"clip_fraction": 0.0,
"completion_length": 2617.9445037841797,
"dapo/avg_reward_std": 0.34073091808118317,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5087719379287017,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 47.91666666666666,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10171428571428572,
"grad_norm": 0.013213962316513062,
"kl": 0.0002383589744567871,
"learning_rate": 1.3890454406082956e-07,
"loss": 0.072,
"reward": 0.7886459194123745,
"reward_std": 0.9416129812598228,
"step": 89
},
{
"clip_fraction": 0.0,
"completion_length": 2265.7743225097656,
"dapo/avg_reward_std": 0.39400896430015564,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.48412699145930155,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 46.24999999999999,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10285714285714286,
"grad_norm": 0.011279975064098835,
"kl": 0.00017967820167541504,
"learning_rate": 1.3276726544494571e-07,
"loss": 0.0115,
"reward": 0.8188270814716816,
"reward_std": 0.956598699092865,
"step": 90
},
{
"clip_fraction": 0.0,
"completion_length": 1751.7951850891113,
"dapo/avg_reward_std": 0.346651555462317,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.44696970690380444,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 46.875,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.104,
"grad_norm": 0.013495221734046936,
"kl": 0.00012958049774169922,
"learning_rate": 1.2713832064634125e-07,
"loss": 0.0244,
"reward": 0.7544833142310381,
"reward_std": 0.920841209590435,
"step": 91
},
{
"clip_fraction": 0.0,
"completion_length": 2176.5868530273438,
"dapo/avg_reward_std": 0.31276301860809325,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3866666704416275,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 42.410714285714285,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10514285714285715,
"grad_norm": 0.014705290086567402,
"kl": 0.00018972158432006836,
"learning_rate": 1.220245676671809e-07,
"loss": 0.082,
"reward": 0.6609778106212616,
"reward_std": 0.9741540849208832,
"step": 92
},
{
"clip_fraction": 0.0,
"completion_length": 2418.0035095214844,
"dapo/avg_reward_std": 0.3533540232615037,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.45454546131870965,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 50.416666666666664,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10628571428571429,
"grad_norm": 0.014526835642755032,
"kl": 0.00022083520889282227,
"learning_rate": 1.1743223682775649e-07,
"loss": 0.0467,
"reward": 0.6240662466734648,
"reward_std": 0.9587830454111099,
"step": 93
},
{
"clip_fraction": 0.0,
"completion_length": 1759.4409713745117,
"dapo/avg_reward_std": 0.31654878084858257,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4166666741172473,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 47.70833333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10742857142857143,
"grad_norm": 0.011724472045898438,
"kl": 0.00012111663818359375,
"learning_rate": 1.1336692317580158e-07,
"loss": -0.0008,
"reward": 0.8961930721998215,
"reward_std": 0.9275476858019829,
"step": 94
},
{
"clip_fraction": 0.0,
"completion_length": 1968.3958435058594,
"dapo/avg_reward_std": 0.31933523178100587,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36000000715255737,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 41.666666666666664,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10857142857142857,
"grad_norm": 0.012760731391608715,
"kl": 0.00015205144882202148,
"learning_rate": 1.0983357966978745e-07,
"loss": 0.0303,
"reward": 0.7966429069638252,
"reward_std": 0.9104023575782776,
"step": 95
},
{
"clip_fraction": 0.0,
"completion_length": 1705.9930610656738,
"dapo/avg_reward_std": 0.26930796217035363,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.38888889771920665,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 48.4375,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.10971428571428571,
"grad_norm": 0.016185246407985687,
"kl": 0.00014796853065490723,
"learning_rate": 1.068365111445064e-07,
"loss": -0.0016,
"reward": 0.7683778572827578,
"reward_std": 0.9466121271252632,
"step": 96
},
{
"clip_fraction": 0.0,
"completion_length": 2056.079864501953,
"dapo/avg_reward_std": 0.3310448744080283,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.48484849387949164,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 51.785714285714285,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11085714285714286,
"grad_norm": 0.010300490073859692,
"kl": 0.00016963481903076172,
"learning_rate": 1.0437936906629334e-07,
"loss": 0.0027,
"reward": 0.7596820928156376,
"reward_std": 0.9540099799633026,
"step": 97
},
{
"clip_fraction": 0.0,
"completion_length": 2592.8403244018555,
"dapo/avg_reward_std": 0.21406691299902425,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3018018116016646,
"dapo/num_sampling_attempts": 4.625,
"dapo/sampling_efficiency": 30.376984126984123,
"dapo/total_prompts_processed": 27.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.112,
"grad_norm": 0.01034973282366991,
"kl": 0.000193670392036438,
"learning_rate": 1.0246514708427701e-07,
"loss": 0.0254,
"reward": 0.7206093966960907,
"reward_std": 0.9074158370494843,
"step": 98
},
{
"clip_fraction": 0.0,
"completion_length": 2686.343780517578,
"dapo/avg_reward_std": 0.24782394810959144,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.35802469926851765,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 44.513888888888886,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11314285714285714,
"grad_norm": 0.011502859182655811,
"kl": 0.00023734569549560547,
"learning_rate": 1.0109617738307911e-07,
"loss": 0.0346,
"reward": 0.6300379456952214,
"reward_std": 0.9057611152529716,
"step": 99
},
{
"clip_fraction": 0.0,
"completion_length": 2050.166664123535,
"dapo/avg_reward_std": 0.3082110931475957,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.43055556404093903,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 44.166666666666664,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11428571428571428,
"grad_norm": 0.015181603841483593,
"kl": 0.00023311376571655273,
"learning_rate": 1.002741278414069e-07,
"loss": 0.0389,
"reward": 0.7550710588693619,
"reward_std": 0.9816905185580254,
"step": 100
},
{
"clip_fraction": 0.0,
"completion_length": 2261.625045776367,
"dapo/avg_reward_std": 0.2656887276419278,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3563218414783478,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 30.952380952380942,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11542857142857142,
"grad_norm": 0.012256976217031479,
"kl": 0.0002308487892150879,
"learning_rate": 1e-07,
"loss": 0.0255,
"reward": 0.6794679276645184,
"reward_std": 0.936141237616539,
"step": 101
},
{
"clip_fraction": 0.0,
"completion_length": 2019.2500381469727,
"dapo/avg_reward_std": 0.27603574914316975,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40322581414253483,
"dapo/num_sampling_attempts": 3.875,
"dapo/sampling_efficiency": 31.14583333333333,
"dapo/total_prompts_processed": 23.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11657142857142858,
"grad_norm": 0.011883130297064781,
"kl": 0.00018447637557983398,
"learning_rate": 6.203955092681039e-07,
"loss": 0.0566,
"reward": 0.9531724825501442,
"reward_std": 0.9424103274941444,
"step": 102
},
{
"clip_fraction": 0.0,
"completion_length": 2447.142379760742,
"dapo/avg_reward_std": 0.2595460871855418,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3833333447575569,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 28.958333333333332,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11771428571428572,
"grad_norm": 0.010165790095925331,
"kl": 0.00018906593322753906,
"learning_rate": 6.126278954320294e-07,
"loss": 0.0361,
"reward": 0.8079591542482376,
"reward_std": 0.9323313534259796,
"step": 103
},
{
"clip_fraction": 0.0,
"completion_length": 2414.4305725097656,
"dapo/avg_reward_std": 0.2675211922875766,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3505747174394542,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 42.013888888888886,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.11885714285714286,
"grad_norm": 0.009563453495502472,
"kl": 0.0002244710922241211,
"learning_rate": 6.048412045323164e-07,
"loss": 0.0367,
"reward": 0.6746065132319927,
"reward_std": 0.9439321234822273,
"step": 104
},
{
"clip_fraction": 0.0,
"completion_length": 2350.4653396606445,
"dapo/avg_reward_std": 0.2709802109183687,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33838384621071094,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 39.58333333333333,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12,
"grad_norm": 0.011944189667701721,
"kl": 0.00023399293422698975,
"learning_rate": 5.97037808470444e-07,
"loss": 0.0133,
"reward": 0.7501634955406189,
"reward_std": 0.9493465423583984,
"step": 105
},
{
"clip_fraction": 0.0,
"completion_length": 2232.5590209960938,
"dapo/avg_reward_std": 0.2304972934311834,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.32183908234382497,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 46.05654761904762,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12114285714285715,
"grad_norm": 0.011308341287076473,
"kl": 0.0002868175506591797,
"learning_rate": 5.892200842364462e-07,
"loss": 0.017,
"reward": 0.8449488952755928,
"reward_std": 0.9235394075512886,
"step": 106
},
{
"clip_fraction": 0.0,
"completion_length": 2245.4306259155273,
"dapo/avg_reward_std": 0.32372228088586225,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37681160478488257,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 45.53571428571428,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12228571428571429,
"grad_norm": 0.01079760491847992,
"kl": 0.00018143653869628906,
"learning_rate": 5.813904131848564e-07,
"loss": 0.0407,
"reward": 0.876940418034792,
"reward_std": 0.945194236934185,
"step": 107
},
{
"clip_fraction": 0.0,
"completion_length": 2877.4410095214844,
"dapo/avg_reward_std": 0.31851592376118615,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.44444445201328825,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 47.916666666666664,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12342857142857143,
"grad_norm": 0.008532079868018627,
"kl": 0.00026416778564453125,
"learning_rate": 5.735511803093248e-07,
"loss": 0.0189,
"reward": 0.5354619715362787,
"reward_std": 0.9343887642025948,
"step": 108
},
{
"clip_fraction": 0.0,
"completion_length": 2287.3403396606445,
"dapo/avg_reward_std": 0.2637126021660291,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41025641560554504,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 44.6875,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12457142857142857,
"grad_norm": 0.010662744753062725,
"kl": 0.00025856494903564453,
"learning_rate": 5.657047735161255e-07,
"loss": 0.0139,
"reward": 0.6945868469774723,
"reward_std": 0.945196196436882,
"step": 109
},
{
"clip_fraction": 0.0,
"completion_length": 2099.197898864746,
"dapo/avg_reward_std": 0.2950383967586926,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.380952388048172,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 38.541666666666664,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12571428571428572,
"grad_norm": 0.011256680823862553,
"kl": 0.0002060532569885254,
"learning_rate": 5.578535828967777e-07,
"loss": 0.0106,
"reward": 0.6000825632363558,
"reward_std": 0.9193084537982941,
"step": 110
},
{
"clip_fraction": 0.0,
"completion_length": 2716.079864501953,
"dapo/avg_reward_std": 0.2741352463590688,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3620689691140734,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 41.488095238095234,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12685714285714286,
"grad_norm": 0.009062284603714943,
"kl": 0.0002288222312927246,
"learning_rate": 5.5e-07,
"loss": 0.0461,
"reward": 0.6751261968165636,
"reward_std": 0.9856812655925751,
"step": 111
},
{
"clip_fraction": 0.0,
"completion_length": 3045.125,
"dapo/avg_reward_std": 0.36701818108558654,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5888888945182165,
"dapo/num_sampling_attempts": 1.875,
"dapo/sampling_efficiency": 65.625,
"dapo/total_prompts_processed": 11.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.128,
"grad_norm": 0.013615131378173828,
"kl": 0.0003104209899902344,
"learning_rate": 5.421464171032224e-07,
"loss": 0.0541,
"reward": 0.6107649356126785,
"reward_std": 0.9386496767401695,
"step": 112
},
{
"clip_fraction": 0.0,
"completion_length": 2261.1597442626953,
"dapo/avg_reward_std": 0.2829060518741608,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.413333340883255,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 41.041666666666664,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.12914285714285714,
"grad_norm": 0.01245199330151081,
"kl": 0.0002949833869934082,
"learning_rate": 5.342952264838747e-07,
"loss": 0.0273,
"reward": 0.7544166818261147,
"reward_std": 0.9633913785219193,
"step": 113
},
{
"clip_fraction": 0.0,
"completion_length": 2030.1041946411133,
"dapo/avg_reward_std": 0.2660287490912846,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.32738095788019045,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 45.535714285714285,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13028571428571428,
"grad_norm": 0.011100664734840393,
"kl": 0.00016885995864868164,
"learning_rate": 5.264488196906752e-07,
"loss": 0.0649,
"reward": 0.5986752398312092,
"reward_std": 0.9739916548132896,
"step": 114
},
{
"clip_fraction": 0.0,
"completion_length": 2791.465301513672,
"dapo/avg_reward_std": 0.297807412147522,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.44000000655651095,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 42.08333333333333,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13142857142857142,
"grad_norm": 0.011278674006462097,
"kl": 0.0002925395965576172,
"learning_rate": 5.186095868151436e-07,
"loss": 0.0586,
"reward": 0.6219565980136395,
"reward_std": 0.9591977074742317,
"step": 115
},
{
"clip_fraction": 0.0,
"completion_length": 2804.6493606567383,
"dapo/avg_reward_std": 0.35703572371731634,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.42028986371081806,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 43.12499999999999,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13257142857142856,
"grad_norm": 0.01122019812464714,
"kl": 0.00034046173095703125,
"learning_rate": 5.107799157635538e-07,
"loss": 0.0233,
"reward": 0.469740716740489,
"reward_std": 0.9214994236826897,
"step": 116
},
{
"clip_fraction": 0.0,
"completion_length": 2037.885456085205,
"dapo/avg_reward_std": 0.30805256009101867,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4666666799783707,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 44.27083333333333,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1337142857142857,
"grad_norm": 0.014056873507797718,
"kl": 0.0002486705780029297,
"learning_rate": 5.02962191529556e-07,
"loss": 0.038,
"reward": 0.9076524265110493,
"reward_std": 0.9655390456318855,
"step": 117
},
{
"clip_fraction": 0.0,
"completion_length": 2517.215316772461,
"dapo/avg_reward_std": 0.23199922059263503,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3571428635290691,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 45.535714285714285,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13485714285714287,
"grad_norm": 0.011827170848846436,
"kl": 0.00034999847412109375,
"learning_rate": 4.951587954676837e-07,
"loss": 0.023,
"reward": 0.5725362580269575,
"reward_std": 0.9489376917481422,
"step": 118
},
{
"clip_fraction": 0.0,
"completion_length": 2309.763916015625,
"dapo/avg_reward_std": 0.33521059803340747,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.48550726084605506,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 44.166666666666664,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.136,
"grad_norm": 0.014920210465788841,
"kl": 0.0003477334976196289,
"learning_rate": 4.873721045679706e-07,
"loss": 0.0967,
"reward": 0.7152486853301525,
"reward_std": 0.9450967088341713,
"step": 119
},
{
"clip_fraction": 0.0,
"completion_length": 2588.423629760742,
"dapo/avg_reward_std": 0.322092001636823,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4236111156642437,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 40.62499999999999,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13714285714285715,
"grad_norm": 0.009259553626179695,
"kl": 0.0002923011779785156,
"learning_rate": 4.79604490731896e-07,
"loss": 0.0204,
"reward": 0.5492150112986565,
"reward_std": 0.9336576908826828,
"step": 120
},
{
"clip_fraction": 0.0,
"completion_length": 2224.4583282470703,
"dapo/avg_reward_std": 0.2846992796375638,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4603174633923031,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 51.45833333333333,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1382857142857143,
"grad_norm": 0.012681272812187672,
"kl": 0.0002828836441040039,
"learning_rate": 4.7185832004988133e-07,
"loss": 0.084,
"reward": 0.8260641098022461,
"reward_std": 0.9569381102919579,
"step": 121
},
{
"clip_fraction": 0.0,
"completion_length": 2042.4618453979492,
"dapo/avg_reward_std": 0.21980997684754824,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2500000039213582,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 32.916666666666664,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.13942857142857143,
"grad_norm": 0.014447196386754513,
"kl": 0.0002307891845703125,
"learning_rate": 4.641359520805548e-07,
"loss": 0.0797,
"reward": 0.5401283344253898,
"reward_std": 0.8589324243366718,
"step": 122
},
{
"clip_fraction": 0.0,
"completion_length": 1821.270851135254,
"dapo/avg_reward_std": 0.30661167701085407,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4652777910232544,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 51.979166666666664,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14057142857142857,
"grad_norm": 0.012464089319109917,
"kl": 0.00021943449974060059,
"learning_rate": 4.5643973913200837e-07,
"loss": 0.0524,
"reward": 0.7340994998812675,
"reward_std": 0.948041707277298,
"step": 123
},
{
"clip_fraction": 0.0,
"completion_length": 1917.8576431274414,
"dapo/avg_reward_std": 0.26710175829274313,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3452381023338863,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 35.83333333333333,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1417142857142857,
"grad_norm": 0.01295761950314045,
"kl": 0.00027441978454589844,
"learning_rate": 4.4877202554526084e-07,
"loss": 0.0395,
"reward": 0.44990649446845055,
"reward_std": 0.9298848733305931,
"step": 124
},
{
"clip_fraction": 0.0,
"completion_length": 2151.381965637207,
"dapo/avg_reward_std": 0.2770674500776374,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4347826164701711,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 55.729166666666664,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14285714285714285,
"grad_norm": 0.011758905835449696,
"kl": 0.00027173757553100586,
"learning_rate": 4.4113514698014953e-07,
"loss": -0.0284,
"reward": 0.5582777298986912,
"reward_std": 0.9428363367915154,
"step": 125
},
{
"clip_fraction": 0.0,
"completion_length": 1810.0104522705078,
"dapo/avg_reward_std": 0.21576001878940698,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2777777860562007,
"dapo/num_sampling_attempts": 4.125,
"dapo/sampling_efficiency": 36.45833333333333,
"dapo/total_prompts_processed": 24.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.144,
"grad_norm": 0.011465526185929775,
"kl": 0.00021535158157348633,
"learning_rate": 4.3353142970386557e-07,
"loss": -0.0108,
"reward": 0.6622855560854077,
"reward_std": 0.9075748100876808,
"step": 126
},
{
"clip_fraction": 0.0,
"completion_length": 2243.732635498047,
"dapo/avg_reward_std": 0.2920382275031163,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3782051377571546,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 32.291666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14514285714285713,
"grad_norm": 0.01050955057144165,
"kl": 0.00027239322662353516,
"learning_rate": 4.2596318988235037e-07,
"loss": 0.0464,
"reward": 0.533456489443779,
"reward_std": 0.9191579967737198,
"step": 127
},
{
"clip_fraction": 0.0,
"completion_length": 2544.6875,
"dapo/avg_reward_std": 0.27494730835869197,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46031746906893595,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 46.24999999999999,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1462857142857143,
"grad_norm": 0.017111552879214287,
"kl": 0.0003796815872192383,
"learning_rate": 4.1843273287476854e-07,
"loss": 0.0784,
"reward": 0.7016365043818951,
"reward_std": 0.986565351486206,
"step": 128
},
{
"clip_fraction": 0.0,
"completion_length": 2367.3646240234375,
"dapo/avg_reward_std": 0.23454804884062874,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34567901988824207,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 47.222222222222214,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14742857142857144,
"grad_norm": 0.009941876865923405,
"kl": 0.00034287571907043457,
"learning_rate": 4.1094235253127374e-07,
"loss": 0.0061,
"reward": 0.7930427435785532,
"reward_std": 0.9500019550323486,
"step": 129
},
{
"clip_fraction": 0.0,
"completion_length": 2704.125045776367,
"dapo/avg_reward_std": 0.33850957382292973,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5158730284089134,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 47.291666666666664,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14857142857142858,
"grad_norm": 0.012839604169130325,
"kl": 0.0004292726516723633,
"learning_rate": 4.034943304942796e-07,
"loss": 0.0353,
"reward": 0.6285950914025307,
"reward_std": 0.9615181535482407,
"step": 130
},
{
"clip_fraction": 0.0,
"completion_length": 2475.5104370117188,
"dapo/avg_reward_std": 0.30972740189595654,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4696969762444496,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 52.82738095238095,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.14971428571428572,
"grad_norm": 0.00980184692889452,
"kl": 0.00036525726318359375,
"learning_rate": 3.9609093550344907e-07,
"loss": -0.0176,
"reward": 0.7969067245721817,
"reward_std": 0.9501392021775246,
"step": 131
},
{
"clip_fraction": 0.0,
"completion_length": 2550.0034790039062,
"dapo/avg_reward_std": 0.2723326214722225,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.30952381661960054,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 29.999999999999993,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15085714285714286,
"grad_norm": 0.017491888254880905,
"kl": 0.00042629241943359375,
"learning_rate": 3.8873442270461485e-07,
"loss": 0.0772,
"reward": 0.6202478259801865,
"reward_std": 0.9556004330515862,
"step": 132
},
{
"clip_fraction": 0.0,
"completion_length": 2056.5000534057617,
"dapo/avg_reward_std": 0.3334644228219986,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5333333447575569,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 47.916666666666664,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.152,
"grad_norm": 0.012553170323371887,
"kl": 0.0004407167434692383,
"learning_rate": 3.8142703296283953e-07,
"loss": -0.0185,
"reward": 0.7429189011454582,
"reward_std": 1.0187850967049599,
"step": 133
},
{
"clip_fraction": 0.0,
"completion_length": 2446.9270935058594,
"dapo/avg_reward_std": 0.28044995562783603,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.32758621254871634,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 32.41071428571428,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15314285714285714,
"grad_norm": 0.010931877419352531,
"kl": 0.00035947561264038086,
"learning_rate": 3.7417099217982686e-07,
"loss": 0.0372,
"reward": 0.6385626457631588,
"reward_std": 0.9372833296656609,
"step": 134
},
{
"clip_fraction": 0.0,
"completion_length": 2216.4479370117188,
"dapo/avg_reward_std": 0.3021530819435914,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40972223070760566,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 47.291666666666664,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15428571428571428,
"grad_norm": 0.014175104908645153,
"kl": 0.00040656328201293945,
"learning_rate": 3.6696851061588994e-07,
"loss": 0.0637,
"reward": 0.6612157337367535,
"reward_std": 0.9335344135761261,
"step": 135
},
{
"clip_fraction": 0.0,
"completion_length": 1879.3889083862305,
"dapo/avg_reward_std": 0.24328701660550875,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36206897219707224,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 31.77083333333333,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15542857142857142,
"grad_norm": 0.011906570754945278,
"kl": 0.0002865791320800781,
"learning_rate": 3.5982178221668533e-07,
"loss": 0.0254,
"reward": 0.621966740116477,
"reward_std": 0.9788949191570282,
"step": 136
},
{
"clip_fraction": 0.0,
"completion_length": 2137.1180725097656,
"dapo/avg_reward_std": 0.19872227481433324,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2857142903975078,
"dapo/num_sampling_attempts": 4.375,
"dapo/sampling_efficiency": 26.180555555555557,
"dapo/total_prompts_processed": 26.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15657142857142858,
"grad_norm": 0.011921432800590992,
"kl": 0.000335007905960083,
"learning_rate": 3.5273298394491515e-07,
"loss": 0.0425,
"reward": 0.8858193010091782,
"reward_std": 0.960886999964714,
"step": 137
},
{
"clip_fraction": 0.0,
"completion_length": 2133.6875381469727,
"dapo/avg_reward_std": 0.30985672700972783,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5079365216550373,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 50.41666666666666,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15771428571428572,
"grad_norm": 0.011959100142121315,
"kl": 0.00031131505966186523,
"learning_rate": 3.45704275117204e-07,
"loss": 0.0473,
"reward": 0.8114638328552246,
"reward_std": 0.9208285436034203,
"step": 138
},
{
"clip_fraction": 0.0,
"completion_length": 1887.6423797607422,
"dapo/avg_reward_std": 0.30113077312707903,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.45833334401249887,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 47.916666666666664,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.15885714285714286,
"grad_norm": 0.01248843315988779,
"kl": 0.0003123283386230469,
"learning_rate": 3.387377967463493e-07,
"loss": 0.0133,
"reward": 0.4802711680531502,
"reward_std": 0.9749159812927246,
"step": 139
},
{
"clip_fraction": 0.0,
"completion_length": 1555.0173835754395,
"dapo/avg_reward_std": 0.2354262595375379,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3750000074505806,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 58.05555555555555,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16,
"grad_norm": 0.01651233807206154,
"kl": 0.0003075599670410156,
"learning_rate": 3.3183567088914833e-07,
"loss": -0.0302,
"reward": 0.8893436007201672,
"reward_std": 0.9632327631115913,
"step": 140
},
{
"clip_fraction": 0.0,
"completion_length": 2886.878517150879,
"dapo/avg_reward_std": 0.2881770460378556,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.48412698933056425,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 53.12499999999999,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16114285714285714,
"grad_norm": 0.010870919562876225,
"kl": 0.0004533529281616211,
"learning_rate": 3.250000000000001e-07,
"loss": 0.0545,
"reward": 0.612054293975234,
"reward_std": 0.9482586532831192,
"step": 141
},
{
"clip_fraction": 0.0,
"completion_length": 1937.1458740234375,
"dapo/avg_reward_std": 0.3629945723906807,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.47826088057911914,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 47.39583333333333,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16228571428571428,
"grad_norm": 0.011180016212165356,
"kl": 0.00029921531677246094,
"learning_rate": 3.182328662904756e-07,
"loss": -0.0113,
"reward": 0.6175431702286005,
"reward_std": 0.9589766189455986,
"step": 142
},
{
"clip_fraction": 0.0,
"completion_length": 2465.3159942626953,
"dapo/avg_reward_std": 0.3803708272821763,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5588235381771537,
"dapo/num_sampling_attempts": 2.125,
"dapo/sampling_efficiency": 60.41666666666666,
"dapo/total_prompts_processed": 12.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16342857142857142,
"grad_norm": 0.012431374751031399,
"kl": 0.00048232078552246094,
"learning_rate": 3.115363310950578e-07,
"loss": 0.0679,
"reward": 0.7579541122540832,
"reward_std": 0.9723308756947517,
"step": 143
},
{
"clip_fraction": 0.0,
"completion_length": 2344.1180419921875,
"dapo/avg_reward_std": 0.21175828889796608,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.26315790179528686,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 25.347222222222218,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16457142857142856,
"grad_norm": 0.010860033333301544,
"kl": 0.0004448890686035156,
"learning_rate": 3.0491243424323783e-07,
"loss": -0.0005,
"reward": 0.5643926626071334,
"reward_std": 0.9328553825616837,
"step": 144
},
{
"clip_fraction": 0.0,
"completion_length": 2482.1389389038086,
"dapo/avg_reward_std": 0.37865253537893295,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.6145833432674408,
"dapo/num_sampling_attempts": 2.0,
"dapo/sampling_efficiency": 58.33333333333333,
"dapo/total_prompts_processed": 12.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1657142857142857,
"grad_norm": 0.011065399274230003,
"kl": 0.0004658699035644531,
"learning_rate": 2.9836319343816397e-07,
"loss": 0.0412,
"reward": 0.8742740526795387,
"reward_std": 0.9688765779137611,
"step": 145
},
{
"clip_fraction": 0.0,
"completion_length": 2297.6806030273438,
"dapo/avg_reward_std": 0.40159281912971945,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.627450992955881,
"dapo/num_sampling_attempts": 2.125,
"dapo/sampling_efficiency": 56.24999999999999,
"dapo/total_prompts_processed": 12.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16685714285714287,
"grad_norm": 0.014852684922516346,
"kl": 0.00038546323776245117,
"learning_rate": 2.918906036420294e-07,
"loss": 0.1043,
"reward": 0.7259054481983185,
"reward_std": 0.9452414810657501,
"step": 146
},
{
"clip_fraction": 0.0,
"completion_length": 2485.2639389038086,
"dapo/avg_reward_std": 0.2594580222731051,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.42028986435869464,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 43.125,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.168,
"grad_norm": 0.011770485900342464,
"kl": 0.00037994980812072754,
"learning_rate": 2.854966364683872e-07,
"loss": 0.0414,
"reward": 0.596230074763298,
"reward_std": 0.944911852478981,
"step": 147
},
{
"clip_fraction": 0.0,
"completion_length": 2030.6180877685547,
"dapo/avg_reward_std": 0.28245899453759193,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.43055556528270245,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 47.70833333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.16914285714285715,
"grad_norm": 0.010600890032947063,
"kl": 0.0003261566162109375,
"learning_rate": 2.791832395815782e-07,
"loss": 0.018,
"reward": 0.5254655107855797,
"reward_std": 0.9357841089367867,
"step": 148
},
{
"clip_fraction": 0.0,
"completion_length": 2956.184051513672,
"dapo/avg_reward_std": 0.3112214480837186,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.44444445334374905,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 42.08333333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1702857142857143,
"grad_norm": 0.010259653441607952,
"kl": 0.00048613548278808594,
"learning_rate": 2.729523361034538e-07,
"loss": 0.0339,
"reward": 0.6315554305911064,
"reward_std": 0.9876029044389725,
"step": 149
},
{
"clip_fraction": 0.0,
"completion_length": 2855.0520629882812,
"dapo/avg_reward_std": 0.32461989257070756,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5370370447635651,
"dapo/num_sampling_attempts": 2.25,
"dapo/sampling_efficiency": 55.20833333333333,
"dapo/total_prompts_processed": 13.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17142857142857143,
"grad_norm": 0.011087276972830296,
"kl": 0.0005285739898681641,
"learning_rate": 2.6680582402757324e-07,
"loss": 0.054,
"reward": 0.80087810754776,
"reward_std": 1.0038108006119728,
"step": 150
},
{
"clip_fraction": 0.0,
"completion_length": 2653.5834197998047,
"dapo/avg_reward_std": 0.24287073779851198,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36979167396202683,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 30.327380952380953,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17257142857142857,
"grad_norm": 0.011102661490440369,
"kl": 0.0005296468734741211,
"learning_rate": 2.6074557564105724e-07,
"loss": 0.0527,
"reward": 0.7124785147607327,
"reward_std": 0.9657682925462723,
"step": 151
},
{
"clip_fraction": 0.0,
"completion_length": 2141.173614501953,
"dapo/avg_reward_std": 0.25965497308763963,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333396706088,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 36.875,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1737142857142857,
"grad_norm": 0.01081050094217062,
"kl": 0.00039577484130859375,
"learning_rate": 2.547734369542718e-07,
"loss": 0.0232,
"reward": 0.5607589241117239,
"reward_std": 0.9106607139110565,
"step": 152
},
{
"clip_fraction": 0.0,
"completion_length": 2218.2882347106934,
"dapo/avg_reward_std": 0.24554675072431564,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.39583333767950535,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 48.75,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17485714285714285,
"grad_norm": 0.01474699191749096,
"kl": 0.000436246395111084,
"learning_rate": 2.488912271385139e-07,
"loss": 0.0585,
"reward": 0.4214355852454901,
"reward_std": 0.9400415197014809,
"step": 153
},
{
"clip_fraction": 0.0,
"completion_length": 2466.3368377685547,
"dapo/avg_reward_std": 0.3308070342649113,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46212121776559134,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 38.541666666666664,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.176,
"grad_norm": 0.011210402473807335,
"kl": 0.0004417896270751953,
"learning_rate": 2.4310073797187573e-07,
"loss": -0.0244,
"reward": 0.7323229797184467,
"reward_std": 0.9493635967373848,
"step": 154
},
{
"clip_fraction": 0.0,
"completion_length": 2012.8715438842773,
"dapo/avg_reward_std": 0.2809670078754425,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4133333426713943,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 35.83333333333333,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17714285714285713,
"grad_norm": 0.01654733158648014,
"kl": 0.00036275386810302734,
"learning_rate": 2.374037332934512e-07,
"loss": 0.0589,
"reward": 0.6634213328361511,
"reward_std": 0.8785304054617882,
"step": 155
},
{
"clip_fraction": 0.0,
"completion_length": 2291.3021240234375,
"dapo/avg_reward_std": 0.3599580733672432,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.44927537182103033,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 45.53571428571428,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1782857142857143,
"grad_norm": 0.011936171911656857,
"kl": 0.00043827295303344727,
"learning_rate": 2.3180194846605364e-07,
"loss": 0.0699,
"reward": 0.8599490560591221,
"reward_std": 0.9719394743442535,
"step": 156
},
{
"clip_fraction": 0.0,
"completion_length": 2499.791702270508,
"dapo/avg_reward_std": 0.3457585884766145,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5378787998448719,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 48.95833333333333,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.17942857142857144,
"grad_norm": 0.01289551891386509,
"kl": 0.00048601627349853516,
"learning_rate": 2.2629708984760706e-07,
"loss": 0.0584,
"reward": 0.6511420179158449,
"reward_std": 0.9461185112595558,
"step": 157
},
{
"clip_fraction": 0.0,
"completion_length": 2437.9375228881836,
"dapo/avg_reward_std": 0.23957703853475637,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.36781609829129847,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 37.291666666666664,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18057142857142858,
"grad_norm": 0.012769551016390324,
"kl": 0.0004298686981201172,
"learning_rate": 2.2089083427137329e-07,
"loss": 0.0258,
"reward": 0.6606059782207012,
"reward_std": 0.9088018089532852,
"step": 158
},
{
"clip_fraction": 0.0,
"completion_length": 1726.5868225097656,
"dapo/avg_reward_std": 0.3139249332573103,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3913043543048527,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 55.416666666666664,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18171428571428572,
"grad_norm": 0.013688490726053715,
"kl": 0.00027683377265930176,
"learning_rate": 2.1558482853517253e-07,
"loss": 0.0506,
"reward": 0.7147123599424958,
"reward_std": 0.9531080722808838,
"step": 159
},
{
"clip_fraction": 0.0,
"completion_length": 1593.003475189209,
"dapo/avg_reward_std": 0.2799004193010001,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.33908046782016754,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 45.3125,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18285714285714286,
"grad_norm": 0.020229365676641464,
"kl": 0.00033217668533325195,
"learning_rate": 2.1038068889975259e-07,
"loss": 0.0296,
"reward": 0.7677402682602406,
"reward_std": 0.9385703578591347,
"step": 160
},
{
"clip_fraction": 0.0,
"completion_length": 1877.9444274902344,
"dapo/avg_reward_std": 0.36716995636622113,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4930555621782939,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 44.49404761904761,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.184,
"grad_norm": 0.012556586414575577,
"kl": 0.00037413835525512695,
"learning_rate": 2.0528000059645995e-07,
"loss": 0.0401,
"reward": 0.6385876163840294,
"reward_std": 0.9741755276918411,
"step": 161
},
{
"clip_fraction": 0.0,
"completion_length": 2543.1145782470703,
"dapo/avg_reward_std": 0.20304026060244618,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.28921569226419225,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 26.249999999999996,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18514285714285714,
"grad_norm": 0.010984732769429684,
"kl": 0.0005058050155639648,
"learning_rate": 2.0028431734436308e-07,
"loss": 0.0214,
"reward": 0.8138710260391235,
"reward_std": 0.937220610678196,
"step": 162
},
{
"clip_fraction": 0.0,
"completion_length": 2579.7916946411133,
"dapo/avg_reward_std": 0.2669851701049244,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3333333397612852,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 31.38888888888889,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18628571428571428,
"grad_norm": 0.01393849402666092,
"kl": 0.0005407929420471191,
"learning_rate": 1.9539516087697517e-07,
"loss": 0.0557,
"reward": 0.6086596520617604,
"reward_std": 0.9360831007361412,
"step": 163
},
{
"clip_fraction": 0.0,
"completion_length": 2303.781295776367,
"dapo/avg_reward_std": 0.2889538109302521,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40384616129673445,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 41.041666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18742857142857142,
"grad_norm": 0.012467012740671635,
"kl": 0.0005753040313720703,
"learning_rate": 1.9061402047871833e-07,
"loss": 0.0286,
"reward": 0.7579413987696171,
"reward_std": 0.966604009270668,
"step": 164
},
{
"clip_fraction": 0.0,
"completion_length": 2215.8715744018555,
"dapo/avg_reward_std": 0.2284111071910177,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3630952446588448,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 49.72222222222222,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18857142857142858,
"grad_norm": 0.013376005925238132,
"kl": 0.00038570165634155273,
"learning_rate": 1.8594235253127372e-07,
"loss": 0.0737,
"reward": 0.6369971446692944,
"reward_std": 0.944696456193924,
"step": 165
},
{
"clip_fraction": 0.0,
"completion_length": 2194.999984741211,
"dapo/avg_reward_std": 0.35230770577555115,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5289855158847311,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 40.416666666666664,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.18971428571428572,
"grad_norm": 0.00896221399307251,
"kl": 0.0004324018955230713,
"learning_rate": 1.8138158006995363e-07,
"loss": -0.0087,
"reward": 0.770520705729723,
"reward_std": 0.9258415997028351,
"step": 166
},
{
"clip_fraction": 0.0,
"completion_length": 2363.9861373901367,
"dapo/avg_reward_std": 0.23058613193662544,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2763157930029066,
"dapo/num_sampling_attempts": 4.75,
"dapo/sampling_efficiency": 30.44642857142857,
"dapo/total_prompts_processed": 28.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19085714285714286,
"grad_norm": 0.011913989670574665,
"kl": 0.0005799531936645508,
"learning_rate": 1.7693309235023127e-07,
"loss": 0.0282,
"reward": 0.8937316909432411,
"reward_std": 0.9134809225797653,
"step": 167
},
{
"clip_fraction": 0.0,
"completion_length": 1846.3229217529297,
"dapo/avg_reward_std": 0.2788652099412063,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.37931035459041595,
"dapo/num_sampling_attempts": 3.625,
"dapo/sampling_efficiency": 33.03571428571428,
"dapo/total_prompts_processed": 21.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.192,
"grad_norm": 0.013345438055694103,
"kl": 0.00038933753967285156,
"learning_rate": 1.7259824442455923e-07,
"loss": 0.0657,
"reward": 0.5173812105786055,
"reward_std": 0.9046202600002289,
"step": 168
},
{
"clip_fraction": 0.0,
"completion_length": 1632.9965515136719,
"dapo/avg_reward_std": 0.33004767837978544,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.49206350318023134,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 51.45833333333333,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19314285714285714,
"grad_norm": 0.016018711030483246,
"kl": 0.0004235506057739258,
"learning_rate": 1.6837835672960831e-07,
"loss": -0.0266,
"reward": 0.7293304707854986,
"reward_std": 0.9580913484096527,
"step": 169
},
{
"clip_fraction": 0.0,
"completion_length": 2218.357666015625,
"dapo/avg_reward_std": 0.30882045084779913,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4242424287579276,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 49.37499999999999,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19428571428571428,
"grad_norm": 0.012691031210124493,
"kl": 0.0005915164947509766,
"learning_rate": 1.6427471468404952e-07,
"loss": 0.0375,
"reward": 0.731636168435216,
"reward_std": 0.9506037011742592,
"step": 170
},
{
"clip_fraction": 0.0,
"completion_length": 2086.989585876465,
"dapo/avg_reward_std": 0.26685478786627453,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.372222230831782,
"dapo/num_sampling_attempts": 3.75,
"dapo/sampling_efficiency": 36.45833333333333,
"dapo/total_prompts_processed": 22.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19542857142857142,
"grad_norm": 0.0107533298432827,
"kl": 0.00045359134674072266,
"learning_rate": 1.6028856829700258e-07,
"loss": 0.0268,
"reward": 0.6401270348578691,
"reward_std": 0.9421326443552971,
"step": 171
},
{
"clip_fraction": 0.0,
"completion_length": 1523.298625946045,
"dapo/avg_reward_std": 0.2958875367274651,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4294871888481654,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 41.041666666666664,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19657142857142856,
"grad_norm": 0.02487981878221035,
"kl": 0.00044208765029907227,
"learning_rate": 1.5642113178727193e-07,
"loss": 0.0215,
"reward": 0.5742892920970917,
"reward_std": 0.9192508533596992,
"step": 172
},
{
"clip_fraction": 0.0,
"completion_length": 2197.4722290039062,
"dapo/avg_reward_std": 0.33716599914160644,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4545454619960351,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 45.20833333333333,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.1977142857142857,
"grad_norm": 0.00999497715383768,
"kl": 0.0006158351898193359,
"learning_rate": 1.5267358321348285e-07,
"loss": -0.0198,
"reward": 0.6909432113170624,
"reward_std": 0.9331774786114693,
"step": 173
},
{
"clip_fraction": 0.0,
"completion_length": 2469.1909942626953,
"dapo/avg_reward_std": 0.31674497947096825,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4722222325702508,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 47.61904761904762,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.19885714285714284,
"grad_norm": 0.027324816212058067,
"kl": 0.0005202293395996094,
"learning_rate": 1.4904706411523448e-07,
"loss": 0.1381,
"reward": 0.7919853329658508,
"reward_std": 0.9734821692109108,
"step": 174
},
{
"clip_fraction": 0.0,
"completion_length": 2290.7292098999023,
"dapo/avg_reward_std": 0.2796748812709536,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.41071429369705065,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 31.041666666666664,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2,
"grad_norm": 0.011332061141729355,
"kl": 0.000499039888381958,
"learning_rate": 1.4554267916537495e-07,
"loss": 0.0026,
"reward": 0.5971913021057844,
"reward_std": 0.9767839089035988,
"step": 175
},
{
"clip_fraction": 0.0,
"completion_length": 2643.475685119629,
"dapo/avg_reward_std": 0.30459834399976227,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4649122922044051,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 54.58333333333333,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20114285714285715,
"grad_norm": 0.011058920994400978,
"kl": 0.0006421804428100586,
"learning_rate": 1.4216149583350755e-07,
"loss": 0.0243,
"reward": 0.801079198718071,
"reward_std": 1.0328236892819405,
"step": 176
},
{
"clip_fraction": 0.0,
"completion_length": 2657.517364501953,
"dapo/avg_reward_std": 0.268055671826005,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3072916711680591,
"dapo/num_sampling_attempts": 4.0,
"dapo/sampling_efficiency": 30.32738095238095,
"dapo/total_prompts_processed": 24.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2022857142857143,
"grad_norm": 0.012514113448560238,
"kl": 0.0006227493286132812,
"learning_rate": 1.3890454406082956e-07,
"loss": 0.066,
"reward": 0.5342087037861347,
"reward_std": 0.9403787776827812,
"step": 177
},
{
"clip_fraction": 0.0,
"completion_length": 1730.2395935058594,
"dapo/avg_reward_std": 0.22906314557598484,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.354838716406976,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 49.99999999999999,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20342857142857143,
"grad_norm": 0.013909725472331047,
"kl": 0.0004641413688659668,
"learning_rate": 1.3577281594640182e-07,
"loss": -0.0032,
"reward": 0.817855941131711,
"reward_std": 0.9715805351734161,
"step": 178
},
{
"clip_fraction": 0.0,
"completion_length": 1916.9652633666992,
"dapo/avg_reward_std": 0.33905652307328726,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5000000085149493,
"dapo/num_sampling_attempts": 2.625,
"dapo/sampling_efficiency": 49.99999999999999,
"dapo/total_prompts_processed": 15.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20457142857142857,
"grad_norm": 0.010170280002057552,
"kl": 0.00033092498779296875,
"learning_rate": 1.3276726544494571e-07,
"loss": 0.0153,
"reward": 0.6332587338984013,
"reward_std": 0.9844094663858414,
"step": 179
},
{
"clip_fraction": 0.0,
"completion_length": 2013.7534942626953,
"dapo/avg_reward_std": 0.4115603660282336,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5175438719360452,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 48.95833333333333,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2057142857142857,
"grad_norm": 0.010059732012450695,
"kl": 0.0004872828722000122,
"learning_rate": 1.2988880807625927e-07,
"loss": 0.012,
"reward": 0.7964395936578512,
"reward_std": 0.9064052030444145,
"step": 180
},
{
"clip_fraction": 0.0,
"completion_length": 2538.3159713745117,
"dapo/avg_reward_std": 0.3185795678032769,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3703703775450035,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 40.0,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20685714285714285,
"grad_norm": 0.009190794080495834,
"kl": 0.0005941390991210938,
"learning_rate": 1.2713832064634125e-07,
"loss": -0.0091,
"reward": 0.6052752519026399,
"reward_std": 0.9398948326706886,
"step": 181
},
{
"clip_fraction": 0.0,
"completion_length": 1992.0277557373047,
"dapo/avg_reward_std": 0.30058977752923965,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3392857238650322,
"dapo/num_sampling_attempts": 3.5,
"dapo/sampling_efficiency": 45.32738095238095,
"dapo/total_prompts_processed": 21.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.208,
"grad_norm": 0.017918387427926064,
"kl": 0.00043332576751708984,
"learning_rate": 1.2451664098030743e-07,
"loss": 0.0782,
"reward": 0.7308525424450636,
"reward_std": 0.8988610878586769,
"step": 182
},
{
"clip_fraction": 0.0,
"completion_length": 2368.312515258789,
"dapo/avg_reward_std": 0.2227620858213176,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.40579710317694623,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 48.33333333333333,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.20914285714285713,
"grad_norm": 0.01093615498393774,
"kl": 0.0005226731300354004,
"learning_rate": 1.220245676671809e-07,
"loss": -0.0097,
"reward": 0.6296821031719446,
"reward_std": 0.9496165588498116,
"step": 183
},
{
"clip_fraction": 0.0,
"completion_length": 1855.0486297607422,
"dapo/avg_reward_std": 0.3308859848976135,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4133333384990692,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 41.666666666666664,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.2102857142857143,
"grad_norm": 0.013805963099002838,
"kl": 0.0004195570945739746,
"learning_rate": 1.1966285981663407e-07,
"loss": 0.0542,
"reward": 0.8230033777654171,
"reward_std": 0.9269852489233017,
"step": 184
},
{
"clip_fraction": 0.0,
"completion_length": 2737.260452270508,
"dapo/avg_reward_std": 0.3074522775908311,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.45138889489074546,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 49.37499999999999,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21142857142857144,
"grad_norm": 0.01179632730782032,
"kl": 0.0006718635559082031,
"learning_rate": 1.1743223682775649e-07,
"loss": 0.0529,
"reward": 0.6228375509381294,
"reward_std": 0.9775977432727814,
"step": 185
},
{
"clip_fraction": 0.0,
"completion_length": 2526.899368286133,
"dapo/avg_reward_std": 0.2964219942688942,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.48333334401249883,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 58.33333333333333,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21257142857142858,
"grad_norm": 0.014796112664043903,
"kl": 0.0005816221237182617,
"learning_rate": 1.1533337816991931e-07,
"loss": 0.088,
"reward": 0.8448536917567253,
"reward_std": 0.9608767181634903,
"step": 186
},
{
"clip_fraction": 0.0,
"completion_length": 2288.274345397949,
"dapo/avg_reward_std": 0.3166468055159957,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.34567901823255753,
"dapo/num_sampling_attempts": 3.375,
"dapo/sampling_efficiency": 40.972222222222214,
"dapo/total_prompts_processed": 20.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21371428571428572,
"grad_norm": 0.011898735538125038,
"kl": 0.000521540641784668,
"learning_rate": 1.1336692317580158e-07,
"loss": 0.0415,
"reward": 0.7687236070632935,
"reward_std": 0.9334599822759628,
"step": 187
},
{
"clip_fraction": 0.0,
"completion_length": 2432.531265258789,
"dapo/avg_reward_std": 0.28751447051763535,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4513888979951541,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 53.33333333333333,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21485714285714286,
"grad_norm": 0.010497819632291794,
"kl": 0.0007112026214599609,
"learning_rate": 1.1153347084664419e-07,
"loss": 0.0185,
"reward": 0.7899295631796122,
"reward_std": 0.9512373134493828,
"step": 188
},
{
"clip_fraction": 0.0,
"completion_length": 1948.9167022705078,
"dapo/avg_reward_std": 0.30568089832862216,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46527779040237266,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 36.87499999999999,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.216,
"grad_norm": 0.013562222942709923,
"kl": 0.0006091594696044922,
"learning_rate": 1.0983357966978745e-07,
"loss": 0.0388,
"reward": 0.6485428418964148,
"reward_std": 0.9110815972089767,
"step": 189
},
{
"clip_fraction": 0.0,
"completion_length": 2494.395866394043,
"dapo/avg_reward_std": 0.27111421525478363,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3863636404275894,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 45.20833333333333,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21714285714285714,
"grad_norm": 0.00931188277900219,
"kl": 0.0006044209003448486,
"learning_rate": 1.0826776744855121e-07,
"loss": 0.0024,
"reward": 0.5944220442324877,
"reward_std": 0.9433802142739296,
"step": 190
},
{
"clip_fraction": 0.0,
"completion_length": 2601.7569427490234,
"dapo/avg_reward_std": 0.3233232215046883,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.49166667386889457,
"dapo/num_sampling_attempts": 2.5,
"dapo/sampling_efficiency": 49.375,
"dapo/total_prompts_processed": 15.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21828571428571428,
"grad_norm": 0.011869938112795353,
"kl": 0.0006383061408996582,
"learning_rate": 1.068365111445064e-07,
"loss": 0.0221,
"reward": 0.5644997656345367,
"reward_std": 0.9473884925246239,
"step": 191
},
{
"clip_fraction": 0.0,
"completion_length": 1624.8541564941406,
"dapo/avg_reward_std": 0.33193936944007874,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.46969697692177514,
"dapo/num_sampling_attempts": 2.75,
"dapo/sampling_efficiency": 44.791666666666664,
"dapo/total_prompts_processed": 16.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.21942857142857142,
"grad_norm": 0.011828861199319363,
"kl": 0.0003381967544555664,
"learning_rate": 1.0554024673218806e-07,
"loss": -0.0125,
"reward": 0.7034952798858285,
"reward_std": 0.9275326952338219,
"step": 192
},
{
"clip_fraction": 0.0,
"completion_length": 2333.607650756836,
"dapo/avg_reward_std": 0.4260722654206412,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.6309523891125407,
"dapo/num_sampling_attempts": 1.75,
"dapo/sampling_efficiency": 70.83333333333333,
"dapo/total_prompts_processed": 10.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22057142857142858,
"grad_norm": 0.010871903039515018,
"kl": 0.0005550980567932129,
"learning_rate": 1.0437936906629334e-07,
"loss": -0.004,
"reward": 0.4316184278577566,
"reward_std": 0.9555172920227051,
"step": 193
},
{
"clip_fraction": 0.0,
"completion_length": 2939.9097442626953,
"dapo/avg_reward_std": 0.2783619257119986,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.3846153932122084,
"dapo/num_sampling_attempts": 3.25,
"dapo/sampling_efficiency": 47.39583333333333,
"dapo/total_prompts_processed": 19.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22171428571428572,
"grad_norm": 0.014206220395863056,
"kl": 0.0007078647613525391,
"learning_rate": 1.0335423176140511e-07,
"loss": 0.0805,
"reward": 0.7283875979483128,
"reward_std": 0.9719515442848206,
"step": 194
},
{
"clip_fraction": 0.0,
"completion_length": 1945.9653244018555,
"dapo/avg_reward_std": 0.3208765654187453,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.5438596551355562,
"dapo/num_sampling_attempts": 2.375,
"dapo/sampling_efficiency": 60.416666666666664,
"dapo/total_prompts_processed": 14.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22285714285714286,
"grad_norm": 0.015090257860720158,
"kl": 0.000569462776184082,
"learning_rate": 1.0246514708427701e-07,
"loss": -0.021,
"reward": 0.5579635920003057,
"reward_std": 0.9634370356798172,
"step": 195
},
{
"clip_fraction": 0.0,
"completion_length": 2212.5902709960938,
"dapo/avg_reward_std": 0.23615881362382105,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.2696078498573864,
"dapo/num_sampling_attempts": 4.25,
"dapo/sampling_efficiency": 27.916666666666664,
"dapo/total_prompts_processed": 25.5,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.224,
"grad_norm": 0.012650169432163239,
"kl": 0.0005346536636352539,
"learning_rate": 1.017123858587145e-07,
"loss": 0.0756,
"reward": 0.6994661018252373,
"reward_std": 0.9281085133552551,
"step": 196
},
{
"clip_fraction": 0.0,
"completion_length": 2392.7742919921875,
"dapo/avg_reward_std": 0.3088900530338287,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.406666676402092,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 45.3125,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22514285714285714,
"grad_norm": 0.01346337329596281,
"kl": 0.0006176233291625977,
"learning_rate": 1.0109617738307911e-07,
"loss": 0.0523,
"reward": 0.6644653081893921,
"reward_std": 0.9385305866599083,
"step": 197
},
{
"clip_fraction": 0.0,
"completion_length": 2743.819465637207,
"dapo/avg_reward_std": 0.3153854298591614,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4133333432674408,
"dapo/num_sampling_attempts": 3.125,
"dapo/sampling_efficiency": 43.75,
"dapo/total_prompts_processed": 18.75,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22628571428571428,
"grad_norm": 0.010797293856739998,
"kl": 0.000672459602355957,
"learning_rate": 1.0061670936044178e-07,
"loss": 0.04,
"reward": 0.5658168056979775,
"reward_std": 0.9682240337133408,
"step": 198
},
{
"clip_fraction": 0.0,
"completion_length": 2336.80558013916,
"dapo/avg_reward_std": 0.3246711401835732,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.4855072530715362,
"dapo/num_sampling_attempts": 2.875,
"dapo/sampling_efficiency": 41.666666666666664,
"dapo/total_prompts_processed": 17.25,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22742857142857142,
"grad_norm": 0.011765834875404835,
"kl": 0.00055694580078125,
"learning_rate": 1.002741278414069e-07,
"loss": 0.0308,
"reward": 0.6460054386407137,
"reward_std": 0.9711420610547066,
"step": 199
},
{
"clip_fraction": 0.0,
"completion_length": 2571.1875228881836,
"dapo/avg_reward_std": 0.29997331152359646,
"dapo/filter_reward_index": 0.0,
"dapo/kept_prompts_ratio": 0.486111119389534,
"dapo/num_sampling_attempts": 3.0,
"dapo/sampling_efficiency": 39.285714285714285,
"dapo/total_prompts_processed": 18.0,
"dapo/valid_prompts_collected": 6.0,
"epoch": 0.22857142857142856,
"grad_norm": 0.009876573458313942,
"kl": 0.0005443096160888672,
"learning_rate": 1.0006853717962393e-07,
"loss": 0.0268,
"reward": 0.5957941338419914,
"reward_std": 0.992652915418148,
"step": 200
},
{
"epoch": 0.22857142857142856,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.01698429927288089,
"train_runtime": 137940.7556,
"train_samples_per_second": 0.07,
"train_steps_per_second": 0.001
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}