{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22857142857142856, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_fraction": 0.0, "completion_length": 1681.8854370117188, "dapo/avg_reward_std": 0.3420590679896505, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.48245614610220255, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 54.58333333333333, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.001142857142857143, "grad_norm": 0.011931957677006721, "kl": 0.0, "learning_rate": 0.0, "loss": 0.0219, "reward": 0.8671084493398666, "reward_std": 0.964848667383194, "step": 1 }, { "clip_fraction": 0.0, "completion_length": 2172.913185119629, "dapo/avg_reward_std": 0.27327019289920207, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4824561500235608, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 67.41071428571428, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.002285714285714286, "grad_norm": 0.014162006787955761, "kl": 0.0, "learning_rate": 1e-07, "loss": 0.0232, "reward": 0.932205643504858, "reward_std": 0.9607091471552849, "step": 2 }, { "clip_fraction": 0.0, "completion_length": 2418.3611373901367, "dapo/avg_reward_std": 0.3202404692769051, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.45833334177732465, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 51.04166666666666, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.0034285714285714284, "grad_norm": 0.011303936131298542, "kl": 0.0001301020383834839, "learning_rate": 2e-07, "loss": 0.0371, "reward": 0.5818949677050114, "reward_std": 0.928392305970192, "step": 3 }, { "clip_fraction": 0.0, "completion_length": 2080.6250228881836, "dapo/avg_reward_std": 0.3523675338788466, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4545454586094076, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 45.20833333333333, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.004571428571428572, "grad_norm": 0.010935964062809944, "kl": 8.246302604675293e-05, "learning_rate": 3e-07, "loss": 0.007, "reward": 0.6902085058391094, "reward_std": 0.9576746746897697, "step": 4 }, { "clip_fraction": 0.0, "completion_length": 2208.1910247802734, "dapo/avg_reward_std": 0.33842799224351583, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4912280746196446, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 54.166666666666664, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.005714285714285714, "grad_norm": 0.01424587145447731, "kl": 0.00011987239122390747, "learning_rate": 4e-07, "loss": 0.0916, "reward": 0.5482002776116133, "reward_std": 0.9192102774977684, "step": 5 }, { "clip_fraction": 0.0, "completion_length": 2428.8646087646484, "dapo/avg_reward_std": 0.2724780907233556, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37222223381201425, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 37.39583333333333, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.006857142857142857, "grad_norm": 0.012209060601890087, "kl": 0.00013336539268493652, "learning_rate": 5e-07, "loss": 0.063, "reward": 0.6304261162877083, "reward_std": 0.947055421769619, "step": 6 }, { "clip_fraction": 0.0, "completion_length": 2028.1111297607422, "dapo/avg_reward_std": 0.35396890342235565, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5151515284722502, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 48.95833333333333, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.008, "grad_norm": 0.01456605363637209, "kl": 0.00010842084884643555, "learning_rate": 6e-07, "loss": 0.0863, "reward": 0.7125897314399481, "reward_std": 0.938522607088089, "step": 7 }, { "clip_fraction": 0.0, "completion_length": 1825.9792022705078, "dapo/avg_reward_std": 0.3198123288154602, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.45333334505558015, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 36.45833333333333, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.009142857142857144, "grad_norm": 0.014117815531790257, "kl": 8.45193862915039e-05, "learning_rate": 7e-07, "loss": 0.024, "reward": 0.7728112610056996, "reward_std": 0.953309640288353, "step": 8 }, { "clip_fraction": 0.0, "completion_length": 2424.159713745117, "dapo/avg_reward_std": 0.4454919546842575, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.6785714392151151, "dapo/num_sampling_attempts": 1.75, "dapo/sampling_efficiency": 70.83333333333333, "dapo/total_prompts_processed": 10.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.010285714285714285, "grad_norm": 0.008895393460988998, "kl": 0.00011056661605834961, "learning_rate": 8e-07, "loss": 0.013, "reward": 0.6077092736959457, "reward_std": 0.994397833943367, "step": 9 }, { "clip_fraction": 0.0, "completion_length": 1959.0763702392578, "dapo/avg_reward_std": 0.25889470875263215, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.350000007947286, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 40.20833333333333, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.011428571428571429, "grad_norm": 0.011032010428607464, "kl": 8.809566497802734e-05, "learning_rate": 9e-07, "loss": 0.018, "reward": 0.7773313578218222, "reward_std": 0.9549762830138206, "step": 10 }, { "clip_fraction": 0.0, "completion_length": 2597.6979217529297, "dapo/avg_reward_std": 0.3167818512605584, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.44202899284984754, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 42.70833333333333, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.012571428571428572, "grad_norm": 0.010659257881343365, "kl": 0.00013309717178344727, "learning_rate": 1e-06, "loss": 0.0026, "reward": 0.5649524200707674, "reward_std": 0.9257139712572098, "step": 11 }, { "clip_fraction": 0.0, "completion_length": 2214.9444580078125, "dapo/avg_reward_std": 0.33351172175672317, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5648148208856583, "dapo/num_sampling_attempts": 2.25, "dapo/sampling_efficiency": 49.99999999999999, "dapo/total_prompts_processed": 13.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.013714285714285714, "grad_norm": 0.010501649230718613, "kl": 9.53376293182373e-05, "learning_rate": 9.997258721585931e-07, "loss": 0.0287, "reward": 0.7854772098362446, "reward_std": 0.9361946359276772, "step": 12 }, { "clip_fraction": 0.0, "completion_length": 1984.5416717529297, "dapo/avg_reward_std": 0.3313978049490187, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5925926052861743, "dapo/num_sampling_attempts": 2.25, "dapo/sampling_efficiency": 56.666666666666664, "dapo/total_prompts_processed": 13.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.014857142857142857, "grad_norm": 0.012102734297513962, "kl": 9.861588478088379e-05, "learning_rate": 9.989038226169207e-07, "loss": 0.0277, "reward": 0.9007548745721579, "reward_std": 0.9196444824337959, "step": 13 }, { "clip_fraction": 0.0, "completion_length": 2267.5069885253906, "dapo/avg_reward_std": 0.21889745750847986, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3186274560935357, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 40.63988095238095, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.016, "grad_norm": 0.01004031766206026, "kl": 0.00010375678539276123, "learning_rate": 9.975348529157229e-07, "loss": 0.0342, "reward": 0.5439228732138872, "reward_std": 0.9444419518113136, "step": 14 }, { "clip_fraction": 0.0, "completion_length": 2403.170135498047, "dapo/avg_reward_std": 0.24896668710491873, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4242424321445552, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 58.45238095238095, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.017142857142857144, "grad_norm": 0.013138854876160622, "kl": 0.00011286139488220215, "learning_rate": 9.956206309337066e-07, "loss": 0.0341, "reward": 0.6446905825287104, "reward_std": 0.9305006489157677, "step": 15 }, { "clip_fraction": 0.0, "completion_length": 2368.579849243164, "dapo/avg_reward_std": 0.32238917201757433, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4416666716337204, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 53.125, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.018285714285714287, "grad_norm": 0.009644324891269207, "kl": 0.00011764466762542725, "learning_rate": 9.931634888554935e-07, "loss": 0.0184, "reward": 0.6319684982299805, "reward_std": 0.9385868087410927, "step": 16 }, { "clip_fraction": 0.0, "completion_length": 2354.590286254883, "dapo/avg_reward_std": 0.2929895012466996, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41358025482407323, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 43.95833333333333, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.019428571428571427, "grad_norm": 0.010750290006399155, "kl": 0.00012104213237762451, "learning_rate": 9.901664203302124e-07, "loss": 0.0512, "reward": 0.7495243214070797, "reward_std": 0.9604936093091965, "step": 17 }, { "clip_fraction": 0.0, "completion_length": 2353.548599243164, "dapo/avg_reward_std": 0.3144007975404913, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46212122250686993, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 52.5, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.02057142857142857, "grad_norm": 0.0106205390766263, "kl": 0.0001283884048461914, "learning_rate": 9.866330768241983e-07, "loss": 0.0356, "reward": 0.7090531028807163, "reward_std": 0.927816279232502, "step": 18 }, { "clip_fraction": 0.0, "completion_length": 2599.90283203125, "dapo/avg_reward_std": 0.31102153037985164, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46527778667708236, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 43.125, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.021714285714285714, "grad_norm": 0.00998625811189413, "kl": 0.00011986494064331055, "learning_rate": 9.825677631722435e-07, "loss": 0.0501, "reward": 0.8357332646846771, "reward_std": 0.9608008861541748, "step": 19 }, { "clip_fraction": 0.0, "completion_length": 2307.482650756836, "dapo/avg_reward_std": 0.3105274804613807, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4545454633506862, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 45.83333333333333, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.022857142857142857, "grad_norm": 0.010738078504800797, "kl": 9.399652481079102e-05, "learning_rate": 9.779754323328192e-07, "loss": 0.0104, "reward": 0.7927055042237043, "reward_std": 0.9697678238153458, "step": 20 }, { "clip_fraction": 0.0, "completion_length": 1943.2500457763672, "dapo/avg_reward_std": 0.3021106570959091, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.384615390919722, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 41.78571428571428, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.024, "grad_norm": 0.01025764923542738, "kl": 6.92903995513916e-05, "learning_rate": 9.728616793536587e-07, "loss": 0.0005, "reward": 0.7050843685865402, "reward_std": 0.9542289972305298, "step": 21 }, { "clip_fraction": 0.0, "completion_length": 2265.222198486328, "dapo/avg_reward_std": 0.2858178478020888, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4102564144593019, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 36.160714285714285, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.025142857142857144, "grad_norm": 0.015554007142782211, "kl": 0.00011515617370605469, "learning_rate": 9.672327345550543e-07, "loss": 0.1143, "reward": 0.7392658032476902, "reward_std": 0.9592578783631325, "step": 22 }, { "clip_fraction": 0.0, "completion_length": 2213.857635498047, "dapo/avg_reward_std": 0.28609917419297354, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.410714291036129, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 38.66071428571428, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.026285714285714287, "grad_norm": 0.00819400418549776, "kl": 7.683038711547852e-05, "learning_rate": 9.610954559391704e-07, "loss": 0.018, "reward": 0.6645980039611459, "reward_std": 0.919261984527111, "step": 23 }, { "clip_fraction": 0.0, "completion_length": 1544.9930610656738, "dapo/avg_reward_std": 0.27062960465749103, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.38888889948527017, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 37.20238095238095, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.027428571428571427, "grad_norm": 0.013472510501742363, "kl": 6.948411464691162e-05, "learning_rate": 9.54457320834625e-07, "loss": 0.0006, "reward": 0.6155341246630996, "reward_std": 0.9053066149353981, "step": 24 }, { "clip_fraction": 0.0, "completion_length": 2005.5104598999023, "dapo/avg_reward_std": 0.2877837224253293, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.38505747760164327, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 38.75, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.02857142857142857, "grad_norm": 0.011138558387756348, "kl": 8.162856101989746e-05, "learning_rate": 9.473264167865171e-07, "loss": 0.0493, "reward": 0.6912501659244299, "reward_std": 0.9633006453514099, "step": 25 }, { "clip_fraction": 0.0, "completion_length": 2387.5555725097656, "dapo/avg_reward_std": 0.19959817528724672, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3055555591980616, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 44.49404761904761, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.029714285714285714, "grad_norm": 0.011900709010660648, "kl": 9.435415267944336e-05, "learning_rate": 9.397114317029974e-07, "loss": 0.0815, "reward": 0.5562675036489964, "reward_std": 0.9110650941729546, "step": 26 }, { "clip_fraction": 0.0, "completion_length": 2044.7292137145996, "dapo/avg_reward_std": 0.3619746658951044, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.6354166744276881, "dapo/num_sampling_attempts": 2.0, "dapo/sampling_efficiency": 69.16666666666666, "dapo/total_prompts_processed": 12.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.030857142857142857, "grad_norm": 0.01303341705352068, "kl": 8.736550807952881e-05, "learning_rate": 9.316216432703916e-07, "loss": 0.0141, "reward": 0.7769045419991016, "reward_std": 0.9760870188474655, "step": 27 }, { "clip_fraction": 0.0, "completion_length": 2458.9305572509766, "dapo/avg_reward_std": 0.2839898039465365, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.427536239442618, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 42.08333333333333, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.032, "grad_norm": 0.013889433816075325, "kl": 0.00014150142669677734, "learning_rate": 9.230669076497687e-07, "loss": 0.0479, "reward": 0.5980293937027454, "reward_std": 0.9796791076660156, "step": 28 }, { "clip_fraction": 0.0, "completion_length": 2496.451416015625, "dapo/avg_reward_std": 0.35542283952236176, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5648148175742891, "dapo/num_sampling_attempts": 2.25, "dapo/sampling_efficiency": 67.5, "dapo/total_prompts_processed": 13.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.03314285714285714, "grad_norm": 0.011365516111254692, "kl": 0.00010502338409423828, "learning_rate": 9.140576474687263e-07, "loss": 0.0278, "reward": 0.6495406329631805, "reward_std": 0.9649527370929718, "step": 29 }, { "clip_fraction": 0.0, "completion_length": 1831.333351135254, "dapo/avg_reward_std": 0.2628121712933416, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41304348279600556, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 60.625, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.03428571428571429, "grad_norm": 0.012428080663084984, "kl": 8.240342140197754e-05, "learning_rate": 9.046048391230247e-07, "loss": 0.0408, "reward": 0.7913381233811378, "reward_std": 0.9801043272018433, "step": 30 }, { "clip_fraction": 0.0, "completion_length": 2105.7118225097656, "dapo/avg_reward_std": 0.2843361473083496, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4266666781902313, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 53.75, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.03542857142857143, "grad_norm": 0.016210218891501427, "kl": 0.0001112520694732666, "learning_rate": 8.9471999940354e-07, "loss": 0.1052, "reward": 0.5814057979732752, "reward_std": 0.9699539840221405, "step": 31 }, { "clip_fraction": 0.0, "completion_length": 2366.718818664551, "dapo/avg_reward_std": 0.2371666719173563, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34482759648355943, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 38.4375, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.036571428571428574, "grad_norm": 0.01111757755279541, "kl": 0.00011564791202545166, "learning_rate": 8.844151714648274e-07, "loss": 0.0379, "reward": 0.6102676652371883, "reward_std": 0.9229060783982277, "step": 32 }, { "clip_fraction": 0.0, "completion_length": 2388.1909942626953, "dapo/avg_reward_std": 0.29336222237156284, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3118279624369837, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 42.1875, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.037714285714285714, "grad_norm": 0.01051933504641056, "kl": 9.141862392425537e-05, "learning_rate": 8.737029101523929e-07, "loss": 0.041, "reward": 0.6971308812499046, "reward_std": 0.9577681049704552, "step": 33 }, { "clip_fraction": 0.0, "completion_length": 2259.065963745117, "dapo/avg_reward_std": 0.3195795826613903, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5833333367481828, "dapo/num_sampling_attempts": 2.0, "dapo/sampling_efficiency": 62.49999999999999, "dapo/total_prompts_processed": 12.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.038857142857142854, "grad_norm": 0.010114133358001709, "kl": 9.936094284057617e-05, "learning_rate": 8.625962667065487e-07, "loss": 0.0019, "reward": 0.706351961940527, "reward_std": 0.9608398601412773, "step": 34 }, { "clip_fraction": 0.0, "completion_length": 2236.6563262939453, "dapo/avg_reward_std": 0.2805841226002266, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33908046679250126, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 30.952380952380942, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04, "grad_norm": 0.01071652490645647, "kl": 0.00013333559036254883, "learning_rate": 8.511087728614862e-07, "loss": 0.0108, "reward": 0.6857370678335428, "reward_std": 0.9366307482123375, "step": 35 }, { "clip_fraction": 0.0, "completion_length": 1998.9166717529297, "dapo/avg_reward_std": 0.30676539919593115, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4772727360779589, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 44.791666666666664, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04114285714285714, "grad_norm": 0.011716869659721851, "kl": 0.00010579824447631836, "learning_rate": 8.392544243589427e-07, "loss": 0.0577, "reward": 0.8430320359766483, "reward_std": 0.8613111302256584, "step": 36 }, { "clip_fraction": 0.0, "completion_length": 2699.8819580078125, "dapo/avg_reward_std": 0.280869146873211, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36781610034663104, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 36.45833333333333, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04228571428571429, "grad_norm": 0.011984186246991158, "kl": 0.00011450052261352539, "learning_rate": 8.270476638965461e-07, "loss": 0.0641, "reward": 0.6952194459736347, "reward_std": 0.9531055390834808, "step": 37 }, { "clip_fraction": 0.0, "completion_length": 2508.343765258789, "dapo/avg_reward_std": 0.3086147890204475, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.44444445485160466, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 51.45833333333333, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04342857142857143, "grad_norm": 0.014813189394772053, "kl": 0.00013363361358642578, "learning_rate": 8.145033635316128e-07, "loss": 0.0815, "reward": 0.6981049925088882, "reward_std": 0.9795023873448372, "step": 38 }, { "clip_fraction": 0.0, "completion_length": 2568.090286254883, "dapo/avg_reward_std": 0.2281228665149573, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30303030799735675, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 35.3125, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.044571428571428574, "grad_norm": 0.010284055955708027, "kl": 0.0001270771026611328, "learning_rate": 8.01636806561836e-07, "loss": 0.0129, "reward": 0.5480891708284616, "reward_std": 0.9542658925056458, "step": 39 }, { "clip_fraction": 0.0, "completion_length": 2255.0798721313477, "dapo/avg_reward_std": 0.3315709355202588, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46969698437235574, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 45.20833333333333, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.045714285714285714, "grad_norm": 0.01235182024538517, "kl": 0.00011420249938964844, "learning_rate": 7.884636689049422e-07, "loss": 0.0472, "reward": 0.8707308620214462, "reward_std": 0.9157829731702805, "step": 40 }, { "clip_fraction": 0.0, "completion_length": 2417.9444427490234, "dapo/avg_reward_std": 0.2831250044607347, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3655914020153784, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 37.723214285714285, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.046857142857142854, "grad_norm": 0.010439831763505936, "kl": 0.00012230873107910156, "learning_rate": 7.75e-07, "loss": 0.0395, "reward": 0.7518008537590504, "reward_std": 0.9689745083451271, "step": 41 }, { "clip_fraction": 0.0, "completion_length": 2325.5937881469727, "dapo/avg_reward_std": 0.28424168271677835, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3869047707745007, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 33.75, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.048, "grad_norm": 0.010445328429341316, "kl": 8.326023817062378e-05, "learning_rate": 7.612622032536507e-07, "loss": 0.0004, "reward": 0.6408937154337764, "reward_std": 0.9007892906665802, "step": 42 }, { "clip_fraction": 0.0, "completion_length": 2423.9617919921875, "dapo/avg_reward_std": 0.28680659715945905, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4038461624429776, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 46.041666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04914285714285714, "grad_norm": 0.010229532606899738, "kl": 0.00013530254364013672, "learning_rate": 7.472670160550848e-07, "loss": 0.0104, "reward": 0.6538480781018734, "reward_std": 0.9688718169927597, "step": 43 }, { "clip_fraction": 0.0, "completion_length": 2088.677085876465, "dapo/avg_reward_std": 0.3208466252455345, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4423077031970024, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 41.041666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05028571428571429, "grad_norm": 0.011106742545962334, "kl": 0.00012566149234771729, "learning_rate": 7.330314893841101e-07, "loss": 0.0239, "reward": 0.8764502704143524, "reward_std": 0.9285347983241081, "step": 44 }, { "clip_fraction": 0.0, "completion_length": 1721.781234741211, "dapo/avg_reward_std": 0.3683280497789383, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5083333447575569, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 47.916666666666664, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05142857142857143, "grad_norm": 0.01152133010327816, "kl": 7.429718971252441e-05, "learning_rate": 7.185729670371604e-07, "loss": 0.0259, "reward": 0.8203496672213078, "reward_std": 0.9882074818015099, "step": 45 }, { "clip_fraction": 0.0, "completion_length": 3020.9757232666016, "dapo/avg_reward_std": 0.294668085873127, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37500000691839624, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 38.660714285714285, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.052571428571428575, "grad_norm": 0.009526599198579788, "kl": 0.00014853477478027344, "learning_rate": 7.039090644965509e-07, "loss": 0.0314, "reward": 0.6035567373037338, "reward_std": 0.9617942646145821, "step": 46 }, { "clip_fraction": 0.0, "completion_length": 2869.8958892822266, "dapo/avg_reward_std": 0.37419558623257804, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5196078481043086, "dapo/num_sampling_attempts": 2.125, "dapo/sampling_efficiency": 66.66666666666666, "dapo/total_prompts_processed": 12.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.053714285714285714, "grad_norm": 0.008854555897414684, "kl": 0.00012740492820739746, "learning_rate": 6.890576474687263e-07, "loss": 0.0266, "reward": 0.5126286232843995, "reward_std": 0.9323688969016075, "step": 47 }, { "clip_fraction": 0.0, "completion_length": 1974.5069999694824, "dapo/avg_reward_std": 0.31826632221539813, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.42361111628512543, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 43.541666666666664, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.054857142857142854, "grad_norm": 0.012630482204258442, "kl": 0.00011485815048217773, "learning_rate": 6.740368101176495e-07, "loss": 0.0259, "reward": 0.7998449765145779, "reward_std": 0.9614248275756836, "step": 48 }, { "clip_fraction": 0.0, "completion_length": 2775.854164123535, "dapo/avg_reward_std": 0.24803236694563002, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41269841435409726, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 65.97222222222223, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.056, "grad_norm": 0.0115203270688653, "kl": 0.00010813772678375244, "learning_rate": 6.588648530198504e-07, "loss": 0.0626, "reward": 0.5735284592956305, "reward_std": 0.9657324403524399, "step": 49 }, { "clip_fraction": 0.0, "completion_length": 2555.2743377685547, "dapo/avg_reward_std": 0.3077625359098117, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.423611119389534, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 48.33333333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05714285714285714, "grad_norm": 0.012258801609277725, "kl": 0.00013893842697143555, "learning_rate": 6.435602608679916e-07, "loss": 0.0575, "reward": 0.8288873583078384, "reward_std": 0.950613297522068, "step": 50 }, { "clip_fraction": 0.0, "completion_length": 2645.576400756836, "dapo/avg_reward_std": 0.3462034153441588, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4236111169060071, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 39.99999999999999, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05828571428571429, "grad_norm": 0.01161988079547882, "kl": 0.0001646280288696289, "learning_rate": 6.281416799501187e-07, "loss": 0.046, "reward": 0.46879277005791664, "reward_std": 0.9387945607304573, "step": 51 }, { "clip_fraction": 0.0, "completion_length": 2043.677101135254, "dapo/avg_reward_std": 0.3387378570826157, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4347826171180476, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 45.83333333333333, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05942857142857143, "grad_norm": 0.011719447560608387, "kl": 0.00012214481830596924, "learning_rate": 6.126278954320294e-07, "loss": 0.0093, "reward": 0.7487262971699238, "reward_std": 0.9444489181041718, "step": 52 }, { "clip_fraction": 0.0, "completion_length": 2277.902801513672, "dapo/avg_reward_std": 0.269059170936716, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37356322695469035, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 41.88988095238095, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.060571428571428575, "grad_norm": 0.012477328069508076, "kl": 0.00015044212341308594, "learning_rate": 5.97037808470444e-07, "loss": 0.048, "reward": 0.6608240492641926, "reward_std": 0.9770755022764206, "step": 53 }, { "clip_fraction": 0.0, "completion_length": 2374.232635498047, "dapo/avg_reward_std": 0.34054997433786804, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.500000013605408, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 37.916666666666664, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.061714285714285715, "grad_norm": 0.013303548097610474, "kl": 0.0001438036561012268, "learning_rate": 5.813904131848564e-07, "loss": 0.0614, "reward": 0.75572844222188, "reward_std": 0.9565529599785805, "step": 54 }, { "clip_fraction": 0.0, "completion_length": 2442.232666015625, "dapo/avg_reward_std": 0.27056889484326047, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4097222263614337, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 45.83333333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06285714285714286, "grad_norm": 0.011922283098101616, "kl": 0.00014710426330566406, "learning_rate": 5.657047735161255e-07, "loss": 0.0447, "reward": 0.6145301992073655, "reward_std": 0.9308876842260361, "step": 55 }, { "clip_fraction": 0.0, "completion_length": 2163.7604064941406, "dapo/avg_reward_std": 0.306766193537485, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.47619048612458365, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 57.291666666666664, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.064, "grad_norm": 0.009786682203412056, "kl": 0.00011900067329406738, "learning_rate": 5.5e-07, "loss": 0.0353, "reward": 0.7467220462858677, "reward_std": 0.9404179230332375, "step": 56 }, { "clip_fraction": 0.0, "completion_length": 1992.7430953979492, "dapo/avg_reward_std": 0.21240893006324768, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2631579006188794, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 27.708333333333332, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06514285714285714, "grad_norm": 0.015636112540960312, "kl": 0.00013278424739837646, "learning_rate": 5.342952264838747e-07, "loss": 0.0652, "reward": 0.5448480695486069, "reward_std": 0.8946049734950066, "step": 57 }, { "clip_fraction": 0.0, "completion_length": 1786.927101135254, "dapo/avg_reward_std": 0.27395731459061307, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.479166679084301, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 51.979166666666664, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06628571428571428, "grad_norm": 0.012302345596253872, "kl": 0.00010266900062561035, "learning_rate": 5.186095868151436e-07, "loss": 0.0222, "reward": 0.7567729391157627, "reward_std": 0.9539604857563972, "step": 58 }, { "clip_fraction": 0.0, "completion_length": 1871.125015258789, "dapo/avg_reward_std": 0.26716366639504063, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3461538547506699, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 51.25, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06742857142857143, "grad_norm": 0.012423303909599781, "kl": 0.00013174861669540405, "learning_rate": 5.02962191529556e-07, "loss": 0.0051, "reward": 0.5472707431763411, "reward_std": 0.9848242700099945, "step": 59 }, { "clip_fraction": 0.0, "completion_length": 2110.0104446411133, "dapo/avg_reward_std": 0.27772934675216676, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3933333379030228, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 55.416666666666664, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06857142857142857, "grad_norm": 0.010305487550795078, "kl": 0.00013266503810882568, "learning_rate": 4.873721045679706e-07, "loss": -0.0051, "reward": 0.5918029174208641, "reward_std": 0.9419775605201721, "step": 60 }, { "clip_fraction": 0.0, "completion_length": 1820.1597595214844, "dapo/avg_reward_std": 0.2844862639904022, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.351190483463662, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 39.28571428571428, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06971428571428571, "grad_norm": 0.01057644933462143, "kl": 9.304285049438477e-05, "learning_rate": 4.7185832004988133e-07, "loss": 0.0019, "reward": 0.5361353289335966, "reward_std": 0.9243106096982956, "step": 61 }, { "clip_fraction": 0.0, "completion_length": 2268.913215637207, "dapo/avg_reward_std": 0.2805037432246738, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3456790220958215, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 39.791666666666664, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07085714285714285, "grad_norm": 0.010327951982617378, "kl": 0.00013640522956848145, "learning_rate": 4.5643973913200837e-07, "loss": 0.011, "reward": 0.5703515652567148, "reward_std": 0.9485230222344398, "step": 62 }, { "clip_fraction": 0.0, "completion_length": 2150.541679382324, "dapo/avg_reward_std": 0.3610766388868031, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5000000164697045, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 48.95833333333333, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.072, "grad_norm": 0.01420843880623579, "kl": 0.00017371773719787598, "learning_rate": 4.4113514698014953e-07, "loss": 0.027, "reward": 0.8152667284011841, "reward_std": 0.9553957208991051, "step": 63 }, { "clip_fraction": 0.0, "completion_length": 2542.954879760742, "dapo/avg_reward_std": 0.25789711397627124, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4057971077120822, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 55.0, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07314285714285715, "grad_norm": 0.010388275608420372, "kl": 0.00016424059867858887, "learning_rate": 4.2596318988235037e-07, "loss": 0.0153, "reward": 0.8328269198536873, "reward_std": 0.946412943303585, "step": 64 }, { "clip_fraction": 0.0, "completion_length": 2573.9132385253906, "dapo/avg_reward_std": 0.27658049833206905, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4682539779515493, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 67.01388888888889, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07428571428571429, "grad_norm": 0.016587890684604645, "kl": 0.0002205371856689453, "learning_rate": 4.1094235253127374e-07, "loss": 0.071, "reward": 0.8272522762417793, "reward_std": 0.9939362108707428, "step": 65 }, { "clip_fraction": 0.0, "completion_length": 2272.4132080078125, "dapo/avg_reward_std": 0.28441278512279194, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.38888889737427235, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 49.375, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07542857142857143, "grad_norm": 0.01080800499767065, "kl": 0.00015676021575927734, "learning_rate": 3.9609093550344907e-07, "loss": -0.0104, "reward": 0.7243790216743946, "reward_std": 1.0099836066365242, "step": 66 }, { "clip_fraction": 0.0, "completion_length": 2551.920150756836, "dapo/avg_reward_std": 0.29605763202363794, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4242424314672297, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 50.416666666666664, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07657142857142857, "grad_norm": 0.01253009494394064, "kl": 0.0001944899559020996, "learning_rate": 3.8142703296283953e-07, "loss": 0.0544, "reward": 0.7982187271118164, "reward_std": 0.9796509444713593, "step": 67 }, { "clip_fraction": 0.0, "completion_length": 2039.6910400390625, "dapo/avg_reward_std": 0.3305485857029756, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4305555634200573, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 41.041666666666664, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07771428571428571, "grad_norm": 0.013196859508752823, "kl": 0.00021713972091674805, "learning_rate": 3.6696851061588994e-07, "loss": 0.0185, "reward": 0.8682084418833256, "reward_std": 0.9861341118812561, "step": 68 }, { "clip_fraction": 0.0, "completion_length": 2549.642364501953, "dapo/avg_reward_std": 0.28639274001121523, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4133333384990692, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 38.95833333333333, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07885714285714286, "grad_norm": 0.010159006342291832, "kl": 0.00016075372695922852, "learning_rate": 3.5273298394491515e-07, "loss": -0.0284, "reward": 0.5912708025425673, "reward_std": 0.9797485172748566, "step": 69 }, { "clip_fraction": 0.0, "completion_length": 2719.5382232666016, "dapo/avg_reward_std": 0.28611900960957565, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.351851859026485, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 40.625, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08, "grad_norm": 0.011270755901932716, "kl": 0.00022423267364501953, "learning_rate": 3.387377967463493e-07, "loss": 0.0265, "reward": 0.5740308649837971, "reward_std": 0.8749020621180534, "step": 70 }, { "clip_fraction": 0.0, "completion_length": 2073.2916946411133, "dapo/avg_reward_std": 0.28938476492961246, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.45833334264655906, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 42.49999999999999, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08114285714285714, "grad_norm": 0.011867412365972996, "kl": 0.0001347661018371582, "learning_rate": 3.250000000000001e-07, "loss": -0.0577, "reward": 0.5955507848411798, "reward_std": 0.9116542786359787, "step": 71 }, { "clip_fraction": 0.0, "completion_length": 2239.322914123535, "dapo/avg_reward_std": 0.30952110344713385, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4469697041945024, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 44.166666666666664, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08228571428571428, "grad_norm": 0.011070906184613705, "kl": 0.000155717134475708, "learning_rate": 3.115363310950578e-07, "loss": 0.0339, "reward": 0.7990612685680389, "reward_std": 0.9683424234390259, "step": 72 }, { "clip_fraction": 0.0, "completion_length": 2044.489601135254, "dapo/avg_reward_std": 0.21984713185917248, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3131313206571521, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 36.77083333333333, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08342857142857144, "grad_norm": 0.014109701849520206, "kl": 0.0001436173915863037, "learning_rate": 2.9836319343816397e-07, "loss": 0.085, "reward": 0.8676656074821949, "reward_std": 0.9657078757882118, "step": 73 }, { "clip_fraction": 0.0, "completion_length": 1958.7361068725586, "dapo/avg_reward_std": 0.30799518460812775, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4927536339863487, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 40.625, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08457142857142858, "grad_norm": 0.013041837140917778, "kl": 0.0001519918441772461, "learning_rate": 2.854966364683872e-07, "loss": 0.0492, "reward": 0.6045123310759664, "reward_std": 0.9384523630142212, "step": 74 }, { "clip_fraction": 0.0, "completion_length": 1523.1284942626953, "dapo/avg_reward_std": 0.31539708146682155, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.391025647521019, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 36.875, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08571428571428572, "grad_norm": 0.014472462236881256, "kl": 0.0001392364501953125, "learning_rate": 2.729523361034538e-07, "loss": 0.0358, "reward": 0.7163376174867153, "reward_std": 0.9508332461118698, "step": 75 }, { "clip_fraction": 0.0, "completion_length": 2640.7813110351562, "dapo/avg_reward_std": 0.3144421911239624, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.44000000655651095, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 43.75, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08685714285714285, "grad_norm": 0.011127221398055553, "kl": 0.0002060532569885254, "learning_rate": 2.6074557564105724e-07, "loss": 0.0604, "reward": 0.6046733632683754, "reward_std": 0.9528723284602165, "step": 76 }, { "clip_fraction": 0.0, "completion_length": 2088.7292098999023, "dapo/avg_reward_std": 0.3257487453520298, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4444444552063942, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 45.31249999999999, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.088, "grad_norm": 0.013021063059568405, "kl": 0.00017440319061279297, "learning_rate": 2.488912271385139e-07, "loss": 0.0353, "reward": 0.5843205824494362, "reward_std": 0.9498706609010696, "step": 77 }, { "clip_fraction": 0.0, "completion_length": 2710.0069580078125, "dapo/avg_reward_std": 0.4117408903206096, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5784313836518455, "dapo/num_sampling_attempts": 2.125, "dapo/sampling_efficiency": 52.08333333333333, "dapo/total_prompts_processed": 12.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08914285714285715, "grad_norm": 0.00956858042627573, "kl": 0.00020110607147216797, "learning_rate": 2.374037332934512e-07, "loss": -0.0019, "reward": 0.7558267749845982, "reward_std": 0.9872319549322128, "step": 78 }, { "clip_fraction": 0.0, "completion_length": 2532.888916015625, "dapo/avg_reward_std": 0.29725510747201983, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34408603031789103, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 31.696428571428562, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09028571428571429, "grad_norm": 0.010455719195306301, "kl": 0.00019878149032592773, "learning_rate": 2.2629708984760706e-07, "loss": 0.0433, "reward": 0.7071553282439709, "reward_std": 0.936428040266037, "step": 79 }, { "clip_fraction": 0.0, "completion_length": 2045.3507232666016, "dapo/avg_reward_std": 0.24797727167606354, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4318181872367859, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 56.24999999999999, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09142857142857143, "grad_norm": 0.011657273396849632, "kl": 0.00015923380851745605, "learning_rate": 2.1558482853517253e-07, "loss": 0.0016, "reward": 0.8354307417757809, "reward_std": 0.9478549808263779, "step": 80 }, { "clip_fraction": 0.0, "completion_length": 2517.621482849121, "dapo/avg_reward_std": 0.3837103931342854, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5882353055126527, "dapo/num_sampling_attempts": 2.125, "dapo/sampling_efficiency": 56.24999999999999, "dapo/total_prompts_processed": 12.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09257142857142857, "grad_norm": 0.011230596341192722, "kl": 0.00020751357078552246, "learning_rate": 2.0528000059645995e-07, "loss": 0.0523, "reward": 0.6180859599262476, "reward_std": 0.9601781144738197, "step": 81 }, { "clip_fraction": 0.0, "completion_length": 2189.6805725097656, "dapo/avg_reward_std": 0.33485331758856773, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.47916667970518273, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 42.70833333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09371428571428571, "grad_norm": 0.01085925567895174, "kl": 0.00018781423568725586, "learning_rate": 1.9539516087697517e-07, "loss": 0.0277, "reward": 0.7506253309547901, "reward_std": 0.9654112830758095, "step": 82 }, { "clip_fraction": 0.0, "completion_length": 2063.197952270508, "dapo/avg_reward_std": 0.3108914480322883, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.45238096444379716, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 47.291666666666664, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09485714285714286, "grad_norm": 0.01137411966919899, "kl": 0.00018197298049926758, "learning_rate": 1.8594235253127372e-07, "loss": 0.0165, "reward": 0.6088770590722561, "reward_std": 0.9752795398235321, "step": 83 }, { "clip_fraction": 0.0, "completion_length": 2032.7708587646484, "dapo/avg_reward_std": 0.35138528971444993, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5000000070957911, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 40.62499999999999, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.096, "grad_norm": 0.009788557887077332, "kl": 0.0001645982265472412, "learning_rate": 1.7693309235023127e-07, "loss": -0.0005, "reward": 0.6485470458865166, "reward_std": 0.8980466201901436, "step": 84 }, { "clip_fraction": 0.0, "completion_length": 2723.2083892822266, "dapo/avg_reward_std": 0.35491983592510223, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5500000104308128, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 46.87499999999999, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09714285714285714, "grad_norm": 0.012261813506484032, "kl": 0.0002092123031616211, "learning_rate": 1.6837835672960831e-07, "loss": 0.0428, "reward": 0.769347533583641, "reward_std": 0.9622702524065971, "step": 85 }, { "clip_fraction": 0.0, "completion_length": 2813.6979370117188, "dapo/avg_reward_std": 0.31041908973739263, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46825397582281203, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 53.125, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09828571428571428, "grad_norm": 0.013307915069162846, "kl": 0.00022363662719726562, "learning_rate": 1.6028856829700258e-07, "loss": 0.0893, "reward": 0.7634551003575325, "reward_std": 0.9385863840579987, "step": 86 }, { "clip_fraction": 0.0, "completion_length": 2645.5486907958984, "dapo/avg_reward_std": 0.29486309762658747, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37931035099358396, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 38.95833333333333, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09942857142857142, "grad_norm": 0.009606744162738323, "kl": 0.00017684698104858398, "learning_rate": 1.5267358321348285e-07, "loss": 0.0337, "reward": 0.6225443221628666, "reward_std": 0.9135682806372643, "step": 87 }, { "clip_fraction": 0.0, "completion_length": 2211.1111221313477, "dapo/avg_reward_std": 0.2131810395254029, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30092593075500595, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 25.535714285714285, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10057142857142858, "grad_norm": 0.011731365695595741, "kl": 0.00017218291759490967, "learning_rate": 1.4554267916537495e-07, "loss": 0.0114, "reward": 0.574246758595109, "reward_std": 0.9149169996380806, "step": 88 }, { "clip_fraction": 0.0, "completion_length": 2617.9445037841797, "dapo/avg_reward_std": 0.34073091808118317, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5087719379287017, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 47.91666666666666, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10171428571428572, "grad_norm": 0.013213962316513062, "kl": 0.0002383589744567871, "learning_rate": 1.3890454406082956e-07, "loss": 0.072, "reward": 0.7886459194123745, "reward_std": 0.9416129812598228, "step": 89 }, { "clip_fraction": 0.0, "completion_length": 2265.7743225097656, "dapo/avg_reward_std": 0.39400896430015564, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.48412699145930155, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 46.24999999999999, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10285714285714286, "grad_norm": 0.011279975064098835, "kl": 0.00017967820167541504, "learning_rate": 1.3276726544494571e-07, "loss": 0.0115, "reward": 0.8188270814716816, "reward_std": 0.956598699092865, "step": 90 }, { "clip_fraction": 0.0, "completion_length": 1751.7951850891113, "dapo/avg_reward_std": 0.346651555462317, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.44696970690380444, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 46.875, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.104, "grad_norm": 0.013495221734046936, "kl": 0.00012958049774169922, "learning_rate": 1.2713832064634125e-07, "loss": 0.0244, "reward": 0.7544833142310381, "reward_std": 0.920841209590435, "step": 91 }, { "clip_fraction": 0.0, "completion_length": 2176.5868530273438, "dapo/avg_reward_std": 0.31276301860809325, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3866666704416275, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 42.410714285714285, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10514285714285715, "grad_norm": 0.014705290086567402, "kl": 0.00018972158432006836, "learning_rate": 1.220245676671809e-07, "loss": 0.082, "reward": 0.6609778106212616, "reward_std": 0.9741540849208832, "step": 92 }, { "clip_fraction": 0.0, "completion_length": 2418.0035095214844, "dapo/avg_reward_std": 0.3533540232615037, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.45454546131870965, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 50.416666666666664, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10628571428571429, "grad_norm": 0.014526835642755032, "kl": 0.00022083520889282227, "learning_rate": 1.1743223682775649e-07, "loss": 0.0467, "reward": 0.6240662466734648, "reward_std": 0.9587830454111099, "step": 93 }, { "clip_fraction": 0.0, "completion_length": 1759.4409713745117, "dapo/avg_reward_std": 0.31654878084858257, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4166666741172473, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 47.70833333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10742857142857143, "grad_norm": 0.011724472045898438, "kl": 0.00012111663818359375, "learning_rate": 1.1336692317580158e-07, "loss": -0.0008, "reward": 0.8961930721998215, "reward_std": 0.9275476858019829, "step": 94 }, { "clip_fraction": 0.0, "completion_length": 1968.3958435058594, "dapo/avg_reward_std": 0.31933523178100587, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36000000715255737, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 41.666666666666664, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10857142857142857, "grad_norm": 0.012760731391608715, "kl": 0.00015205144882202148, "learning_rate": 1.0983357966978745e-07, "loss": 0.0303, "reward": 0.7966429069638252, "reward_std": 0.9104023575782776, "step": 95 }, { "clip_fraction": 0.0, "completion_length": 1705.9930610656738, "dapo/avg_reward_std": 0.26930796217035363, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.38888889771920665, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 48.4375, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10971428571428571, "grad_norm": 0.016185246407985687, "kl": 0.00014796853065490723, "learning_rate": 1.068365111445064e-07, "loss": -0.0016, "reward": 0.7683778572827578, "reward_std": 0.9466121271252632, "step": 96 }, { "clip_fraction": 0.0, "completion_length": 2056.079864501953, "dapo/avg_reward_std": 0.3310448744080283, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.48484849387949164, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 51.785714285714285, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11085714285714286, "grad_norm": 0.010300490073859692, "kl": 0.00016963481903076172, "learning_rate": 1.0437936906629334e-07, "loss": 0.0027, "reward": 0.7596820928156376, "reward_std": 0.9540099799633026, "step": 97 }, { "clip_fraction": 0.0, "completion_length": 2592.8403244018555, "dapo/avg_reward_std": 0.21406691299902425, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3018018116016646, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 30.376984126984123, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.112, "grad_norm": 0.01034973282366991, "kl": 0.000193670392036438, "learning_rate": 1.0246514708427701e-07, "loss": 0.0254, "reward": 0.7206093966960907, "reward_std": 0.9074158370494843, "step": 98 }, { "clip_fraction": 0.0, "completion_length": 2686.343780517578, "dapo/avg_reward_std": 0.24782394810959144, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35802469926851765, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 44.513888888888886, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11314285714285714, "grad_norm": 0.011502859182655811, "kl": 0.00023734569549560547, "learning_rate": 1.0109617738307911e-07, "loss": 0.0346, "reward": 0.6300379456952214, "reward_std": 0.9057611152529716, "step": 99 }, { "clip_fraction": 0.0, "completion_length": 2050.166664123535, "dapo/avg_reward_std": 0.3082110931475957, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.43055556404093903, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 44.166666666666664, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11428571428571428, "grad_norm": 0.015181603841483593, "kl": 0.00023311376571655273, "learning_rate": 1.002741278414069e-07, "loss": 0.0389, "reward": 0.7550710588693619, "reward_std": 0.9816905185580254, "step": 100 }, { "clip_fraction": 0.0, "completion_length": 2261.625045776367, "dapo/avg_reward_std": 0.2656887276419278, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3563218414783478, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 30.952380952380942, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11542857142857142, "grad_norm": 0.012256976217031479, "kl": 0.0002308487892150879, "learning_rate": 1e-07, "loss": 0.0255, "reward": 0.6794679276645184, "reward_std": 0.936141237616539, "step": 101 }, { "clip_fraction": 0.0, "completion_length": 2019.2500381469727, "dapo/avg_reward_std": 0.27603574914316975, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40322581414253483, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 31.14583333333333, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11657142857142858, "grad_norm": 0.011883130297064781, "kl": 0.00018447637557983398, "learning_rate": 6.203955092681039e-07, "loss": 0.0566, "reward": 0.9531724825501442, "reward_std": 0.9424103274941444, "step": 102 }, { "clip_fraction": 0.0, "completion_length": 2447.142379760742, "dapo/avg_reward_std": 0.2595460871855418, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3833333447575569, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 28.958333333333332, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11771428571428572, "grad_norm": 0.010165790095925331, "kl": 0.00018906593322753906, "learning_rate": 6.126278954320294e-07, "loss": 0.0361, "reward": 0.8079591542482376, "reward_std": 0.9323313534259796, "step": 103 }, { "clip_fraction": 0.0, "completion_length": 2414.4305725097656, "dapo/avg_reward_std": 0.2675211922875766, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3505747174394542, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 42.013888888888886, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11885714285714286, "grad_norm": 0.009563453495502472, "kl": 0.0002244710922241211, "learning_rate": 6.048412045323164e-07, "loss": 0.0367, "reward": 0.6746065132319927, "reward_std": 0.9439321234822273, "step": 104 }, { "clip_fraction": 0.0, "completion_length": 2350.4653396606445, "dapo/avg_reward_std": 0.2709802109183687, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33838384621071094, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 39.58333333333333, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12, "grad_norm": 0.011944189667701721, "kl": 0.00023399293422698975, "learning_rate": 5.97037808470444e-07, "loss": 0.0133, "reward": 0.7501634955406189, "reward_std": 0.9493465423583984, "step": 105 }, { "clip_fraction": 0.0, "completion_length": 2232.5590209960938, "dapo/avg_reward_std": 0.2304972934311834, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.32183908234382497, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 46.05654761904762, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12114285714285715, "grad_norm": 0.011308341287076473, "kl": 0.0002868175506591797, "learning_rate": 5.892200842364462e-07, "loss": 0.017, "reward": 0.8449488952755928, "reward_std": 0.9235394075512886, "step": 106 }, { "clip_fraction": 0.0, "completion_length": 2245.4306259155273, "dapo/avg_reward_std": 0.32372228088586225, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37681160478488257, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 45.53571428571428, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12228571428571429, "grad_norm": 0.01079760491847992, "kl": 0.00018143653869628906, "learning_rate": 5.813904131848564e-07, "loss": 0.0407, "reward": 0.876940418034792, "reward_std": 0.945194236934185, "step": 107 }, { "clip_fraction": 0.0, "completion_length": 2877.4410095214844, "dapo/avg_reward_std": 0.31851592376118615, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.44444445201328825, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 47.916666666666664, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12342857142857143, "grad_norm": 0.008532079868018627, "kl": 0.00026416778564453125, "learning_rate": 5.735511803093248e-07, "loss": 0.0189, "reward": 0.5354619715362787, "reward_std": 0.9343887642025948, "step": 108 }, { "clip_fraction": 0.0, "completion_length": 2287.3403396606445, "dapo/avg_reward_std": 0.2637126021660291, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41025641560554504, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 44.6875, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12457142857142857, "grad_norm": 0.010662744753062725, "kl": 0.00025856494903564453, "learning_rate": 5.657047735161255e-07, "loss": 0.0139, "reward": 0.6945868469774723, "reward_std": 0.945196196436882, "step": 109 }, { "clip_fraction": 0.0, "completion_length": 2099.197898864746, "dapo/avg_reward_std": 0.2950383967586926, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.380952388048172, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 38.541666666666664, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12571428571428572, "grad_norm": 0.011256680823862553, "kl": 0.0002060532569885254, "learning_rate": 5.578535828967777e-07, "loss": 0.0106, "reward": 0.6000825632363558, "reward_std": 0.9193084537982941, "step": 110 }, { "clip_fraction": 0.0, "completion_length": 2716.079864501953, "dapo/avg_reward_std": 0.2741352463590688, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3620689691140734, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 41.488095238095234, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12685714285714286, "grad_norm": 0.009062284603714943, "kl": 0.0002288222312927246, "learning_rate": 5.5e-07, "loss": 0.0461, "reward": 0.6751261968165636, "reward_std": 0.9856812655925751, "step": 111 }, { "clip_fraction": 0.0, "completion_length": 3045.125, "dapo/avg_reward_std": 0.36701818108558654, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5888888945182165, "dapo/num_sampling_attempts": 1.875, "dapo/sampling_efficiency": 65.625, "dapo/total_prompts_processed": 11.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.128, "grad_norm": 0.013615131378173828, "kl": 0.0003104209899902344, "learning_rate": 5.421464171032224e-07, "loss": 0.0541, "reward": 0.6107649356126785, "reward_std": 0.9386496767401695, "step": 112 }, { "clip_fraction": 0.0, "completion_length": 2261.1597442626953, "dapo/avg_reward_std": 0.2829060518741608, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.413333340883255, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 41.041666666666664, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12914285714285714, "grad_norm": 0.01245199330151081, "kl": 0.0002949833869934082, "learning_rate": 5.342952264838747e-07, "loss": 0.0273, "reward": 0.7544166818261147, "reward_std": 0.9633913785219193, "step": 113 }, { "clip_fraction": 0.0, "completion_length": 2030.1041946411133, "dapo/avg_reward_std": 0.2660287490912846, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.32738095788019045, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 45.535714285714285, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13028571428571428, "grad_norm": 0.011100664734840393, "kl": 0.00016885995864868164, "learning_rate": 5.264488196906752e-07, "loss": 0.0649, "reward": 0.5986752398312092, "reward_std": 0.9739916548132896, "step": 114 }, { "clip_fraction": 0.0, "completion_length": 2791.465301513672, "dapo/avg_reward_std": 0.297807412147522, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.44000000655651095, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 42.08333333333333, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13142857142857142, "grad_norm": 0.011278674006462097, "kl": 0.0002925395965576172, "learning_rate": 5.186095868151436e-07, "loss": 0.0586, "reward": 0.6219565980136395, "reward_std": 0.9591977074742317, "step": 115 }, { "clip_fraction": 0.0, "completion_length": 2804.6493606567383, "dapo/avg_reward_std": 0.35703572371731634, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.42028986371081806, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 43.12499999999999, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13257142857142856, "grad_norm": 0.01122019812464714, "kl": 0.00034046173095703125, "learning_rate": 5.107799157635538e-07, "loss": 0.0233, "reward": 0.469740716740489, "reward_std": 0.9214994236826897, "step": 116 }, { "clip_fraction": 0.0, "completion_length": 2037.885456085205, "dapo/avg_reward_std": 0.30805256009101867, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4666666799783707, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 44.27083333333333, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1337142857142857, "grad_norm": 0.014056873507797718, "kl": 0.0002486705780029297, "learning_rate": 5.02962191529556e-07, "loss": 0.038, "reward": 0.9076524265110493, "reward_std": 0.9655390456318855, "step": 117 }, { "clip_fraction": 0.0, "completion_length": 2517.215316772461, "dapo/avg_reward_std": 0.23199922059263503, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3571428635290691, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 45.535714285714285, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13485714285714287, "grad_norm": 0.011827170848846436, "kl": 0.00034999847412109375, "learning_rate": 4.951587954676837e-07, "loss": 0.023, "reward": 0.5725362580269575, "reward_std": 0.9489376917481422, "step": 118 }, { "clip_fraction": 0.0, "completion_length": 2309.763916015625, "dapo/avg_reward_std": 0.33521059803340747, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.48550726084605506, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 44.166666666666664, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.136, "grad_norm": 0.014920210465788841, "kl": 0.0003477334976196289, "learning_rate": 4.873721045679706e-07, "loss": 0.0967, "reward": 0.7152486853301525, "reward_std": 0.9450967088341713, "step": 119 }, { "clip_fraction": 0.0, "completion_length": 2588.423629760742, "dapo/avg_reward_std": 0.322092001636823, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4236111156642437, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 40.62499999999999, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13714285714285715, "grad_norm": 0.009259553626179695, "kl": 0.0002923011779785156, "learning_rate": 4.79604490731896e-07, "loss": 0.0204, "reward": 0.5492150112986565, "reward_std": 0.9336576908826828, "step": 120 }, { "clip_fraction": 0.0, "completion_length": 2224.4583282470703, "dapo/avg_reward_std": 0.2846992796375638, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4603174633923031, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 51.45833333333333, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1382857142857143, "grad_norm": 0.012681272812187672, "kl": 0.0002828836441040039, "learning_rate": 4.7185832004988133e-07, "loss": 0.084, "reward": 0.8260641098022461, "reward_std": 0.9569381102919579, "step": 121 }, { "clip_fraction": 0.0, "completion_length": 2042.4618453979492, "dapo/avg_reward_std": 0.21980997684754824, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2500000039213582, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 32.916666666666664, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13942857142857143, "grad_norm": 0.014447196386754513, "kl": 0.0002307891845703125, "learning_rate": 4.641359520805548e-07, "loss": 0.0797, "reward": 0.5401283344253898, "reward_std": 0.8589324243366718, "step": 122 }, { "clip_fraction": 0.0, "completion_length": 1821.270851135254, "dapo/avg_reward_std": 0.30661167701085407, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4652777910232544, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 51.979166666666664, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14057142857142857, "grad_norm": 0.012464089319109917, "kl": 0.00021943449974060059, "learning_rate": 4.5643973913200837e-07, "loss": 0.0524, "reward": 0.7340994998812675, "reward_std": 0.948041707277298, "step": 123 }, { "clip_fraction": 0.0, "completion_length": 1917.8576431274414, "dapo/avg_reward_std": 0.26710175829274313, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3452381023338863, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 35.83333333333333, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1417142857142857, "grad_norm": 0.01295761950314045, "kl": 0.00027441978454589844, "learning_rate": 4.4877202554526084e-07, "loss": 0.0395, "reward": 0.44990649446845055, "reward_std": 0.9298848733305931, "step": 124 }, { "clip_fraction": 0.0, "completion_length": 2151.381965637207, "dapo/avg_reward_std": 0.2770674500776374, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4347826164701711, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 55.729166666666664, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14285714285714285, "grad_norm": 0.011758905835449696, "kl": 0.00027173757553100586, "learning_rate": 4.4113514698014953e-07, "loss": -0.0284, "reward": 0.5582777298986912, "reward_std": 0.9428363367915154, "step": 125 }, { "clip_fraction": 0.0, "completion_length": 1810.0104522705078, "dapo/avg_reward_std": 0.21576001878940698, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2777777860562007, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 36.45833333333333, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.144, "grad_norm": 0.011465526185929775, "kl": 0.00021535158157348633, "learning_rate": 4.3353142970386557e-07, "loss": -0.0108, "reward": 0.6622855560854077, "reward_std": 0.9075748100876808, "step": 126 }, { "clip_fraction": 0.0, "completion_length": 2243.732635498047, "dapo/avg_reward_std": 0.2920382275031163, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3782051377571546, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 32.291666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14514285714285713, "grad_norm": 0.01050955057144165, "kl": 0.00027239322662353516, "learning_rate": 4.2596318988235037e-07, "loss": 0.0464, "reward": 0.533456489443779, "reward_std": 0.9191579967737198, "step": 127 }, { "clip_fraction": 0.0, "completion_length": 2544.6875, "dapo/avg_reward_std": 0.27494730835869197, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46031746906893595, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 46.24999999999999, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1462857142857143, "grad_norm": 0.017111552879214287, "kl": 0.0003796815872192383, "learning_rate": 4.1843273287476854e-07, "loss": 0.0784, "reward": 0.7016365043818951, "reward_std": 0.986565351486206, "step": 128 }, { "clip_fraction": 0.0, "completion_length": 2367.3646240234375, "dapo/avg_reward_std": 0.23454804884062874, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34567901988824207, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 47.222222222222214, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14742857142857144, "grad_norm": 0.009941876865923405, "kl": 0.00034287571907043457, "learning_rate": 4.1094235253127374e-07, "loss": 0.0061, "reward": 0.7930427435785532, "reward_std": 0.9500019550323486, "step": 129 }, { "clip_fraction": 0.0, "completion_length": 2704.125045776367, "dapo/avg_reward_std": 0.33850957382292973, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5158730284089134, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 47.291666666666664, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14857142857142858, "grad_norm": 0.012839604169130325, "kl": 0.0004292726516723633, "learning_rate": 4.034943304942796e-07, "loss": 0.0353, "reward": 0.6285950914025307, "reward_std": 0.9615181535482407, "step": 130 }, { "clip_fraction": 0.0, "completion_length": 2475.5104370117188, "dapo/avg_reward_std": 0.30972740189595654, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4696969762444496, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 52.82738095238095, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14971428571428572, "grad_norm": 0.00980184692889452, "kl": 0.00036525726318359375, "learning_rate": 3.9609093550344907e-07, "loss": -0.0176, "reward": 0.7969067245721817, "reward_std": 0.9501392021775246, "step": 131 }, { "clip_fraction": 0.0, "completion_length": 2550.0034790039062, "dapo/avg_reward_std": 0.2723326214722225, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30952381661960054, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 29.999999999999993, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15085714285714286, "grad_norm": 0.017491888254880905, "kl": 0.00042629241943359375, "learning_rate": 3.8873442270461485e-07, "loss": 0.0772, "reward": 0.6202478259801865, "reward_std": 0.9556004330515862, "step": 132 }, { "clip_fraction": 0.0, "completion_length": 2056.5000534057617, "dapo/avg_reward_std": 0.3334644228219986, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5333333447575569, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 47.916666666666664, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.152, "grad_norm": 0.012553170323371887, "kl": 0.0004407167434692383, "learning_rate": 3.8142703296283953e-07, "loss": -0.0185, "reward": 0.7429189011454582, "reward_std": 1.0187850967049599, "step": 133 }, { "clip_fraction": 0.0, "completion_length": 2446.9270935058594, "dapo/avg_reward_std": 0.28044995562783603, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.32758621254871634, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 32.41071428571428, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15314285714285714, "grad_norm": 0.010931877419352531, "kl": 0.00035947561264038086, "learning_rate": 3.7417099217982686e-07, "loss": 0.0372, "reward": 0.6385626457631588, "reward_std": 0.9372833296656609, "step": 134 }, { "clip_fraction": 0.0, "completion_length": 2216.4479370117188, "dapo/avg_reward_std": 0.3021530819435914, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40972223070760566, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 47.291666666666664, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15428571428571428, "grad_norm": 0.014175104908645153, "kl": 0.00040656328201293945, "learning_rate": 3.6696851061588994e-07, "loss": 0.0637, "reward": 0.6612157337367535, "reward_std": 0.9335344135761261, "step": 135 }, { "clip_fraction": 0.0, "completion_length": 1879.3889083862305, "dapo/avg_reward_std": 0.24328701660550875, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36206897219707224, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 31.77083333333333, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15542857142857142, "grad_norm": 0.011906570754945278, "kl": 0.0002865791320800781, "learning_rate": 3.5982178221668533e-07, "loss": 0.0254, "reward": 0.621966740116477, "reward_std": 0.9788949191570282, "step": 136 }, { "clip_fraction": 0.0, "completion_length": 2137.1180725097656, "dapo/avg_reward_std": 0.19872227481433324, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2857142903975078, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 26.180555555555557, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15657142857142858, "grad_norm": 0.011921432800590992, "kl": 0.000335007905960083, "learning_rate": 3.5273298394491515e-07, "loss": 0.0425, "reward": 0.8858193010091782, "reward_std": 0.960886999964714, "step": 137 }, { "clip_fraction": 0.0, "completion_length": 2133.6875381469727, "dapo/avg_reward_std": 0.30985672700972783, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5079365216550373, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 50.41666666666666, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15771428571428572, "grad_norm": 0.011959100142121315, "kl": 0.00031131505966186523, "learning_rate": 3.45704275117204e-07, "loss": 0.0473, "reward": 0.8114638328552246, "reward_std": 0.9208285436034203, "step": 138 }, { "clip_fraction": 0.0, "completion_length": 1887.6423797607422, "dapo/avg_reward_std": 0.30113077312707903, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.45833334401249887, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 47.916666666666664, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15885714285714286, "grad_norm": 0.01248843315988779, "kl": 0.0003123283386230469, "learning_rate": 3.387377967463493e-07, "loss": 0.0133, "reward": 0.4802711680531502, "reward_std": 0.9749159812927246, "step": 139 }, { "clip_fraction": 0.0, "completion_length": 1555.0173835754395, "dapo/avg_reward_std": 0.2354262595375379, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3750000074505806, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 58.05555555555555, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16, "grad_norm": 0.01651233807206154, "kl": 0.0003075599670410156, "learning_rate": 3.3183567088914833e-07, "loss": -0.0302, "reward": 0.8893436007201672, "reward_std": 0.9632327631115913, "step": 140 }, { "clip_fraction": 0.0, "completion_length": 2886.878517150879, "dapo/avg_reward_std": 0.2881770460378556, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.48412698933056425, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 53.12499999999999, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16114285714285714, "grad_norm": 0.010870919562876225, "kl": 0.0004533529281616211, "learning_rate": 3.250000000000001e-07, "loss": 0.0545, "reward": 0.612054293975234, "reward_std": 0.9482586532831192, "step": 141 }, { "clip_fraction": 0.0, "completion_length": 1937.1458740234375, "dapo/avg_reward_std": 0.3629945723906807, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.47826088057911914, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 47.39583333333333, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16228571428571428, "grad_norm": 0.011180016212165356, "kl": 0.00029921531677246094, "learning_rate": 3.182328662904756e-07, "loss": -0.0113, "reward": 0.6175431702286005, "reward_std": 0.9589766189455986, "step": 142 }, { "clip_fraction": 0.0, "completion_length": 2465.3159942626953, "dapo/avg_reward_std": 0.3803708272821763, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5588235381771537, "dapo/num_sampling_attempts": 2.125, "dapo/sampling_efficiency": 60.41666666666666, "dapo/total_prompts_processed": 12.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16342857142857142, "grad_norm": 0.012431374751031399, "kl": 0.00048232078552246094, "learning_rate": 3.115363310950578e-07, "loss": 0.0679, "reward": 0.7579541122540832, "reward_std": 0.9723308756947517, "step": 143 }, { "clip_fraction": 0.0, "completion_length": 2344.1180419921875, "dapo/avg_reward_std": 0.21175828889796608, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26315790179528686, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 25.347222222222218, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16457142857142856, "grad_norm": 0.010860033333301544, "kl": 0.0004448890686035156, "learning_rate": 3.0491243424323783e-07, "loss": -0.0005, "reward": 0.5643926626071334, "reward_std": 0.9328553825616837, "step": 144 }, { "clip_fraction": 0.0, "completion_length": 2482.1389389038086, "dapo/avg_reward_std": 0.37865253537893295, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.6145833432674408, "dapo/num_sampling_attempts": 2.0, "dapo/sampling_efficiency": 58.33333333333333, "dapo/total_prompts_processed": 12.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1657142857142857, "grad_norm": 0.011065399274230003, "kl": 0.0004658699035644531, "learning_rate": 2.9836319343816397e-07, "loss": 0.0412, "reward": 0.8742740526795387, "reward_std": 0.9688765779137611, "step": 145 }, { "clip_fraction": 0.0, "completion_length": 2297.6806030273438, "dapo/avg_reward_std": 0.40159281912971945, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.627450992955881, "dapo/num_sampling_attempts": 2.125, "dapo/sampling_efficiency": 56.24999999999999, "dapo/total_prompts_processed": 12.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16685714285714287, "grad_norm": 0.014852684922516346, "kl": 0.00038546323776245117, "learning_rate": 2.918906036420294e-07, "loss": 0.1043, "reward": 0.7259054481983185, "reward_std": 0.9452414810657501, "step": 146 }, { "clip_fraction": 0.0, "completion_length": 2485.2639389038086, "dapo/avg_reward_std": 0.2594580222731051, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.42028986435869464, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 43.125, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.168, "grad_norm": 0.011770485900342464, "kl": 0.00037994980812072754, "learning_rate": 2.854966364683872e-07, "loss": 0.0414, "reward": 0.596230074763298, "reward_std": 0.944911852478981, "step": 147 }, { "clip_fraction": 0.0, "completion_length": 2030.6180877685547, "dapo/avg_reward_std": 0.28245899453759193, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.43055556528270245, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 47.70833333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16914285714285715, "grad_norm": 0.010600890032947063, "kl": 0.0003261566162109375, "learning_rate": 2.791832395815782e-07, "loss": 0.018, "reward": 0.5254655107855797, "reward_std": 0.9357841089367867, "step": 148 }, { "clip_fraction": 0.0, "completion_length": 2956.184051513672, "dapo/avg_reward_std": 0.3112214480837186, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.44444445334374905, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 42.08333333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1702857142857143, "grad_norm": 0.010259653441607952, "kl": 0.00048613548278808594, "learning_rate": 2.729523361034538e-07, "loss": 0.0339, "reward": 0.6315554305911064, "reward_std": 0.9876029044389725, "step": 149 }, { "clip_fraction": 0.0, "completion_length": 2855.0520629882812, "dapo/avg_reward_std": 0.32461989257070756, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5370370447635651, "dapo/num_sampling_attempts": 2.25, "dapo/sampling_efficiency": 55.20833333333333, "dapo/total_prompts_processed": 13.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17142857142857143, "grad_norm": 0.011087276972830296, "kl": 0.0005285739898681641, "learning_rate": 2.6680582402757324e-07, "loss": 0.054, "reward": 0.80087810754776, "reward_std": 1.0038108006119728, "step": 150 }, { "clip_fraction": 0.0, "completion_length": 2653.5834197998047, "dapo/avg_reward_std": 0.24287073779851198, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36979167396202683, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 30.327380952380953, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17257142857142857, "grad_norm": 0.011102661490440369, "kl": 0.0005296468734741211, "learning_rate": 2.6074557564105724e-07, "loss": 0.0527, "reward": 0.7124785147607327, "reward_std": 0.9657682925462723, "step": 151 }, { "clip_fraction": 0.0, "completion_length": 2141.173614501953, "dapo/avg_reward_std": 0.25965497308763963, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333396706088, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 36.875, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1737142857142857, "grad_norm": 0.01081050094217062, "kl": 0.00039577484130859375, "learning_rate": 2.547734369542718e-07, "loss": 0.0232, "reward": 0.5607589241117239, "reward_std": 0.9106607139110565, "step": 152 }, { "clip_fraction": 0.0, "completion_length": 2218.2882347106934, "dapo/avg_reward_std": 0.24554675072431564, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.39583333767950535, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 48.75, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17485714285714285, "grad_norm": 0.01474699191749096, "kl": 0.000436246395111084, "learning_rate": 2.488912271385139e-07, "loss": 0.0585, "reward": 0.4214355852454901, "reward_std": 0.9400415197014809, "step": 153 }, { "clip_fraction": 0.0, "completion_length": 2466.3368377685547, "dapo/avg_reward_std": 0.3308070342649113, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46212121776559134, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 38.541666666666664, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.176, "grad_norm": 0.011210402473807335, "kl": 0.0004417896270751953, "learning_rate": 2.4310073797187573e-07, "loss": -0.0244, "reward": 0.7323229797184467, "reward_std": 0.9493635967373848, "step": 154 }, { "clip_fraction": 0.0, "completion_length": 2012.8715438842773, "dapo/avg_reward_std": 0.2809670078754425, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4133333426713943, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 35.83333333333333, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17714285714285713, "grad_norm": 0.01654733158648014, "kl": 0.00036275386810302734, "learning_rate": 2.374037332934512e-07, "loss": 0.0589, "reward": 0.6634213328361511, "reward_std": 0.8785304054617882, "step": 155 }, { "clip_fraction": 0.0, "completion_length": 2291.3021240234375, "dapo/avg_reward_std": 0.3599580733672432, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.44927537182103033, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 45.53571428571428, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1782857142857143, "grad_norm": 0.011936171911656857, "kl": 0.00043827295303344727, "learning_rate": 2.3180194846605364e-07, "loss": 0.0699, "reward": 0.8599490560591221, "reward_std": 0.9719394743442535, "step": 156 }, { "clip_fraction": 0.0, "completion_length": 2499.791702270508, "dapo/avg_reward_std": 0.3457585884766145, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5378787998448719, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 48.95833333333333, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17942857142857144, "grad_norm": 0.01289551891386509, "kl": 0.00048601627349853516, "learning_rate": 2.2629708984760706e-07, "loss": 0.0584, "reward": 0.6511420179158449, "reward_std": 0.9461185112595558, "step": 157 }, { "clip_fraction": 0.0, "completion_length": 2437.9375228881836, "dapo/avg_reward_std": 0.23957703853475637, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36781609829129847, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 37.291666666666664, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18057142857142858, "grad_norm": 0.012769551016390324, "kl": 0.0004298686981201172, "learning_rate": 2.2089083427137329e-07, "loss": 0.0258, "reward": 0.6606059782207012, "reward_std": 0.9088018089532852, "step": 158 }, { "clip_fraction": 0.0, "completion_length": 1726.5868225097656, "dapo/avg_reward_std": 0.3139249332573103, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3913043543048527, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 55.416666666666664, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18171428571428572, "grad_norm": 0.013688490726053715, "kl": 0.00027683377265930176, "learning_rate": 2.1558482853517253e-07, "loss": 0.0506, "reward": 0.7147123599424958, "reward_std": 0.9531080722808838, "step": 159 }, { "clip_fraction": 0.0, "completion_length": 1593.003475189209, "dapo/avg_reward_std": 0.2799004193010001, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33908046782016754, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 45.3125, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18285714285714286, "grad_norm": 0.020229365676641464, "kl": 0.00033217668533325195, "learning_rate": 2.1038068889975259e-07, "loss": 0.0296, "reward": 0.7677402682602406, "reward_std": 0.9385703578591347, "step": 160 }, { "clip_fraction": 0.0, "completion_length": 1877.9444274902344, "dapo/avg_reward_std": 0.36716995636622113, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4930555621782939, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 44.49404761904761, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.184, "grad_norm": 0.012556586414575577, "kl": 0.00037413835525512695, "learning_rate": 2.0528000059645995e-07, "loss": 0.0401, "reward": 0.6385876163840294, "reward_std": 0.9741755276918411, "step": 161 }, { "clip_fraction": 0.0, "completion_length": 2543.1145782470703, "dapo/avg_reward_std": 0.20304026060244618, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.28921569226419225, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 26.249999999999996, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18514285714285714, "grad_norm": 0.010984732769429684, "kl": 0.0005058050155639648, "learning_rate": 2.0028431734436308e-07, "loss": 0.0214, "reward": 0.8138710260391235, "reward_std": 0.937220610678196, "step": 162 }, { "clip_fraction": 0.0, "completion_length": 2579.7916946411133, "dapo/avg_reward_std": 0.2669851701049244, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333397612852, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 31.38888888888889, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18628571428571428, "grad_norm": 0.01393849402666092, "kl": 0.0005407929420471191, "learning_rate": 1.9539516087697517e-07, "loss": 0.0557, "reward": 0.6086596520617604, "reward_std": 0.9360831007361412, "step": 163 }, { "clip_fraction": 0.0, "completion_length": 2303.781295776367, "dapo/avg_reward_std": 0.2889538109302521, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40384616129673445, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 41.041666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18742857142857142, "grad_norm": 0.012467012740671635, "kl": 0.0005753040313720703, "learning_rate": 1.9061402047871833e-07, "loss": 0.0286, "reward": 0.7579413987696171, "reward_std": 0.966604009270668, "step": 164 }, { "clip_fraction": 0.0, "completion_length": 2215.8715744018555, "dapo/avg_reward_std": 0.2284111071910177, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3630952446588448, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 49.72222222222222, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18857142857142858, "grad_norm": 0.013376005925238132, "kl": 0.00038570165634155273, "learning_rate": 1.8594235253127372e-07, "loss": 0.0737, "reward": 0.6369971446692944, "reward_std": 0.944696456193924, "step": 165 }, { "clip_fraction": 0.0, "completion_length": 2194.999984741211, "dapo/avg_reward_std": 0.35230770577555115, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5289855158847311, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 40.416666666666664, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18971428571428572, "grad_norm": 0.00896221399307251, "kl": 0.0004324018955230713, "learning_rate": 1.8138158006995363e-07, "loss": -0.0087, "reward": 0.770520705729723, "reward_std": 0.9258415997028351, "step": 166 }, { "clip_fraction": 0.0, "completion_length": 2363.9861373901367, "dapo/avg_reward_std": 0.23058613193662544, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2763157930029066, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 30.44642857142857, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19085714285714286, "grad_norm": 0.011913989670574665, "kl": 0.0005799531936645508, "learning_rate": 1.7693309235023127e-07, "loss": 0.0282, "reward": 0.8937316909432411, "reward_std": 0.9134809225797653, "step": 167 }, { "clip_fraction": 0.0, "completion_length": 1846.3229217529297, "dapo/avg_reward_std": 0.2788652099412063, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37931035459041595, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 33.03571428571428, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.192, "grad_norm": 0.013345438055694103, "kl": 0.00038933753967285156, "learning_rate": 1.7259824442455923e-07, "loss": 0.0657, "reward": 0.5173812105786055, "reward_std": 0.9046202600002289, "step": 168 }, { "clip_fraction": 0.0, "completion_length": 1632.9965515136719, "dapo/avg_reward_std": 0.33004767837978544, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.49206350318023134, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 51.45833333333333, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19314285714285714, "grad_norm": 0.016018711030483246, "kl": 0.0004235506057739258, "learning_rate": 1.6837835672960831e-07, "loss": -0.0266, "reward": 0.7293304707854986, "reward_std": 0.9580913484096527, "step": 169 }, { "clip_fraction": 0.0, "completion_length": 2218.357666015625, "dapo/avg_reward_std": 0.30882045084779913, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4242424287579276, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 49.37499999999999, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19428571428571428, "grad_norm": 0.012691031210124493, "kl": 0.0005915164947509766, "learning_rate": 1.6427471468404952e-07, "loss": 0.0375, "reward": 0.731636168435216, "reward_std": 0.9506037011742592, "step": 170 }, { "clip_fraction": 0.0, "completion_length": 2086.989585876465, "dapo/avg_reward_std": 0.26685478786627453, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.372222230831782, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 36.45833333333333, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19542857142857142, "grad_norm": 0.0107533298432827, "kl": 0.00045359134674072266, "learning_rate": 1.6028856829700258e-07, "loss": 0.0268, "reward": 0.6401270348578691, "reward_std": 0.9421326443552971, "step": 171 }, { "clip_fraction": 0.0, "completion_length": 1523.298625946045, "dapo/avg_reward_std": 0.2958875367274651, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4294871888481654, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 41.041666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19657142857142856, "grad_norm": 0.02487981878221035, "kl": 0.00044208765029907227, "learning_rate": 1.5642113178727193e-07, "loss": 0.0215, "reward": 0.5742892920970917, "reward_std": 0.9192508533596992, "step": 172 }, { "clip_fraction": 0.0, "completion_length": 2197.4722290039062, "dapo/avg_reward_std": 0.33716599914160644, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4545454619960351, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 45.20833333333333, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1977142857142857, "grad_norm": 0.00999497715383768, "kl": 0.0006158351898193359, "learning_rate": 1.5267358321348285e-07, "loss": -0.0198, "reward": 0.6909432113170624, "reward_std": 0.9331774786114693, "step": 173 }, { "clip_fraction": 0.0, "completion_length": 2469.1909942626953, "dapo/avg_reward_std": 0.31674497947096825, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4722222325702508, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 47.61904761904762, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19885714285714284, "grad_norm": 0.027324816212058067, "kl": 0.0005202293395996094, "learning_rate": 1.4904706411523448e-07, "loss": 0.1381, "reward": 0.7919853329658508, "reward_std": 0.9734821692109108, "step": 174 }, { "clip_fraction": 0.0, "completion_length": 2290.7292098999023, "dapo/avg_reward_std": 0.2796748812709536, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41071429369705065, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 31.041666666666664, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2, "grad_norm": 0.011332061141729355, "kl": 0.000499039888381958, "learning_rate": 1.4554267916537495e-07, "loss": 0.0026, "reward": 0.5971913021057844, "reward_std": 0.9767839089035988, "step": 175 }, { "clip_fraction": 0.0, "completion_length": 2643.475685119629, "dapo/avg_reward_std": 0.30459834399976227, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4649122922044051, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 54.58333333333333, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20114285714285715, "grad_norm": 0.011058920994400978, "kl": 0.0006421804428100586, "learning_rate": 1.4216149583350755e-07, "loss": 0.0243, "reward": 0.801079198718071, "reward_std": 1.0328236892819405, "step": 176 }, { "clip_fraction": 0.0, "completion_length": 2657.517364501953, "dapo/avg_reward_std": 0.268055671826005, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3072916711680591, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 30.32738095238095, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2022857142857143, "grad_norm": 0.012514113448560238, "kl": 0.0006227493286132812, "learning_rate": 1.3890454406082956e-07, "loss": 0.066, "reward": 0.5342087037861347, "reward_std": 0.9403787776827812, "step": 177 }, { "clip_fraction": 0.0, "completion_length": 1730.2395935058594, "dapo/avg_reward_std": 0.22906314557598484, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.354838716406976, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 49.99999999999999, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20342857142857143, "grad_norm": 0.013909725472331047, "kl": 0.0004641413688659668, "learning_rate": 1.3577281594640182e-07, "loss": -0.0032, "reward": 0.817855941131711, "reward_std": 0.9715805351734161, "step": 178 }, { "clip_fraction": 0.0, "completion_length": 1916.9652633666992, "dapo/avg_reward_std": 0.33905652307328726, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5000000085149493, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 49.99999999999999, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20457142857142857, "grad_norm": 0.010170280002057552, "kl": 0.00033092498779296875, "learning_rate": 1.3276726544494571e-07, "loss": 0.0153, "reward": 0.6332587338984013, "reward_std": 0.9844094663858414, "step": 179 }, { "clip_fraction": 0.0, "completion_length": 2013.7534942626953, "dapo/avg_reward_std": 0.4115603660282336, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5175438719360452, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 48.95833333333333, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2057142857142857, "grad_norm": 0.010059732012450695, "kl": 0.0004872828722000122, "learning_rate": 1.2988880807625927e-07, "loss": 0.012, "reward": 0.7964395936578512, "reward_std": 0.9064052030444145, "step": 180 }, { "clip_fraction": 0.0, "completion_length": 2538.3159713745117, "dapo/avg_reward_std": 0.3185795678032769, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3703703775450035, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 40.0, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20685714285714285, "grad_norm": 0.009190794080495834, "kl": 0.0005941390991210938, "learning_rate": 1.2713832064634125e-07, "loss": -0.0091, "reward": 0.6052752519026399, "reward_std": 0.9398948326706886, "step": 181 }, { "clip_fraction": 0.0, "completion_length": 1992.0277557373047, "dapo/avg_reward_std": 0.30058977752923965, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3392857238650322, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 45.32738095238095, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.208, "grad_norm": 0.017918387427926064, "kl": 0.00043332576751708984, "learning_rate": 1.2451664098030743e-07, "loss": 0.0782, "reward": 0.7308525424450636, "reward_std": 0.8988610878586769, "step": 182 }, { "clip_fraction": 0.0, "completion_length": 2368.312515258789, "dapo/avg_reward_std": 0.2227620858213176, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40579710317694623, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 48.33333333333333, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20914285714285713, "grad_norm": 0.01093615498393774, "kl": 0.0005226731300354004, "learning_rate": 1.220245676671809e-07, "loss": -0.0097, "reward": 0.6296821031719446, "reward_std": 0.9496165588498116, "step": 183 }, { "clip_fraction": 0.0, "completion_length": 1855.0486297607422, "dapo/avg_reward_std": 0.3308859848976135, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4133333384990692, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 41.666666666666664, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2102857142857143, "grad_norm": 0.013805963099002838, "kl": 0.0004195570945739746, "learning_rate": 1.1966285981663407e-07, "loss": 0.0542, "reward": 0.8230033777654171, "reward_std": 0.9269852489233017, "step": 184 }, { "clip_fraction": 0.0, "completion_length": 2737.260452270508, "dapo/avg_reward_std": 0.3074522775908311, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.45138889489074546, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 49.37499999999999, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21142857142857144, "grad_norm": 0.01179632730782032, "kl": 0.0006718635559082031, "learning_rate": 1.1743223682775649e-07, "loss": 0.0529, "reward": 0.6228375509381294, "reward_std": 0.9775977432727814, "step": 185 }, { "clip_fraction": 0.0, "completion_length": 2526.899368286133, "dapo/avg_reward_std": 0.2964219942688942, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.48333334401249883, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 58.33333333333333, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21257142857142858, "grad_norm": 0.014796112664043903, "kl": 0.0005816221237182617, "learning_rate": 1.1533337816991931e-07, "loss": 0.088, "reward": 0.8448536917567253, "reward_std": 0.9608767181634903, "step": 186 }, { "clip_fraction": 0.0, "completion_length": 2288.274345397949, "dapo/avg_reward_std": 0.3166468055159957, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34567901823255753, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 40.972222222222214, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21371428571428572, "grad_norm": 0.011898735538125038, "kl": 0.000521540641784668, "learning_rate": 1.1336692317580158e-07, "loss": 0.0415, "reward": 0.7687236070632935, "reward_std": 0.9334599822759628, "step": 187 }, { "clip_fraction": 0.0, "completion_length": 2432.531265258789, "dapo/avg_reward_std": 0.28751447051763535, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4513888979951541, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 53.33333333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21485714285714286, "grad_norm": 0.010497819632291794, "kl": 0.0007112026214599609, "learning_rate": 1.1153347084664419e-07, "loss": 0.0185, "reward": 0.7899295631796122, "reward_std": 0.9512373134493828, "step": 188 }, { "clip_fraction": 0.0, "completion_length": 1948.9167022705078, "dapo/avg_reward_std": 0.30568089832862216, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46527779040237266, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 36.87499999999999, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.216, "grad_norm": 0.013562222942709923, "kl": 0.0006091594696044922, "learning_rate": 1.0983357966978745e-07, "loss": 0.0388, "reward": 0.6485428418964148, "reward_std": 0.9110815972089767, "step": 189 }, { "clip_fraction": 0.0, "completion_length": 2494.395866394043, "dapo/avg_reward_std": 0.27111421525478363, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3863636404275894, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 45.20833333333333, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21714285714285714, "grad_norm": 0.00931188277900219, "kl": 0.0006044209003448486, "learning_rate": 1.0826776744855121e-07, "loss": 0.0024, "reward": 0.5944220442324877, "reward_std": 0.9433802142739296, "step": 190 }, { "clip_fraction": 0.0, "completion_length": 2601.7569427490234, "dapo/avg_reward_std": 0.3233232215046883, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.49166667386889457, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 49.375, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21828571428571428, "grad_norm": 0.011869938112795353, "kl": 0.0006383061408996582, "learning_rate": 1.068365111445064e-07, "loss": 0.0221, "reward": 0.5644997656345367, "reward_std": 0.9473884925246239, "step": 191 }, { "clip_fraction": 0.0, "completion_length": 1624.8541564941406, "dapo/avg_reward_std": 0.33193936944007874, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.46969697692177514, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 44.791666666666664, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21942857142857142, "grad_norm": 0.011828861199319363, "kl": 0.0003381967544555664, "learning_rate": 1.0554024673218806e-07, "loss": -0.0125, "reward": 0.7034952798858285, "reward_std": 0.9275326952338219, "step": 192 }, { "clip_fraction": 0.0, "completion_length": 2333.607650756836, "dapo/avg_reward_std": 0.4260722654206412, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.6309523891125407, "dapo/num_sampling_attempts": 1.75, "dapo/sampling_efficiency": 70.83333333333333, "dapo/total_prompts_processed": 10.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22057142857142858, "grad_norm": 0.010871903039515018, "kl": 0.0005550980567932129, "learning_rate": 1.0437936906629334e-07, "loss": -0.004, "reward": 0.4316184278577566, "reward_std": 0.9555172920227051, "step": 193 }, { "clip_fraction": 0.0, "completion_length": 2939.9097442626953, "dapo/avg_reward_std": 0.2783619257119986, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3846153932122084, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 47.39583333333333, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22171428571428572, "grad_norm": 0.014206220395863056, "kl": 0.0007078647613525391, "learning_rate": 1.0335423176140511e-07, "loss": 0.0805, "reward": 0.7283875979483128, "reward_std": 0.9719515442848206, "step": 194 }, { "clip_fraction": 0.0, "completion_length": 1945.9653244018555, "dapo/avg_reward_std": 0.3208765654187453, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5438596551355562, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 60.416666666666664, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22285714285714286, "grad_norm": 0.015090257860720158, "kl": 0.000569462776184082, "learning_rate": 1.0246514708427701e-07, "loss": -0.021, "reward": 0.5579635920003057, "reward_std": 0.9634370356798172, "step": 195 }, { "clip_fraction": 0.0, "completion_length": 2212.5902709960938, "dapo/avg_reward_std": 0.23615881362382105, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2696078498573864, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 27.916666666666664, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.224, "grad_norm": 0.012650169432163239, "kl": 0.0005346536636352539, "learning_rate": 1.017123858587145e-07, "loss": 0.0756, "reward": 0.6994661018252373, "reward_std": 0.9281085133552551, "step": 196 }, { "clip_fraction": 0.0, "completion_length": 2392.7742919921875, "dapo/avg_reward_std": 0.3088900530338287, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.406666676402092, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 45.3125, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22514285714285714, "grad_norm": 0.01346337329596281, "kl": 0.0006176233291625977, "learning_rate": 1.0109617738307911e-07, "loss": 0.0523, "reward": 0.6644653081893921, "reward_std": 0.9385305866599083, "step": 197 }, { "clip_fraction": 0.0, "completion_length": 2743.819465637207, "dapo/avg_reward_std": 0.3153854298591614, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4133333432674408, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 43.75, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22628571428571428, "grad_norm": 0.010797293856739998, "kl": 0.000672459602355957, "learning_rate": 1.0061670936044178e-07, "loss": 0.04, "reward": 0.5658168056979775, "reward_std": 0.9682240337133408, "step": 198 }, { "clip_fraction": 0.0, "completion_length": 2336.80558013916, "dapo/avg_reward_std": 0.3246711401835732, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4855072530715362, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 41.666666666666664, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22742857142857142, "grad_norm": 0.011765834875404835, "kl": 0.00055694580078125, "learning_rate": 1.002741278414069e-07, "loss": 0.0308, "reward": 0.6460054386407137, "reward_std": 0.9711420610547066, "step": 199 }, { "clip_fraction": 0.0, "completion_length": 2571.1875228881836, "dapo/avg_reward_std": 0.29997331152359646, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.486111119389534, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 39.285714285714285, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22857142857142856, "grad_norm": 0.009876573458313942, "kl": 0.0005443096160888672, "learning_rate": 1.0006853717962393e-07, "loss": 0.0268, "reward": 0.5957941338419914, "reward_std": 0.992652915418148, "step": 200 }, { "epoch": 0.22857142857142856, "step": 200, "total_flos": 0.0, "train_loss": 0.01698429927288089, "train_runtime": 137940.7556, "train_samples_per_second": 0.07, "train_steps_per_second": 0.001 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }