| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.22857142857142856, | |
| "eval_steps": 500, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1681.8854370117188, | |
| "dapo/avg_reward_std": 0.3420590679896505, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.48245614610220255, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 54.58333333333333, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.001142857142857143, | |
| "grad_norm": 0.011931957677006721, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0219, | |
| "reward": 0.8671084493398666, | |
| "reward_std": 0.964848667383194, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2172.913185119629, | |
| "dapo/avg_reward_std": 0.27327019289920207, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4824561500235608, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 67.41071428571428, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.002285714285714286, | |
| "grad_norm": 0.014162006787955761, | |
| "kl": 0.0, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0232, | |
| "reward": 0.932205643504858, | |
| "reward_std": 0.9607091471552849, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2418.3611373901367, | |
| "dapo/avg_reward_std": 0.3202404692769051, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.45833334177732465, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 51.04166666666666, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.0034285714285714284, | |
| "grad_norm": 0.011303936131298542, | |
| "kl": 0.0001301020383834839, | |
| "learning_rate": 2e-07, | |
| "loss": 0.0371, | |
| "reward": 0.5818949677050114, | |
| "reward_std": 0.928392305970192, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2080.6250228881836, | |
| "dapo/avg_reward_std": 0.3523675338788466, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4545454586094076, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 45.20833333333333, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.004571428571428572, | |
| "grad_norm": 0.010935964062809944, | |
| "kl": 8.246302604675293e-05, | |
| "learning_rate": 3e-07, | |
| "loss": 0.007, | |
| "reward": 0.6902085058391094, | |
| "reward_std": 0.9576746746897697, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2208.1910247802734, | |
| "dapo/avg_reward_std": 0.33842799224351583, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4912280746196446, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 54.166666666666664, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.005714285714285714, | |
| "grad_norm": 0.01424587145447731, | |
| "kl": 0.00011987239122390747, | |
| "learning_rate": 4e-07, | |
| "loss": 0.0916, | |
| "reward": 0.5482002776116133, | |
| "reward_std": 0.9192102774977684, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2428.8646087646484, | |
| "dapo/avg_reward_std": 0.2724780907233556, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37222223381201425, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 37.39583333333333, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.006857142857142857, | |
| "grad_norm": 0.012209060601890087, | |
| "kl": 0.00013336539268493652, | |
| "learning_rate": 5e-07, | |
| "loss": 0.063, | |
| "reward": 0.6304261162877083, | |
| "reward_std": 0.947055421769619, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2028.1111297607422, | |
| "dapo/avg_reward_std": 0.35396890342235565, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5151515284722502, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 48.95833333333333, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.008, | |
| "grad_norm": 0.01456605363637209, | |
| "kl": 0.00010842084884643555, | |
| "learning_rate": 6e-07, | |
| "loss": 0.0863, | |
| "reward": 0.7125897314399481, | |
| "reward_std": 0.938522607088089, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1825.9792022705078, | |
| "dapo/avg_reward_std": 0.3198123288154602, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.45333334505558015, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 36.45833333333333, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.009142857142857144, | |
| "grad_norm": 0.014117815531790257, | |
| "kl": 8.45193862915039e-05, | |
| "learning_rate": 7e-07, | |
| "loss": 0.024, | |
| "reward": 0.7728112610056996, | |
| "reward_std": 0.953309640288353, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2424.159713745117, | |
| "dapo/avg_reward_std": 0.4454919546842575, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.6785714392151151, | |
| "dapo/num_sampling_attempts": 1.75, | |
| "dapo/sampling_efficiency": 70.83333333333333, | |
| "dapo/total_prompts_processed": 10.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.010285714285714285, | |
| "grad_norm": 0.008895393460988998, | |
| "kl": 0.00011056661605834961, | |
| "learning_rate": 8e-07, | |
| "loss": 0.013, | |
| "reward": 0.6077092736959457, | |
| "reward_std": 0.994397833943367, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1959.0763702392578, | |
| "dapo/avg_reward_std": 0.25889470875263215, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.350000007947286, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 40.20833333333333, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.011428571428571429, | |
| "grad_norm": 0.011032010428607464, | |
| "kl": 8.809566497802734e-05, | |
| "learning_rate": 9e-07, | |
| "loss": 0.018, | |
| "reward": 0.7773313578218222, | |
| "reward_std": 0.9549762830138206, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2597.6979217529297, | |
| "dapo/avg_reward_std": 0.3167818512605584, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.44202899284984754, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 42.70833333333333, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.012571428571428572, | |
| "grad_norm": 0.010659257881343365, | |
| "kl": 0.00013309717178344727, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0026, | |
| "reward": 0.5649524200707674, | |
| "reward_std": 0.9257139712572098, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2214.9444580078125, | |
| "dapo/avg_reward_std": 0.33351172175672317, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5648148208856583, | |
| "dapo/num_sampling_attempts": 2.25, | |
| "dapo/sampling_efficiency": 49.99999999999999, | |
| "dapo/total_prompts_processed": 13.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.013714285714285714, | |
| "grad_norm": 0.010501649230718613, | |
| "kl": 9.53376293182373e-05, | |
| "learning_rate": 9.997258721585931e-07, | |
| "loss": 0.0287, | |
| "reward": 0.7854772098362446, | |
| "reward_std": 0.9361946359276772, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1984.5416717529297, | |
| "dapo/avg_reward_std": 0.3313978049490187, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5925926052861743, | |
| "dapo/num_sampling_attempts": 2.25, | |
| "dapo/sampling_efficiency": 56.666666666666664, | |
| "dapo/total_prompts_processed": 13.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.014857142857142857, | |
| "grad_norm": 0.012102734297513962, | |
| "kl": 9.861588478088379e-05, | |
| "learning_rate": 9.989038226169207e-07, | |
| "loss": 0.0277, | |
| "reward": 0.9007548745721579, | |
| "reward_std": 0.9196444824337959, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2267.5069885253906, | |
| "dapo/avg_reward_std": 0.21889745750847986, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3186274560935357, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 40.63988095238095, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.016, | |
| "grad_norm": 0.01004031766206026, | |
| "kl": 0.00010375678539276123, | |
| "learning_rate": 9.975348529157229e-07, | |
| "loss": 0.0342, | |
| "reward": 0.5439228732138872, | |
| "reward_std": 0.9444419518113136, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2403.170135498047, | |
| "dapo/avg_reward_std": 0.24896668710491873, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4242424321445552, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 58.45238095238095, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.017142857142857144, | |
| "grad_norm": 0.013138854876160622, | |
| "kl": 0.00011286139488220215, | |
| "learning_rate": 9.956206309337066e-07, | |
| "loss": 0.0341, | |
| "reward": 0.6446905825287104, | |
| "reward_std": 0.9305006489157677, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2368.579849243164, | |
| "dapo/avg_reward_std": 0.32238917201757433, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4416666716337204, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 53.125, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.018285714285714287, | |
| "grad_norm": 0.009644324891269207, | |
| "kl": 0.00011764466762542725, | |
| "learning_rate": 9.931634888554935e-07, | |
| "loss": 0.0184, | |
| "reward": 0.6319684982299805, | |
| "reward_std": 0.9385868087410927, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2354.590286254883, | |
| "dapo/avg_reward_std": 0.2929895012466996, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41358025482407323, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 43.95833333333333, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.019428571428571427, | |
| "grad_norm": 0.010750290006399155, | |
| "kl": 0.00012104213237762451, | |
| "learning_rate": 9.901664203302124e-07, | |
| "loss": 0.0512, | |
| "reward": 0.7495243214070797, | |
| "reward_std": 0.9604936093091965, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2353.548599243164, | |
| "dapo/avg_reward_std": 0.3144007975404913, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46212122250686993, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 52.5, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.02057142857142857, | |
| "grad_norm": 0.0106205390766263, | |
| "kl": 0.0001283884048461914, | |
| "learning_rate": 9.866330768241983e-07, | |
| "loss": 0.0356, | |
| "reward": 0.7090531028807163, | |
| "reward_std": 0.927816279232502, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2599.90283203125, | |
| "dapo/avg_reward_std": 0.31102153037985164, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46527778667708236, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 43.125, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.021714285714285714, | |
| "grad_norm": 0.00998625811189413, | |
| "kl": 0.00011986494064331055, | |
| "learning_rate": 9.825677631722435e-07, | |
| "loss": 0.0501, | |
| "reward": 0.8357332646846771, | |
| "reward_std": 0.9608008861541748, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2307.482650756836, | |
| "dapo/avg_reward_std": 0.3105274804613807, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4545454633506862, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 45.83333333333333, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.022857142857142857, | |
| "grad_norm": 0.010738078504800797, | |
| "kl": 9.399652481079102e-05, | |
| "learning_rate": 9.779754323328192e-07, | |
| "loss": 0.0104, | |
| "reward": 0.7927055042237043, | |
| "reward_std": 0.9697678238153458, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1943.2500457763672, | |
| "dapo/avg_reward_std": 0.3021106570959091, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.384615390919722, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 41.78571428571428, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.024, | |
| "grad_norm": 0.01025764923542738, | |
| "kl": 6.92903995513916e-05, | |
| "learning_rate": 9.728616793536587e-07, | |
| "loss": 0.0005, | |
| "reward": 0.7050843685865402, | |
| "reward_std": 0.9542289972305298, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2265.222198486328, | |
| "dapo/avg_reward_std": 0.2858178478020888, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4102564144593019, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 36.160714285714285, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.025142857142857144, | |
| "grad_norm": 0.015554007142782211, | |
| "kl": 0.00011515617370605469, | |
| "learning_rate": 9.672327345550543e-07, | |
| "loss": 0.1143, | |
| "reward": 0.7392658032476902, | |
| "reward_std": 0.9592578783631325, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2213.857635498047, | |
| "dapo/avg_reward_std": 0.28609917419297354, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.410714291036129, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 38.66071428571428, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.026285714285714287, | |
| "grad_norm": 0.00819400418549776, | |
| "kl": 7.683038711547852e-05, | |
| "learning_rate": 9.610954559391704e-07, | |
| "loss": 0.018, | |
| "reward": 0.6645980039611459, | |
| "reward_std": 0.919261984527111, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1544.9930610656738, | |
| "dapo/avg_reward_std": 0.27062960465749103, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.38888889948527017, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 37.20238095238095, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.027428571428571427, | |
| "grad_norm": 0.013472510501742363, | |
| "kl": 6.948411464691162e-05, | |
| "learning_rate": 9.54457320834625e-07, | |
| "loss": 0.0006, | |
| "reward": 0.6155341246630996, | |
| "reward_std": 0.9053066149353981, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2005.5104598999023, | |
| "dapo/avg_reward_std": 0.2877837224253293, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.38505747760164327, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 38.75, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.02857142857142857, | |
| "grad_norm": 0.011138558387756348, | |
| "kl": 8.162856101989746e-05, | |
| "learning_rate": 9.473264167865171e-07, | |
| "loss": 0.0493, | |
| "reward": 0.6912501659244299, | |
| "reward_std": 0.9633006453514099, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2387.5555725097656, | |
| "dapo/avg_reward_std": 0.19959817528724672, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3055555591980616, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 44.49404761904761, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.029714285714285714, | |
| "grad_norm": 0.011900709010660648, | |
| "kl": 9.435415267944336e-05, | |
| "learning_rate": 9.397114317029974e-07, | |
| "loss": 0.0815, | |
| "reward": 0.5562675036489964, | |
| "reward_std": 0.9110650941729546, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2044.7292137145996, | |
| "dapo/avg_reward_std": 0.3619746658951044, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.6354166744276881, | |
| "dapo/num_sampling_attempts": 2.0, | |
| "dapo/sampling_efficiency": 69.16666666666666, | |
| "dapo/total_prompts_processed": 12.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.030857142857142857, | |
| "grad_norm": 0.01303341705352068, | |
| "kl": 8.736550807952881e-05, | |
| "learning_rate": 9.316216432703916e-07, | |
| "loss": 0.0141, | |
| "reward": 0.7769045419991016, | |
| "reward_std": 0.9760870188474655, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2458.9305572509766, | |
| "dapo/avg_reward_std": 0.2839898039465365, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.427536239442618, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 42.08333333333333, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.032, | |
| "grad_norm": 0.013889433816075325, | |
| "kl": 0.00014150142669677734, | |
| "learning_rate": 9.230669076497687e-07, | |
| "loss": 0.0479, | |
| "reward": 0.5980293937027454, | |
| "reward_std": 0.9796791076660156, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2496.451416015625, | |
| "dapo/avg_reward_std": 0.35542283952236176, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5648148175742891, | |
| "dapo/num_sampling_attempts": 2.25, | |
| "dapo/sampling_efficiency": 67.5, | |
| "dapo/total_prompts_processed": 13.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.03314285714285714, | |
| "grad_norm": 0.011365516111254692, | |
| "kl": 0.00010502338409423828, | |
| "learning_rate": 9.140576474687263e-07, | |
| "loss": 0.0278, | |
| "reward": 0.6495406329631805, | |
| "reward_std": 0.9649527370929718, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1831.333351135254, | |
| "dapo/avg_reward_std": 0.2628121712933416, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41304348279600556, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 60.625, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.03428571428571429, | |
| "grad_norm": 0.012428080663084984, | |
| "kl": 8.240342140197754e-05, | |
| "learning_rate": 9.046048391230247e-07, | |
| "loss": 0.0408, | |
| "reward": 0.7913381233811378, | |
| "reward_std": 0.9801043272018433, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2105.7118225097656, | |
| "dapo/avg_reward_std": 0.2843361473083496, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4266666781902313, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 53.75, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.03542857142857143, | |
| "grad_norm": 0.016210218891501427, | |
| "kl": 0.0001112520694732666, | |
| "learning_rate": 8.9471999940354e-07, | |
| "loss": 0.1052, | |
| "reward": 0.5814057979732752, | |
| "reward_std": 0.9699539840221405, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2366.718818664551, | |
| "dapo/avg_reward_std": 0.2371666719173563, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34482759648355943, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 38.4375, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.036571428571428574, | |
| "grad_norm": 0.01111757755279541, | |
| "kl": 0.00011564791202545166, | |
| "learning_rate": 8.844151714648274e-07, | |
| "loss": 0.0379, | |
| "reward": 0.6102676652371883, | |
| "reward_std": 0.9229060783982277, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2388.1909942626953, | |
| "dapo/avg_reward_std": 0.29336222237156284, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3118279624369837, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 42.1875, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.037714285714285714, | |
| "grad_norm": 0.01051933504641056, | |
| "kl": 9.141862392425537e-05, | |
| "learning_rate": 8.737029101523929e-07, | |
| "loss": 0.041, | |
| "reward": 0.6971308812499046, | |
| "reward_std": 0.9577681049704552, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2259.065963745117, | |
| "dapo/avg_reward_std": 0.3195795826613903, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5833333367481828, | |
| "dapo/num_sampling_attempts": 2.0, | |
| "dapo/sampling_efficiency": 62.49999999999999, | |
| "dapo/total_prompts_processed": 12.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.038857142857142854, | |
| "grad_norm": 0.010114133358001709, | |
| "kl": 9.936094284057617e-05, | |
| "learning_rate": 8.625962667065487e-07, | |
| "loss": 0.0019, | |
| "reward": 0.706351961940527, | |
| "reward_std": 0.9608398601412773, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2236.6563262939453, | |
| "dapo/avg_reward_std": 0.2805841226002266, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33908046679250126, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 30.952380952380942, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04, | |
| "grad_norm": 0.01071652490645647, | |
| "kl": 0.00013333559036254883, | |
| "learning_rate": 8.511087728614862e-07, | |
| "loss": 0.0108, | |
| "reward": 0.6857370678335428, | |
| "reward_std": 0.9366307482123375, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1998.9166717529297, | |
| "dapo/avg_reward_std": 0.30676539919593115, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4772727360779589, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 44.791666666666664, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04114285714285714, | |
| "grad_norm": 0.011716869659721851, | |
| "kl": 0.00010579824447631836, | |
| "learning_rate": 8.392544243589427e-07, | |
| "loss": 0.0577, | |
| "reward": 0.8430320359766483, | |
| "reward_std": 0.8613111302256584, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2699.8819580078125, | |
| "dapo/avg_reward_std": 0.280869146873211, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36781610034663104, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 36.45833333333333, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04228571428571429, | |
| "grad_norm": 0.011984186246991158, | |
| "kl": 0.00011450052261352539, | |
| "learning_rate": 8.270476638965461e-07, | |
| "loss": 0.0641, | |
| "reward": 0.6952194459736347, | |
| "reward_std": 0.9531055390834808, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2508.343765258789, | |
| "dapo/avg_reward_std": 0.3086147890204475, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.44444445485160466, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 51.45833333333333, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04342857142857143, | |
| "grad_norm": 0.014813189394772053, | |
| "kl": 0.00013363361358642578, | |
| "learning_rate": 8.145033635316128e-07, | |
| "loss": 0.0815, | |
| "reward": 0.6981049925088882, | |
| "reward_std": 0.9795023873448372, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2568.090286254883, | |
| "dapo/avg_reward_std": 0.2281228665149573, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30303030799735675, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 35.3125, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.044571428571428574, | |
| "grad_norm": 0.010284055955708027, | |
| "kl": 0.0001270771026611328, | |
| "learning_rate": 8.01636806561836e-07, | |
| "loss": 0.0129, | |
| "reward": 0.5480891708284616, | |
| "reward_std": 0.9542658925056458, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2255.0798721313477, | |
| "dapo/avg_reward_std": 0.3315709355202588, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46969698437235574, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 45.20833333333333, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.045714285714285714, | |
| "grad_norm": 0.01235182024538517, | |
| "kl": 0.00011420249938964844, | |
| "learning_rate": 7.884636689049422e-07, | |
| "loss": 0.0472, | |
| "reward": 0.8707308620214462, | |
| "reward_std": 0.9157829731702805, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2417.9444427490234, | |
| "dapo/avg_reward_std": 0.2831250044607347, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3655914020153784, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 37.723214285714285, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.046857142857142854, | |
| "grad_norm": 0.010439831763505936, | |
| "kl": 0.00012230873107910156, | |
| "learning_rate": 7.75e-07, | |
| "loss": 0.0395, | |
| "reward": 0.7518008537590504, | |
| "reward_std": 0.9689745083451271, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2325.5937881469727, | |
| "dapo/avg_reward_std": 0.28424168271677835, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3869047707745007, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 33.75, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.048, | |
| "grad_norm": 0.010445328429341316, | |
| "kl": 8.326023817062378e-05, | |
| "learning_rate": 7.612622032536507e-07, | |
| "loss": 0.0004, | |
| "reward": 0.6408937154337764, | |
| "reward_std": 0.9007892906665802, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2423.9617919921875, | |
| "dapo/avg_reward_std": 0.28680659715945905, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4038461624429776, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 46.041666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.04914285714285714, | |
| "grad_norm": 0.010229532606899738, | |
| "kl": 0.00013530254364013672, | |
| "learning_rate": 7.472670160550848e-07, | |
| "loss": 0.0104, | |
| "reward": 0.6538480781018734, | |
| "reward_std": 0.9688718169927597, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2088.677085876465, | |
| "dapo/avg_reward_std": 0.3208466252455345, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4423077031970024, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 41.041666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05028571428571429, | |
| "grad_norm": 0.011106742545962334, | |
| "kl": 0.00012566149234771729, | |
| "learning_rate": 7.330314893841101e-07, | |
| "loss": 0.0239, | |
| "reward": 0.8764502704143524, | |
| "reward_std": 0.9285347983241081, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1721.781234741211, | |
| "dapo/avg_reward_std": 0.3683280497789383, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5083333447575569, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 47.916666666666664, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05142857142857143, | |
| "grad_norm": 0.01152133010327816, | |
| "kl": 7.429718971252441e-05, | |
| "learning_rate": 7.185729670371604e-07, | |
| "loss": 0.0259, | |
| "reward": 0.8203496672213078, | |
| "reward_std": 0.9882074818015099, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3020.9757232666016, | |
| "dapo/avg_reward_std": 0.294668085873127, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37500000691839624, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 38.660714285714285, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.052571428571428575, | |
| "grad_norm": 0.009526599198579788, | |
| "kl": 0.00014853477478027344, | |
| "learning_rate": 7.039090644965509e-07, | |
| "loss": 0.0314, | |
| "reward": 0.6035567373037338, | |
| "reward_std": 0.9617942646145821, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2869.8958892822266, | |
| "dapo/avg_reward_std": 0.37419558623257804, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5196078481043086, | |
| "dapo/num_sampling_attempts": 2.125, | |
| "dapo/sampling_efficiency": 66.66666666666666, | |
| "dapo/total_prompts_processed": 12.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.053714285714285714, | |
| "grad_norm": 0.008854555897414684, | |
| "kl": 0.00012740492820739746, | |
| "learning_rate": 6.890576474687263e-07, | |
| "loss": 0.0266, | |
| "reward": 0.5126286232843995, | |
| "reward_std": 0.9323688969016075, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1974.5069999694824, | |
| "dapo/avg_reward_std": 0.31826632221539813, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.42361111628512543, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 43.541666666666664, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.054857142857142854, | |
| "grad_norm": 0.012630482204258442, | |
| "kl": 0.00011485815048217773, | |
| "learning_rate": 6.740368101176495e-07, | |
| "loss": 0.0259, | |
| "reward": 0.7998449765145779, | |
| "reward_std": 0.9614248275756836, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2775.854164123535, | |
| "dapo/avg_reward_std": 0.24803236694563002, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41269841435409726, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 65.97222222222223, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.056, | |
| "grad_norm": 0.0115203270688653, | |
| "kl": 0.00010813772678375244, | |
| "learning_rate": 6.588648530198504e-07, | |
| "loss": 0.0626, | |
| "reward": 0.5735284592956305, | |
| "reward_std": 0.9657324403524399, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2555.2743377685547, | |
| "dapo/avg_reward_std": 0.3077625359098117, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.423611119389534, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 48.33333333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05714285714285714, | |
| "grad_norm": 0.012258801609277725, | |
| "kl": 0.00013893842697143555, | |
| "learning_rate": 6.435602608679916e-07, | |
| "loss": 0.0575, | |
| "reward": 0.8288873583078384, | |
| "reward_std": 0.950613297522068, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2645.576400756836, | |
| "dapo/avg_reward_std": 0.3462034153441588, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4236111169060071, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 39.99999999999999, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05828571428571429, | |
| "grad_norm": 0.01161988079547882, | |
| "kl": 0.0001646280288696289, | |
| "learning_rate": 6.281416799501187e-07, | |
| "loss": 0.046, | |
| "reward": 0.46879277005791664, | |
| "reward_std": 0.9387945607304573, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2043.677101135254, | |
| "dapo/avg_reward_std": 0.3387378570826157, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4347826171180476, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 45.83333333333333, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.05942857142857143, | |
| "grad_norm": 0.011719447560608387, | |
| "kl": 0.00012214481830596924, | |
| "learning_rate": 6.126278954320294e-07, | |
| "loss": 0.0093, | |
| "reward": 0.7487262971699238, | |
| "reward_std": 0.9444489181041718, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2277.902801513672, | |
| "dapo/avg_reward_std": 0.269059170936716, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37356322695469035, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 41.88988095238095, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.060571428571428575, | |
| "grad_norm": 0.012477328069508076, | |
| "kl": 0.00015044212341308594, | |
| "learning_rate": 5.97037808470444e-07, | |
| "loss": 0.048, | |
| "reward": 0.6608240492641926, | |
| "reward_std": 0.9770755022764206, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2374.232635498047, | |
| "dapo/avg_reward_std": 0.34054997433786804, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.500000013605408, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 37.916666666666664, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.061714285714285715, | |
| "grad_norm": 0.013303548097610474, | |
| "kl": 0.0001438036561012268, | |
| "learning_rate": 5.813904131848564e-07, | |
| "loss": 0.0614, | |
| "reward": 0.75572844222188, | |
| "reward_std": 0.9565529599785805, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2442.232666015625, | |
| "dapo/avg_reward_std": 0.27056889484326047, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4097222263614337, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 45.83333333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06285714285714286, | |
| "grad_norm": 0.011922283098101616, | |
| "kl": 0.00014710426330566406, | |
| "learning_rate": 5.657047735161255e-07, | |
| "loss": 0.0447, | |
| "reward": 0.6145301992073655, | |
| "reward_std": 0.9308876842260361, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2163.7604064941406, | |
| "dapo/avg_reward_std": 0.306766193537485, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.47619048612458365, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 57.291666666666664, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.064, | |
| "grad_norm": 0.009786682203412056, | |
| "kl": 0.00011900067329406738, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0353, | |
| "reward": 0.7467220462858677, | |
| "reward_std": 0.9404179230332375, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1992.7430953979492, | |
| "dapo/avg_reward_std": 0.21240893006324768, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2631579006188794, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 27.708333333333332, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06514285714285714, | |
| "grad_norm": 0.015636112540960312, | |
| "kl": 0.00013278424739837646, | |
| "learning_rate": 5.342952264838747e-07, | |
| "loss": 0.0652, | |
| "reward": 0.5448480695486069, | |
| "reward_std": 0.8946049734950066, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1786.927101135254, | |
| "dapo/avg_reward_std": 0.27395731459061307, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.479166679084301, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 51.979166666666664, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06628571428571428, | |
| "grad_norm": 0.012302345596253872, | |
| "kl": 0.00010266900062561035, | |
| "learning_rate": 5.186095868151436e-07, | |
| "loss": 0.0222, | |
| "reward": 0.7567729391157627, | |
| "reward_std": 0.9539604857563972, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1871.125015258789, | |
| "dapo/avg_reward_std": 0.26716366639504063, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3461538547506699, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 51.25, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06742857142857143, | |
| "grad_norm": 0.012423303909599781, | |
| "kl": 0.00013174861669540405, | |
| "learning_rate": 5.02962191529556e-07, | |
| "loss": 0.0051, | |
| "reward": 0.5472707431763411, | |
| "reward_std": 0.9848242700099945, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2110.0104446411133, | |
| "dapo/avg_reward_std": 0.27772934675216676, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3933333379030228, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 55.416666666666664, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06857142857142857, | |
| "grad_norm": 0.010305487550795078, | |
| "kl": 0.00013266503810882568, | |
| "learning_rate": 4.873721045679706e-07, | |
| "loss": -0.0051, | |
| "reward": 0.5918029174208641, | |
| "reward_std": 0.9419775605201721, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1820.1597595214844, | |
| "dapo/avg_reward_std": 0.2844862639904022, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.351190483463662, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 39.28571428571428, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.06971428571428571, | |
| "grad_norm": 0.01057644933462143, | |
| "kl": 9.304285049438477e-05, | |
| "learning_rate": 4.7185832004988133e-07, | |
| "loss": 0.0019, | |
| "reward": 0.5361353289335966, | |
| "reward_std": 0.9243106096982956, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2268.913215637207, | |
| "dapo/avg_reward_std": 0.2805037432246738, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3456790220958215, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 39.791666666666664, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07085714285714285, | |
| "grad_norm": 0.010327951982617378, | |
| "kl": 0.00013640522956848145, | |
| "learning_rate": 4.5643973913200837e-07, | |
| "loss": 0.011, | |
| "reward": 0.5703515652567148, | |
| "reward_std": 0.9485230222344398, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2150.541679382324, | |
| "dapo/avg_reward_std": 0.3610766388868031, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5000000164697045, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 48.95833333333333, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.072, | |
| "grad_norm": 0.01420843880623579, | |
| "kl": 0.00017371773719787598, | |
| "learning_rate": 4.4113514698014953e-07, | |
| "loss": 0.027, | |
| "reward": 0.8152667284011841, | |
| "reward_std": 0.9553957208991051, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2542.954879760742, | |
| "dapo/avg_reward_std": 0.25789711397627124, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4057971077120822, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 55.0, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07314285714285715, | |
| "grad_norm": 0.010388275608420372, | |
| "kl": 0.00016424059867858887, | |
| "learning_rate": 4.2596318988235037e-07, | |
| "loss": 0.0153, | |
| "reward": 0.8328269198536873, | |
| "reward_std": 0.946412943303585, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2573.9132385253906, | |
| "dapo/avg_reward_std": 0.27658049833206905, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4682539779515493, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 67.01388888888889, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07428571428571429, | |
| "grad_norm": 0.016587890684604645, | |
| "kl": 0.0002205371856689453, | |
| "learning_rate": 4.1094235253127374e-07, | |
| "loss": 0.071, | |
| "reward": 0.8272522762417793, | |
| "reward_std": 0.9939362108707428, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2272.4132080078125, | |
| "dapo/avg_reward_std": 0.28441278512279194, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.38888889737427235, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 49.375, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07542857142857143, | |
| "grad_norm": 0.01080800499767065, | |
| "kl": 0.00015676021575927734, | |
| "learning_rate": 3.9609093550344907e-07, | |
| "loss": -0.0104, | |
| "reward": 0.7243790216743946, | |
| "reward_std": 1.0099836066365242, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2551.920150756836, | |
| "dapo/avg_reward_std": 0.29605763202363794, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4242424314672297, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 50.416666666666664, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07657142857142857, | |
| "grad_norm": 0.01253009494394064, | |
| "kl": 0.0001944899559020996, | |
| "learning_rate": 3.8142703296283953e-07, | |
| "loss": 0.0544, | |
| "reward": 0.7982187271118164, | |
| "reward_std": 0.9796509444713593, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2039.6910400390625, | |
| "dapo/avg_reward_std": 0.3305485857029756, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4305555634200573, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 41.041666666666664, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07771428571428571, | |
| "grad_norm": 0.013196859508752823, | |
| "kl": 0.00021713972091674805, | |
| "learning_rate": 3.6696851061588994e-07, | |
| "loss": 0.0185, | |
| "reward": 0.8682084418833256, | |
| "reward_std": 0.9861341118812561, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2549.642364501953, | |
| "dapo/avg_reward_std": 0.28639274001121523, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4133333384990692, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 38.95833333333333, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.07885714285714286, | |
| "grad_norm": 0.010159006342291832, | |
| "kl": 0.00016075372695922852, | |
| "learning_rate": 3.5273298394491515e-07, | |
| "loss": -0.0284, | |
| "reward": 0.5912708025425673, | |
| "reward_std": 0.9797485172748566, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2719.5382232666016, | |
| "dapo/avg_reward_std": 0.28611900960957565, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.351851859026485, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 40.625, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08, | |
| "grad_norm": 0.011270755901932716, | |
| "kl": 0.00022423267364501953, | |
| "learning_rate": 3.387377967463493e-07, | |
| "loss": 0.0265, | |
| "reward": 0.5740308649837971, | |
| "reward_std": 0.8749020621180534, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2073.2916946411133, | |
| "dapo/avg_reward_std": 0.28938476492961246, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.45833334264655906, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 42.49999999999999, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08114285714285714, | |
| "grad_norm": 0.011867412365972996, | |
| "kl": 0.0001347661018371582, | |
| "learning_rate": 3.250000000000001e-07, | |
| "loss": -0.0577, | |
| "reward": 0.5955507848411798, | |
| "reward_std": 0.9116542786359787, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2239.322914123535, | |
| "dapo/avg_reward_std": 0.30952110344713385, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4469697041945024, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 44.166666666666664, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08228571428571428, | |
| "grad_norm": 0.011070906184613705, | |
| "kl": 0.000155717134475708, | |
| "learning_rate": 3.115363310950578e-07, | |
| "loss": 0.0339, | |
| "reward": 0.7990612685680389, | |
| "reward_std": 0.9683424234390259, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2044.489601135254, | |
| "dapo/avg_reward_std": 0.21984713185917248, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3131313206571521, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 36.77083333333333, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08342857142857144, | |
| "grad_norm": 0.014109701849520206, | |
| "kl": 0.0001436173915863037, | |
| "learning_rate": 2.9836319343816397e-07, | |
| "loss": 0.085, | |
| "reward": 0.8676656074821949, | |
| "reward_std": 0.9657078757882118, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1958.7361068725586, | |
| "dapo/avg_reward_std": 0.30799518460812775, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4927536339863487, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 40.625, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08457142857142858, | |
| "grad_norm": 0.013041837140917778, | |
| "kl": 0.0001519918441772461, | |
| "learning_rate": 2.854966364683872e-07, | |
| "loss": 0.0492, | |
| "reward": 0.6045123310759664, | |
| "reward_std": 0.9384523630142212, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1523.1284942626953, | |
| "dapo/avg_reward_std": 0.31539708146682155, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.391025647521019, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 36.875, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08571428571428572, | |
| "grad_norm": 0.014472462236881256, | |
| "kl": 0.0001392364501953125, | |
| "learning_rate": 2.729523361034538e-07, | |
| "loss": 0.0358, | |
| "reward": 0.7163376174867153, | |
| "reward_std": 0.9508332461118698, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2640.7813110351562, | |
| "dapo/avg_reward_std": 0.3144421911239624, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.44000000655651095, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 43.75, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08685714285714285, | |
| "grad_norm": 0.011127221398055553, | |
| "kl": 0.0002060532569885254, | |
| "learning_rate": 2.6074557564105724e-07, | |
| "loss": 0.0604, | |
| "reward": 0.6046733632683754, | |
| "reward_std": 0.9528723284602165, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2088.7292098999023, | |
| "dapo/avg_reward_std": 0.3257487453520298, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4444444552063942, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 45.31249999999999, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.088, | |
| "grad_norm": 0.013021063059568405, | |
| "kl": 0.00017440319061279297, | |
| "learning_rate": 2.488912271385139e-07, | |
| "loss": 0.0353, | |
| "reward": 0.5843205824494362, | |
| "reward_std": 0.9498706609010696, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2710.0069580078125, | |
| "dapo/avg_reward_std": 0.4117408903206096, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5784313836518455, | |
| "dapo/num_sampling_attempts": 2.125, | |
| "dapo/sampling_efficiency": 52.08333333333333, | |
| "dapo/total_prompts_processed": 12.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.08914285714285715, | |
| "grad_norm": 0.00956858042627573, | |
| "kl": 0.00020110607147216797, | |
| "learning_rate": 2.374037332934512e-07, | |
| "loss": -0.0019, | |
| "reward": 0.7558267749845982, | |
| "reward_std": 0.9872319549322128, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2532.888916015625, | |
| "dapo/avg_reward_std": 0.29725510747201983, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34408603031789103, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 31.696428571428562, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09028571428571429, | |
| "grad_norm": 0.010455719195306301, | |
| "kl": 0.00019878149032592773, | |
| "learning_rate": 2.2629708984760706e-07, | |
| "loss": 0.0433, | |
| "reward": 0.7071553282439709, | |
| "reward_std": 0.936428040266037, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2045.3507232666016, | |
| "dapo/avg_reward_std": 0.24797727167606354, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4318181872367859, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 56.24999999999999, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09142857142857143, | |
| "grad_norm": 0.011657273396849632, | |
| "kl": 0.00015923380851745605, | |
| "learning_rate": 2.1558482853517253e-07, | |
| "loss": 0.0016, | |
| "reward": 0.8354307417757809, | |
| "reward_std": 0.9478549808263779, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2517.621482849121, | |
| "dapo/avg_reward_std": 0.3837103931342854, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5882353055126527, | |
| "dapo/num_sampling_attempts": 2.125, | |
| "dapo/sampling_efficiency": 56.24999999999999, | |
| "dapo/total_prompts_processed": 12.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09257142857142857, | |
| "grad_norm": 0.011230596341192722, | |
| "kl": 0.00020751357078552246, | |
| "learning_rate": 2.0528000059645995e-07, | |
| "loss": 0.0523, | |
| "reward": 0.6180859599262476, | |
| "reward_std": 0.9601781144738197, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2189.6805725097656, | |
| "dapo/avg_reward_std": 0.33485331758856773, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.47916667970518273, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 42.70833333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09371428571428571, | |
| "grad_norm": 0.01085925567895174, | |
| "kl": 0.00018781423568725586, | |
| "learning_rate": 1.9539516087697517e-07, | |
| "loss": 0.0277, | |
| "reward": 0.7506253309547901, | |
| "reward_std": 0.9654112830758095, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2063.197952270508, | |
| "dapo/avg_reward_std": 0.3108914480322883, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.45238096444379716, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 47.291666666666664, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09485714285714286, | |
| "grad_norm": 0.01137411966919899, | |
| "kl": 0.00018197298049926758, | |
| "learning_rate": 1.8594235253127372e-07, | |
| "loss": 0.0165, | |
| "reward": 0.6088770590722561, | |
| "reward_std": 0.9752795398235321, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2032.7708587646484, | |
| "dapo/avg_reward_std": 0.35138528971444993, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5000000070957911, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 40.62499999999999, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.096, | |
| "grad_norm": 0.009788557887077332, | |
| "kl": 0.0001645982265472412, | |
| "learning_rate": 1.7693309235023127e-07, | |
| "loss": -0.0005, | |
| "reward": 0.6485470458865166, | |
| "reward_std": 0.8980466201901436, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2723.2083892822266, | |
| "dapo/avg_reward_std": 0.35491983592510223, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5500000104308128, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 46.87499999999999, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09714285714285714, | |
| "grad_norm": 0.012261813506484032, | |
| "kl": 0.0002092123031616211, | |
| "learning_rate": 1.6837835672960831e-07, | |
| "loss": 0.0428, | |
| "reward": 0.769347533583641, | |
| "reward_std": 0.9622702524065971, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2813.6979370117188, | |
| "dapo/avg_reward_std": 0.31041908973739263, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46825397582281203, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 53.125, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09828571428571428, | |
| "grad_norm": 0.013307915069162846, | |
| "kl": 0.00022363662719726562, | |
| "learning_rate": 1.6028856829700258e-07, | |
| "loss": 0.0893, | |
| "reward": 0.7634551003575325, | |
| "reward_std": 0.9385863840579987, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2645.5486907958984, | |
| "dapo/avg_reward_std": 0.29486309762658747, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37931035099358396, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 38.95833333333333, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.09942857142857142, | |
| "grad_norm": 0.009606744162738323, | |
| "kl": 0.00017684698104858398, | |
| "learning_rate": 1.5267358321348285e-07, | |
| "loss": 0.0337, | |
| "reward": 0.6225443221628666, | |
| "reward_std": 0.9135682806372643, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2211.1111221313477, | |
| "dapo/avg_reward_std": 0.2131810395254029, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30092593075500595, | |
| "dapo/num_sampling_attempts": 4.5, | |
| "dapo/sampling_efficiency": 25.535714285714285, | |
| "dapo/total_prompts_processed": 27.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10057142857142858, | |
| "grad_norm": 0.011731365695595741, | |
| "kl": 0.00017218291759490967, | |
| "learning_rate": 1.4554267916537495e-07, | |
| "loss": 0.0114, | |
| "reward": 0.574246758595109, | |
| "reward_std": 0.9149169996380806, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2617.9445037841797, | |
| "dapo/avg_reward_std": 0.34073091808118317, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5087719379287017, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 47.91666666666666, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10171428571428572, | |
| "grad_norm": 0.013213962316513062, | |
| "kl": 0.0002383589744567871, | |
| "learning_rate": 1.3890454406082956e-07, | |
| "loss": 0.072, | |
| "reward": 0.7886459194123745, | |
| "reward_std": 0.9416129812598228, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2265.7743225097656, | |
| "dapo/avg_reward_std": 0.39400896430015564, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.48412699145930155, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 46.24999999999999, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10285714285714286, | |
| "grad_norm": 0.011279975064098835, | |
| "kl": 0.00017967820167541504, | |
| "learning_rate": 1.3276726544494571e-07, | |
| "loss": 0.0115, | |
| "reward": 0.8188270814716816, | |
| "reward_std": 0.956598699092865, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1751.7951850891113, | |
| "dapo/avg_reward_std": 0.346651555462317, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.44696970690380444, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 46.875, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.104, | |
| "grad_norm": 0.013495221734046936, | |
| "kl": 0.00012958049774169922, | |
| "learning_rate": 1.2713832064634125e-07, | |
| "loss": 0.0244, | |
| "reward": 0.7544833142310381, | |
| "reward_std": 0.920841209590435, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2176.5868530273438, | |
| "dapo/avg_reward_std": 0.31276301860809325, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3866666704416275, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 42.410714285714285, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10514285714285715, | |
| "grad_norm": 0.014705290086567402, | |
| "kl": 0.00018972158432006836, | |
| "learning_rate": 1.220245676671809e-07, | |
| "loss": 0.082, | |
| "reward": 0.6609778106212616, | |
| "reward_std": 0.9741540849208832, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2418.0035095214844, | |
| "dapo/avg_reward_std": 0.3533540232615037, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.45454546131870965, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 50.416666666666664, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10628571428571429, | |
| "grad_norm": 0.014526835642755032, | |
| "kl": 0.00022083520889282227, | |
| "learning_rate": 1.1743223682775649e-07, | |
| "loss": 0.0467, | |
| "reward": 0.6240662466734648, | |
| "reward_std": 0.9587830454111099, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1759.4409713745117, | |
| "dapo/avg_reward_std": 0.31654878084858257, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4166666741172473, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 47.70833333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10742857142857143, | |
| "grad_norm": 0.011724472045898438, | |
| "kl": 0.00012111663818359375, | |
| "learning_rate": 1.1336692317580158e-07, | |
| "loss": -0.0008, | |
| "reward": 0.8961930721998215, | |
| "reward_std": 0.9275476858019829, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1968.3958435058594, | |
| "dapo/avg_reward_std": 0.31933523178100587, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36000000715255737, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 41.666666666666664, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10857142857142857, | |
| "grad_norm": 0.012760731391608715, | |
| "kl": 0.00015205144882202148, | |
| "learning_rate": 1.0983357966978745e-07, | |
| "loss": 0.0303, | |
| "reward": 0.7966429069638252, | |
| "reward_std": 0.9104023575782776, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1705.9930610656738, | |
| "dapo/avg_reward_std": 0.26930796217035363, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.38888889771920665, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 48.4375, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.10971428571428571, | |
| "grad_norm": 0.016185246407985687, | |
| "kl": 0.00014796853065490723, | |
| "learning_rate": 1.068365111445064e-07, | |
| "loss": -0.0016, | |
| "reward": 0.7683778572827578, | |
| "reward_std": 0.9466121271252632, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2056.079864501953, | |
| "dapo/avg_reward_std": 0.3310448744080283, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.48484849387949164, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 51.785714285714285, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11085714285714286, | |
| "grad_norm": 0.010300490073859692, | |
| "kl": 0.00016963481903076172, | |
| "learning_rate": 1.0437936906629334e-07, | |
| "loss": 0.0027, | |
| "reward": 0.7596820928156376, | |
| "reward_std": 0.9540099799633026, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2592.8403244018555, | |
| "dapo/avg_reward_std": 0.21406691299902425, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3018018116016646, | |
| "dapo/num_sampling_attempts": 4.625, | |
| "dapo/sampling_efficiency": 30.376984126984123, | |
| "dapo/total_prompts_processed": 27.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.112, | |
| "grad_norm": 0.01034973282366991, | |
| "kl": 0.000193670392036438, | |
| "learning_rate": 1.0246514708427701e-07, | |
| "loss": 0.0254, | |
| "reward": 0.7206093966960907, | |
| "reward_std": 0.9074158370494843, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2686.343780517578, | |
| "dapo/avg_reward_std": 0.24782394810959144, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.35802469926851765, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 44.513888888888886, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11314285714285714, | |
| "grad_norm": 0.011502859182655811, | |
| "kl": 0.00023734569549560547, | |
| "learning_rate": 1.0109617738307911e-07, | |
| "loss": 0.0346, | |
| "reward": 0.6300379456952214, | |
| "reward_std": 0.9057611152529716, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2050.166664123535, | |
| "dapo/avg_reward_std": 0.3082110931475957, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.43055556404093903, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 44.166666666666664, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11428571428571428, | |
| "grad_norm": 0.015181603841483593, | |
| "kl": 0.00023311376571655273, | |
| "learning_rate": 1.002741278414069e-07, | |
| "loss": 0.0389, | |
| "reward": 0.7550710588693619, | |
| "reward_std": 0.9816905185580254, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2261.625045776367, | |
| "dapo/avg_reward_std": 0.2656887276419278, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3563218414783478, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 30.952380952380942, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11542857142857142, | |
| "grad_norm": 0.012256976217031479, | |
| "kl": 0.0002308487892150879, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0255, | |
| "reward": 0.6794679276645184, | |
| "reward_std": 0.936141237616539, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2019.2500381469727, | |
| "dapo/avg_reward_std": 0.27603574914316975, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40322581414253483, | |
| "dapo/num_sampling_attempts": 3.875, | |
| "dapo/sampling_efficiency": 31.14583333333333, | |
| "dapo/total_prompts_processed": 23.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11657142857142858, | |
| "grad_norm": 0.011883130297064781, | |
| "kl": 0.00018447637557983398, | |
| "learning_rate": 6.203955092681039e-07, | |
| "loss": 0.0566, | |
| "reward": 0.9531724825501442, | |
| "reward_std": 0.9424103274941444, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2447.142379760742, | |
| "dapo/avg_reward_std": 0.2595460871855418, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3833333447575569, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 28.958333333333332, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11771428571428572, | |
| "grad_norm": 0.010165790095925331, | |
| "kl": 0.00018906593322753906, | |
| "learning_rate": 6.126278954320294e-07, | |
| "loss": 0.0361, | |
| "reward": 0.8079591542482376, | |
| "reward_std": 0.9323313534259796, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2414.4305725097656, | |
| "dapo/avg_reward_std": 0.2675211922875766, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3505747174394542, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 42.013888888888886, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.11885714285714286, | |
| "grad_norm": 0.009563453495502472, | |
| "kl": 0.0002244710922241211, | |
| "learning_rate": 6.048412045323164e-07, | |
| "loss": 0.0367, | |
| "reward": 0.6746065132319927, | |
| "reward_std": 0.9439321234822273, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2350.4653396606445, | |
| "dapo/avg_reward_std": 0.2709802109183687, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33838384621071094, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 39.58333333333333, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12, | |
| "grad_norm": 0.011944189667701721, | |
| "kl": 0.00023399293422698975, | |
| "learning_rate": 5.97037808470444e-07, | |
| "loss": 0.0133, | |
| "reward": 0.7501634955406189, | |
| "reward_std": 0.9493465423583984, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2232.5590209960938, | |
| "dapo/avg_reward_std": 0.2304972934311834, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.32183908234382497, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 46.05654761904762, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12114285714285715, | |
| "grad_norm": 0.011308341287076473, | |
| "kl": 0.0002868175506591797, | |
| "learning_rate": 5.892200842364462e-07, | |
| "loss": 0.017, | |
| "reward": 0.8449488952755928, | |
| "reward_std": 0.9235394075512886, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2245.4306259155273, | |
| "dapo/avg_reward_std": 0.32372228088586225, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37681160478488257, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 45.53571428571428, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12228571428571429, | |
| "grad_norm": 0.01079760491847992, | |
| "kl": 0.00018143653869628906, | |
| "learning_rate": 5.813904131848564e-07, | |
| "loss": 0.0407, | |
| "reward": 0.876940418034792, | |
| "reward_std": 0.945194236934185, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2877.4410095214844, | |
| "dapo/avg_reward_std": 0.31851592376118615, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.44444445201328825, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 47.916666666666664, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12342857142857143, | |
| "grad_norm": 0.008532079868018627, | |
| "kl": 0.00026416778564453125, | |
| "learning_rate": 5.735511803093248e-07, | |
| "loss": 0.0189, | |
| "reward": 0.5354619715362787, | |
| "reward_std": 0.9343887642025948, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2287.3403396606445, | |
| "dapo/avg_reward_std": 0.2637126021660291, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41025641560554504, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 44.6875, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12457142857142857, | |
| "grad_norm": 0.010662744753062725, | |
| "kl": 0.00025856494903564453, | |
| "learning_rate": 5.657047735161255e-07, | |
| "loss": 0.0139, | |
| "reward": 0.6945868469774723, | |
| "reward_std": 0.945196196436882, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2099.197898864746, | |
| "dapo/avg_reward_std": 0.2950383967586926, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.380952388048172, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 38.541666666666664, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12571428571428572, | |
| "grad_norm": 0.011256680823862553, | |
| "kl": 0.0002060532569885254, | |
| "learning_rate": 5.578535828967777e-07, | |
| "loss": 0.0106, | |
| "reward": 0.6000825632363558, | |
| "reward_std": 0.9193084537982941, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2716.079864501953, | |
| "dapo/avg_reward_std": 0.2741352463590688, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3620689691140734, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 41.488095238095234, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12685714285714286, | |
| "grad_norm": 0.009062284603714943, | |
| "kl": 0.0002288222312927246, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0461, | |
| "reward": 0.6751261968165636, | |
| "reward_std": 0.9856812655925751, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 3045.125, | |
| "dapo/avg_reward_std": 0.36701818108558654, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5888888945182165, | |
| "dapo/num_sampling_attempts": 1.875, | |
| "dapo/sampling_efficiency": 65.625, | |
| "dapo/total_prompts_processed": 11.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.128, | |
| "grad_norm": 0.013615131378173828, | |
| "kl": 0.0003104209899902344, | |
| "learning_rate": 5.421464171032224e-07, | |
| "loss": 0.0541, | |
| "reward": 0.6107649356126785, | |
| "reward_std": 0.9386496767401695, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2261.1597442626953, | |
| "dapo/avg_reward_std": 0.2829060518741608, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.413333340883255, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 41.041666666666664, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.12914285714285714, | |
| "grad_norm": 0.01245199330151081, | |
| "kl": 0.0002949833869934082, | |
| "learning_rate": 5.342952264838747e-07, | |
| "loss": 0.0273, | |
| "reward": 0.7544166818261147, | |
| "reward_std": 0.9633913785219193, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2030.1041946411133, | |
| "dapo/avg_reward_std": 0.2660287490912846, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.32738095788019045, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 45.535714285714285, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13028571428571428, | |
| "grad_norm": 0.011100664734840393, | |
| "kl": 0.00016885995864868164, | |
| "learning_rate": 5.264488196906752e-07, | |
| "loss": 0.0649, | |
| "reward": 0.5986752398312092, | |
| "reward_std": 0.9739916548132896, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2791.465301513672, | |
| "dapo/avg_reward_std": 0.297807412147522, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.44000000655651095, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 42.08333333333333, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13142857142857142, | |
| "grad_norm": 0.011278674006462097, | |
| "kl": 0.0002925395965576172, | |
| "learning_rate": 5.186095868151436e-07, | |
| "loss": 0.0586, | |
| "reward": 0.6219565980136395, | |
| "reward_std": 0.9591977074742317, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2804.6493606567383, | |
| "dapo/avg_reward_std": 0.35703572371731634, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.42028986371081806, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 43.12499999999999, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13257142857142856, | |
| "grad_norm": 0.01122019812464714, | |
| "kl": 0.00034046173095703125, | |
| "learning_rate": 5.107799157635538e-07, | |
| "loss": 0.0233, | |
| "reward": 0.469740716740489, | |
| "reward_std": 0.9214994236826897, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2037.885456085205, | |
| "dapo/avg_reward_std": 0.30805256009101867, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4666666799783707, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 44.27083333333333, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1337142857142857, | |
| "grad_norm": 0.014056873507797718, | |
| "kl": 0.0002486705780029297, | |
| "learning_rate": 5.02962191529556e-07, | |
| "loss": 0.038, | |
| "reward": 0.9076524265110493, | |
| "reward_std": 0.9655390456318855, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2517.215316772461, | |
| "dapo/avg_reward_std": 0.23199922059263503, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3571428635290691, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 45.535714285714285, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13485714285714287, | |
| "grad_norm": 0.011827170848846436, | |
| "kl": 0.00034999847412109375, | |
| "learning_rate": 4.951587954676837e-07, | |
| "loss": 0.023, | |
| "reward": 0.5725362580269575, | |
| "reward_std": 0.9489376917481422, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2309.763916015625, | |
| "dapo/avg_reward_std": 0.33521059803340747, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.48550726084605506, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 44.166666666666664, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.136, | |
| "grad_norm": 0.014920210465788841, | |
| "kl": 0.0003477334976196289, | |
| "learning_rate": 4.873721045679706e-07, | |
| "loss": 0.0967, | |
| "reward": 0.7152486853301525, | |
| "reward_std": 0.9450967088341713, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2588.423629760742, | |
| "dapo/avg_reward_std": 0.322092001636823, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4236111156642437, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 40.62499999999999, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13714285714285715, | |
| "grad_norm": 0.009259553626179695, | |
| "kl": 0.0002923011779785156, | |
| "learning_rate": 4.79604490731896e-07, | |
| "loss": 0.0204, | |
| "reward": 0.5492150112986565, | |
| "reward_std": 0.9336576908826828, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2224.4583282470703, | |
| "dapo/avg_reward_std": 0.2846992796375638, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4603174633923031, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 51.45833333333333, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1382857142857143, | |
| "grad_norm": 0.012681272812187672, | |
| "kl": 0.0002828836441040039, | |
| "learning_rate": 4.7185832004988133e-07, | |
| "loss": 0.084, | |
| "reward": 0.8260641098022461, | |
| "reward_std": 0.9569381102919579, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2042.4618453979492, | |
| "dapo/avg_reward_std": 0.21980997684754824, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2500000039213582, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 32.916666666666664, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.13942857142857143, | |
| "grad_norm": 0.014447196386754513, | |
| "kl": 0.0002307891845703125, | |
| "learning_rate": 4.641359520805548e-07, | |
| "loss": 0.0797, | |
| "reward": 0.5401283344253898, | |
| "reward_std": 0.8589324243366718, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1821.270851135254, | |
| "dapo/avg_reward_std": 0.30661167701085407, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4652777910232544, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 51.979166666666664, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14057142857142857, | |
| "grad_norm": 0.012464089319109917, | |
| "kl": 0.00021943449974060059, | |
| "learning_rate": 4.5643973913200837e-07, | |
| "loss": 0.0524, | |
| "reward": 0.7340994998812675, | |
| "reward_std": 0.948041707277298, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1917.8576431274414, | |
| "dapo/avg_reward_std": 0.26710175829274313, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3452381023338863, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 35.83333333333333, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1417142857142857, | |
| "grad_norm": 0.01295761950314045, | |
| "kl": 0.00027441978454589844, | |
| "learning_rate": 4.4877202554526084e-07, | |
| "loss": 0.0395, | |
| "reward": 0.44990649446845055, | |
| "reward_std": 0.9298848733305931, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2151.381965637207, | |
| "dapo/avg_reward_std": 0.2770674500776374, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4347826164701711, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 55.729166666666664, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.011758905835449696, | |
| "kl": 0.00027173757553100586, | |
| "learning_rate": 4.4113514698014953e-07, | |
| "loss": -0.0284, | |
| "reward": 0.5582777298986912, | |
| "reward_std": 0.9428363367915154, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1810.0104522705078, | |
| "dapo/avg_reward_std": 0.21576001878940698, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2777777860562007, | |
| "dapo/num_sampling_attempts": 4.125, | |
| "dapo/sampling_efficiency": 36.45833333333333, | |
| "dapo/total_prompts_processed": 24.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.144, | |
| "grad_norm": 0.011465526185929775, | |
| "kl": 0.00021535158157348633, | |
| "learning_rate": 4.3353142970386557e-07, | |
| "loss": -0.0108, | |
| "reward": 0.6622855560854077, | |
| "reward_std": 0.9075748100876808, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2243.732635498047, | |
| "dapo/avg_reward_std": 0.2920382275031163, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3782051377571546, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 32.291666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14514285714285713, | |
| "grad_norm": 0.01050955057144165, | |
| "kl": 0.00027239322662353516, | |
| "learning_rate": 4.2596318988235037e-07, | |
| "loss": 0.0464, | |
| "reward": 0.533456489443779, | |
| "reward_std": 0.9191579967737198, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2544.6875, | |
| "dapo/avg_reward_std": 0.27494730835869197, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46031746906893595, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 46.24999999999999, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1462857142857143, | |
| "grad_norm": 0.017111552879214287, | |
| "kl": 0.0003796815872192383, | |
| "learning_rate": 4.1843273287476854e-07, | |
| "loss": 0.0784, | |
| "reward": 0.7016365043818951, | |
| "reward_std": 0.986565351486206, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2367.3646240234375, | |
| "dapo/avg_reward_std": 0.23454804884062874, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34567901988824207, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 47.222222222222214, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14742857142857144, | |
| "grad_norm": 0.009941876865923405, | |
| "kl": 0.00034287571907043457, | |
| "learning_rate": 4.1094235253127374e-07, | |
| "loss": 0.0061, | |
| "reward": 0.7930427435785532, | |
| "reward_std": 0.9500019550323486, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2704.125045776367, | |
| "dapo/avg_reward_std": 0.33850957382292973, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5158730284089134, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 47.291666666666664, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14857142857142858, | |
| "grad_norm": 0.012839604169130325, | |
| "kl": 0.0004292726516723633, | |
| "learning_rate": 4.034943304942796e-07, | |
| "loss": 0.0353, | |
| "reward": 0.6285950914025307, | |
| "reward_std": 0.9615181535482407, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2475.5104370117188, | |
| "dapo/avg_reward_std": 0.30972740189595654, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4696969762444496, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 52.82738095238095, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.14971428571428572, | |
| "grad_norm": 0.00980184692889452, | |
| "kl": 0.00036525726318359375, | |
| "learning_rate": 3.9609093550344907e-07, | |
| "loss": -0.0176, | |
| "reward": 0.7969067245721817, | |
| "reward_std": 0.9501392021775246, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2550.0034790039062, | |
| "dapo/avg_reward_std": 0.2723326214722225, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.30952381661960054, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 29.999999999999993, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15085714285714286, | |
| "grad_norm": 0.017491888254880905, | |
| "kl": 0.00042629241943359375, | |
| "learning_rate": 3.8873442270461485e-07, | |
| "loss": 0.0772, | |
| "reward": 0.6202478259801865, | |
| "reward_std": 0.9556004330515862, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2056.5000534057617, | |
| "dapo/avg_reward_std": 0.3334644228219986, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5333333447575569, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 47.916666666666664, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.152, | |
| "grad_norm": 0.012553170323371887, | |
| "kl": 0.0004407167434692383, | |
| "learning_rate": 3.8142703296283953e-07, | |
| "loss": -0.0185, | |
| "reward": 0.7429189011454582, | |
| "reward_std": 1.0187850967049599, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2446.9270935058594, | |
| "dapo/avg_reward_std": 0.28044995562783603, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.32758621254871634, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 32.41071428571428, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15314285714285714, | |
| "grad_norm": 0.010931877419352531, | |
| "kl": 0.00035947561264038086, | |
| "learning_rate": 3.7417099217982686e-07, | |
| "loss": 0.0372, | |
| "reward": 0.6385626457631588, | |
| "reward_std": 0.9372833296656609, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2216.4479370117188, | |
| "dapo/avg_reward_std": 0.3021530819435914, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40972223070760566, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 47.291666666666664, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15428571428571428, | |
| "grad_norm": 0.014175104908645153, | |
| "kl": 0.00040656328201293945, | |
| "learning_rate": 3.6696851061588994e-07, | |
| "loss": 0.0637, | |
| "reward": 0.6612157337367535, | |
| "reward_std": 0.9335344135761261, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1879.3889083862305, | |
| "dapo/avg_reward_std": 0.24328701660550875, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36206897219707224, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 31.77083333333333, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15542857142857142, | |
| "grad_norm": 0.011906570754945278, | |
| "kl": 0.0002865791320800781, | |
| "learning_rate": 3.5982178221668533e-07, | |
| "loss": 0.0254, | |
| "reward": 0.621966740116477, | |
| "reward_std": 0.9788949191570282, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2137.1180725097656, | |
| "dapo/avg_reward_std": 0.19872227481433324, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2857142903975078, | |
| "dapo/num_sampling_attempts": 4.375, | |
| "dapo/sampling_efficiency": 26.180555555555557, | |
| "dapo/total_prompts_processed": 26.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15657142857142858, | |
| "grad_norm": 0.011921432800590992, | |
| "kl": 0.000335007905960083, | |
| "learning_rate": 3.5273298394491515e-07, | |
| "loss": 0.0425, | |
| "reward": 0.8858193010091782, | |
| "reward_std": 0.960886999964714, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2133.6875381469727, | |
| "dapo/avg_reward_std": 0.30985672700972783, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5079365216550373, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 50.41666666666666, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15771428571428572, | |
| "grad_norm": 0.011959100142121315, | |
| "kl": 0.00031131505966186523, | |
| "learning_rate": 3.45704275117204e-07, | |
| "loss": 0.0473, | |
| "reward": 0.8114638328552246, | |
| "reward_std": 0.9208285436034203, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1887.6423797607422, | |
| "dapo/avg_reward_std": 0.30113077312707903, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.45833334401249887, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 47.916666666666664, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.15885714285714286, | |
| "grad_norm": 0.01248843315988779, | |
| "kl": 0.0003123283386230469, | |
| "learning_rate": 3.387377967463493e-07, | |
| "loss": 0.0133, | |
| "reward": 0.4802711680531502, | |
| "reward_std": 0.9749159812927246, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1555.0173835754395, | |
| "dapo/avg_reward_std": 0.2354262595375379, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3750000074505806, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 58.05555555555555, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16, | |
| "grad_norm": 0.01651233807206154, | |
| "kl": 0.0003075599670410156, | |
| "learning_rate": 3.3183567088914833e-07, | |
| "loss": -0.0302, | |
| "reward": 0.8893436007201672, | |
| "reward_std": 0.9632327631115913, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2886.878517150879, | |
| "dapo/avg_reward_std": 0.2881770460378556, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.48412698933056425, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 53.12499999999999, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16114285714285714, | |
| "grad_norm": 0.010870919562876225, | |
| "kl": 0.0004533529281616211, | |
| "learning_rate": 3.250000000000001e-07, | |
| "loss": 0.0545, | |
| "reward": 0.612054293975234, | |
| "reward_std": 0.9482586532831192, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1937.1458740234375, | |
| "dapo/avg_reward_std": 0.3629945723906807, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.47826088057911914, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 47.39583333333333, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16228571428571428, | |
| "grad_norm": 0.011180016212165356, | |
| "kl": 0.00029921531677246094, | |
| "learning_rate": 3.182328662904756e-07, | |
| "loss": -0.0113, | |
| "reward": 0.6175431702286005, | |
| "reward_std": 0.9589766189455986, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2465.3159942626953, | |
| "dapo/avg_reward_std": 0.3803708272821763, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5588235381771537, | |
| "dapo/num_sampling_attempts": 2.125, | |
| "dapo/sampling_efficiency": 60.41666666666666, | |
| "dapo/total_prompts_processed": 12.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16342857142857142, | |
| "grad_norm": 0.012431374751031399, | |
| "kl": 0.00048232078552246094, | |
| "learning_rate": 3.115363310950578e-07, | |
| "loss": 0.0679, | |
| "reward": 0.7579541122540832, | |
| "reward_std": 0.9723308756947517, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2344.1180419921875, | |
| "dapo/avg_reward_std": 0.21175828889796608, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.26315790179528686, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 25.347222222222218, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16457142857142856, | |
| "grad_norm": 0.010860033333301544, | |
| "kl": 0.0004448890686035156, | |
| "learning_rate": 3.0491243424323783e-07, | |
| "loss": -0.0005, | |
| "reward": 0.5643926626071334, | |
| "reward_std": 0.9328553825616837, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2482.1389389038086, | |
| "dapo/avg_reward_std": 0.37865253537893295, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.6145833432674408, | |
| "dapo/num_sampling_attempts": 2.0, | |
| "dapo/sampling_efficiency": 58.33333333333333, | |
| "dapo/total_prompts_processed": 12.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1657142857142857, | |
| "grad_norm": 0.011065399274230003, | |
| "kl": 0.0004658699035644531, | |
| "learning_rate": 2.9836319343816397e-07, | |
| "loss": 0.0412, | |
| "reward": 0.8742740526795387, | |
| "reward_std": 0.9688765779137611, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2297.6806030273438, | |
| "dapo/avg_reward_std": 0.40159281912971945, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.627450992955881, | |
| "dapo/num_sampling_attempts": 2.125, | |
| "dapo/sampling_efficiency": 56.24999999999999, | |
| "dapo/total_prompts_processed": 12.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16685714285714287, | |
| "grad_norm": 0.014852684922516346, | |
| "kl": 0.00038546323776245117, | |
| "learning_rate": 2.918906036420294e-07, | |
| "loss": 0.1043, | |
| "reward": 0.7259054481983185, | |
| "reward_std": 0.9452414810657501, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2485.2639389038086, | |
| "dapo/avg_reward_std": 0.2594580222731051, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.42028986435869464, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 43.125, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.168, | |
| "grad_norm": 0.011770485900342464, | |
| "kl": 0.00037994980812072754, | |
| "learning_rate": 2.854966364683872e-07, | |
| "loss": 0.0414, | |
| "reward": 0.596230074763298, | |
| "reward_std": 0.944911852478981, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2030.6180877685547, | |
| "dapo/avg_reward_std": 0.28245899453759193, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.43055556528270245, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 47.70833333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.16914285714285715, | |
| "grad_norm": 0.010600890032947063, | |
| "kl": 0.0003261566162109375, | |
| "learning_rate": 2.791832395815782e-07, | |
| "loss": 0.018, | |
| "reward": 0.5254655107855797, | |
| "reward_std": 0.9357841089367867, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2956.184051513672, | |
| "dapo/avg_reward_std": 0.3112214480837186, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.44444445334374905, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 42.08333333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1702857142857143, | |
| "grad_norm": 0.010259653441607952, | |
| "kl": 0.00048613548278808594, | |
| "learning_rate": 2.729523361034538e-07, | |
| "loss": 0.0339, | |
| "reward": 0.6315554305911064, | |
| "reward_std": 0.9876029044389725, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2855.0520629882812, | |
| "dapo/avg_reward_std": 0.32461989257070756, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5370370447635651, | |
| "dapo/num_sampling_attempts": 2.25, | |
| "dapo/sampling_efficiency": 55.20833333333333, | |
| "dapo/total_prompts_processed": 13.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17142857142857143, | |
| "grad_norm": 0.011087276972830296, | |
| "kl": 0.0005285739898681641, | |
| "learning_rate": 2.6680582402757324e-07, | |
| "loss": 0.054, | |
| "reward": 0.80087810754776, | |
| "reward_std": 1.0038108006119728, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2653.5834197998047, | |
| "dapo/avg_reward_std": 0.24287073779851198, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36979167396202683, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 30.327380952380953, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17257142857142857, | |
| "grad_norm": 0.011102661490440369, | |
| "kl": 0.0005296468734741211, | |
| "learning_rate": 2.6074557564105724e-07, | |
| "loss": 0.0527, | |
| "reward": 0.7124785147607327, | |
| "reward_std": 0.9657682925462723, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2141.173614501953, | |
| "dapo/avg_reward_std": 0.25965497308763963, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333396706088, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 36.875, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1737142857142857, | |
| "grad_norm": 0.01081050094217062, | |
| "kl": 0.00039577484130859375, | |
| "learning_rate": 2.547734369542718e-07, | |
| "loss": 0.0232, | |
| "reward": 0.5607589241117239, | |
| "reward_std": 0.9106607139110565, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2218.2882347106934, | |
| "dapo/avg_reward_std": 0.24554675072431564, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.39583333767950535, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 48.75, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17485714285714285, | |
| "grad_norm": 0.01474699191749096, | |
| "kl": 0.000436246395111084, | |
| "learning_rate": 2.488912271385139e-07, | |
| "loss": 0.0585, | |
| "reward": 0.4214355852454901, | |
| "reward_std": 0.9400415197014809, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2466.3368377685547, | |
| "dapo/avg_reward_std": 0.3308070342649113, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46212121776559134, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 38.541666666666664, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.176, | |
| "grad_norm": 0.011210402473807335, | |
| "kl": 0.0004417896270751953, | |
| "learning_rate": 2.4310073797187573e-07, | |
| "loss": -0.0244, | |
| "reward": 0.7323229797184467, | |
| "reward_std": 0.9493635967373848, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2012.8715438842773, | |
| "dapo/avg_reward_std": 0.2809670078754425, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4133333426713943, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 35.83333333333333, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17714285714285713, | |
| "grad_norm": 0.01654733158648014, | |
| "kl": 0.00036275386810302734, | |
| "learning_rate": 2.374037332934512e-07, | |
| "loss": 0.0589, | |
| "reward": 0.6634213328361511, | |
| "reward_std": 0.8785304054617882, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2291.3021240234375, | |
| "dapo/avg_reward_std": 0.3599580733672432, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.44927537182103033, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 45.53571428571428, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1782857142857143, | |
| "grad_norm": 0.011936171911656857, | |
| "kl": 0.00043827295303344727, | |
| "learning_rate": 2.3180194846605364e-07, | |
| "loss": 0.0699, | |
| "reward": 0.8599490560591221, | |
| "reward_std": 0.9719394743442535, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2499.791702270508, | |
| "dapo/avg_reward_std": 0.3457585884766145, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5378787998448719, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 48.95833333333333, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.17942857142857144, | |
| "grad_norm": 0.01289551891386509, | |
| "kl": 0.00048601627349853516, | |
| "learning_rate": 2.2629708984760706e-07, | |
| "loss": 0.0584, | |
| "reward": 0.6511420179158449, | |
| "reward_std": 0.9461185112595558, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2437.9375228881836, | |
| "dapo/avg_reward_std": 0.23957703853475637, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.36781609829129847, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 37.291666666666664, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18057142857142858, | |
| "grad_norm": 0.012769551016390324, | |
| "kl": 0.0004298686981201172, | |
| "learning_rate": 2.2089083427137329e-07, | |
| "loss": 0.0258, | |
| "reward": 0.6606059782207012, | |
| "reward_std": 0.9088018089532852, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1726.5868225097656, | |
| "dapo/avg_reward_std": 0.3139249332573103, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3913043543048527, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 55.416666666666664, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18171428571428572, | |
| "grad_norm": 0.013688490726053715, | |
| "kl": 0.00027683377265930176, | |
| "learning_rate": 2.1558482853517253e-07, | |
| "loss": 0.0506, | |
| "reward": 0.7147123599424958, | |
| "reward_std": 0.9531080722808838, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1593.003475189209, | |
| "dapo/avg_reward_std": 0.2799004193010001, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.33908046782016754, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 45.3125, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18285714285714286, | |
| "grad_norm": 0.020229365676641464, | |
| "kl": 0.00033217668533325195, | |
| "learning_rate": 2.1038068889975259e-07, | |
| "loss": 0.0296, | |
| "reward": 0.7677402682602406, | |
| "reward_std": 0.9385703578591347, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1877.9444274902344, | |
| "dapo/avg_reward_std": 0.36716995636622113, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4930555621782939, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 44.49404761904761, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.184, | |
| "grad_norm": 0.012556586414575577, | |
| "kl": 0.00037413835525512695, | |
| "learning_rate": 2.0528000059645995e-07, | |
| "loss": 0.0401, | |
| "reward": 0.6385876163840294, | |
| "reward_std": 0.9741755276918411, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2543.1145782470703, | |
| "dapo/avg_reward_std": 0.20304026060244618, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.28921569226419225, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 26.249999999999996, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18514285714285714, | |
| "grad_norm": 0.010984732769429684, | |
| "kl": 0.0005058050155639648, | |
| "learning_rate": 2.0028431734436308e-07, | |
| "loss": 0.0214, | |
| "reward": 0.8138710260391235, | |
| "reward_std": 0.937220610678196, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2579.7916946411133, | |
| "dapo/avg_reward_std": 0.2669851701049244, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3333333397612852, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 31.38888888888889, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18628571428571428, | |
| "grad_norm": 0.01393849402666092, | |
| "kl": 0.0005407929420471191, | |
| "learning_rate": 1.9539516087697517e-07, | |
| "loss": 0.0557, | |
| "reward": 0.6086596520617604, | |
| "reward_std": 0.9360831007361412, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2303.781295776367, | |
| "dapo/avg_reward_std": 0.2889538109302521, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40384616129673445, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 41.041666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18742857142857142, | |
| "grad_norm": 0.012467012740671635, | |
| "kl": 0.0005753040313720703, | |
| "learning_rate": 1.9061402047871833e-07, | |
| "loss": 0.0286, | |
| "reward": 0.7579413987696171, | |
| "reward_std": 0.966604009270668, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2215.8715744018555, | |
| "dapo/avg_reward_std": 0.2284111071910177, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3630952446588448, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 49.72222222222222, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18857142857142858, | |
| "grad_norm": 0.013376005925238132, | |
| "kl": 0.00038570165634155273, | |
| "learning_rate": 1.8594235253127372e-07, | |
| "loss": 0.0737, | |
| "reward": 0.6369971446692944, | |
| "reward_std": 0.944696456193924, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2194.999984741211, | |
| "dapo/avg_reward_std": 0.35230770577555115, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5289855158847311, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 40.416666666666664, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.18971428571428572, | |
| "grad_norm": 0.00896221399307251, | |
| "kl": 0.0004324018955230713, | |
| "learning_rate": 1.8138158006995363e-07, | |
| "loss": -0.0087, | |
| "reward": 0.770520705729723, | |
| "reward_std": 0.9258415997028351, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2363.9861373901367, | |
| "dapo/avg_reward_std": 0.23058613193662544, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2763157930029066, | |
| "dapo/num_sampling_attempts": 4.75, | |
| "dapo/sampling_efficiency": 30.44642857142857, | |
| "dapo/total_prompts_processed": 28.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19085714285714286, | |
| "grad_norm": 0.011913989670574665, | |
| "kl": 0.0005799531936645508, | |
| "learning_rate": 1.7693309235023127e-07, | |
| "loss": 0.0282, | |
| "reward": 0.8937316909432411, | |
| "reward_std": 0.9134809225797653, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1846.3229217529297, | |
| "dapo/avg_reward_std": 0.2788652099412063, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.37931035459041595, | |
| "dapo/num_sampling_attempts": 3.625, | |
| "dapo/sampling_efficiency": 33.03571428571428, | |
| "dapo/total_prompts_processed": 21.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.192, | |
| "grad_norm": 0.013345438055694103, | |
| "kl": 0.00038933753967285156, | |
| "learning_rate": 1.7259824442455923e-07, | |
| "loss": 0.0657, | |
| "reward": 0.5173812105786055, | |
| "reward_std": 0.9046202600002289, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1632.9965515136719, | |
| "dapo/avg_reward_std": 0.33004767837978544, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.49206350318023134, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 51.45833333333333, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19314285714285714, | |
| "grad_norm": 0.016018711030483246, | |
| "kl": 0.0004235506057739258, | |
| "learning_rate": 1.6837835672960831e-07, | |
| "loss": -0.0266, | |
| "reward": 0.7293304707854986, | |
| "reward_std": 0.9580913484096527, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2218.357666015625, | |
| "dapo/avg_reward_std": 0.30882045084779913, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4242424287579276, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 49.37499999999999, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19428571428571428, | |
| "grad_norm": 0.012691031210124493, | |
| "kl": 0.0005915164947509766, | |
| "learning_rate": 1.6427471468404952e-07, | |
| "loss": 0.0375, | |
| "reward": 0.731636168435216, | |
| "reward_std": 0.9506037011742592, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2086.989585876465, | |
| "dapo/avg_reward_std": 0.26685478786627453, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.372222230831782, | |
| "dapo/num_sampling_attempts": 3.75, | |
| "dapo/sampling_efficiency": 36.45833333333333, | |
| "dapo/total_prompts_processed": 22.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19542857142857142, | |
| "grad_norm": 0.0107533298432827, | |
| "kl": 0.00045359134674072266, | |
| "learning_rate": 1.6028856829700258e-07, | |
| "loss": 0.0268, | |
| "reward": 0.6401270348578691, | |
| "reward_std": 0.9421326443552971, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1523.298625946045, | |
| "dapo/avg_reward_std": 0.2958875367274651, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4294871888481654, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 41.041666666666664, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19657142857142856, | |
| "grad_norm": 0.02487981878221035, | |
| "kl": 0.00044208765029907227, | |
| "learning_rate": 1.5642113178727193e-07, | |
| "loss": 0.0215, | |
| "reward": 0.5742892920970917, | |
| "reward_std": 0.9192508533596992, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2197.4722290039062, | |
| "dapo/avg_reward_std": 0.33716599914160644, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4545454619960351, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 45.20833333333333, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.1977142857142857, | |
| "grad_norm": 0.00999497715383768, | |
| "kl": 0.0006158351898193359, | |
| "learning_rate": 1.5267358321348285e-07, | |
| "loss": -0.0198, | |
| "reward": 0.6909432113170624, | |
| "reward_std": 0.9331774786114693, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2469.1909942626953, | |
| "dapo/avg_reward_std": 0.31674497947096825, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4722222325702508, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 47.61904761904762, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.19885714285714284, | |
| "grad_norm": 0.027324816212058067, | |
| "kl": 0.0005202293395996094, | |
| "learning_rate": 1.4904706411523448e-07, | |
| "loss": 0.1381, | |
| "reward": 0.7919853329658508, | |
| "reward_std": 0.9734821692109108, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2290.7292098999023, | |
| "dapo/avg_reward_std": 0.2796748812709536, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.41071429369705065, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 31.041666666666664, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2, | |
| "grad_norm": 0.011332061141729355, | |
| "kl": 0.000499039888381958, | |
| "learning_rate": 1.4554267916537495e-07, | |
| "loss": 0.0026, | |
| "reward": 0.5971913021057844, | |
| "reward_std": 0.9767839089035988, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2643.475685119629, | |
| "dapo/avg_reward_std": 0.30459834399976227, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4649122922044051, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 54.58333333333333, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20114285714285715, | |
| "grad_norm": 0.011058920994400978, | |
| "kl": 0.0006421804428100586, | |
| "learning_rate": 1.4216149583350755e-07, | |
| "loss": 0.0243, | |
| "reward": 0.801079198718071, | |
| "reward_std": 1.0328236892819405, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2657.517364501953, | |
| "dapo/avg_reward_std": 0.268055671826005, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3072916711680591, | |
| "dapo/num_sampling_attempts": 4.0, | |
| "dapo/sampling_efficiency": 30.32738095238095, | |
| "dapo/total_prompts_processed": 24.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2022857142857143, | |
| "grad_norm": 0.012514113448560238, | |
| "kl": 0.0006227493286132812, | |
| "learning_rate": 1.3890454406082956e-07, | |
| "loss": 0.066, | |
| "reward": 0.5342087037861347, | |
| "reward_std": 0.9403787776827812, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1730.2395935058594, | |
| "dapo/avg_reward_std": 0.22906314557598484, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.354838716406976, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 49.99999999999999, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20342857142857143, | |
| "grad_norm": 0.013909725472331047, | |
| "kl": 0.0004641413688659668, | |
| "learning_rate": 1.3577281594640182e-07, | |
| "loss": -0.0032, | |
| "reward": 0.817855941131711, | |
| "reward_std": 0.9715805351734161, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1916.9652633666992, | |
| "dapo/avg_reward_std": 0.33905652307328726, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5000000085149493, | |
| "dapo/num_sampling_attempts": 2.625, | |
| "dapo/sampling_efficiency": 49.99999999999999, | |
| "dapo/total_prompts_processed": 15.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20457142857142857, | |
| "grad_norm": 0.010170280002057552, | |
| "kl": 0.00033092498779296875, | |
| "learning_rate": 1.3276726544494571e-07, | |
| "loss": 0.0153, | |
| "reward": 0.6332587338984013, | |
| "reward_std": 0.9844094663858414, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2013.7534942626953, | |
| "dapo/avg_reward_std": 0.4115603660282336, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5175438719360452, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 48.95833333333333, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2057142857142857, | |
| "grad_norm": 0.010059732012450695, | |
| "kl": 0.0004872828722000122, | |
| "learning_rate": 1.2988880807625927e-07, | |
| "loss": 0.012, | |
| "reward": 0.7964395936578512, | |
| "reward_std": 0.9064052030444145, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2538.3159713745117, | |
| "dapo/avg_reward_std": 0.3185795678032769, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3703703775450035, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 40.0, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20685714285714285, | |
| "grad_norm": 0.009190794080495834, | |
| "kl": 0.0005941390991210938, | |
| "learning_rate": 1.2713832064634125e-07, | |
| "loss": -0.0091, | |
| "reward": 0.6052752519026399, | |
| "reward_std": 0.9398948326706886, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1992.0277557373047, | |
| "dapo/avg_reward_std": 0.30058977752923965, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3392857238650322, | |
| "dapo/num_sampling_attempts": 3.5, | |
| "dapo/sampling_efficiency": 45.32738095238095, | |
| "dapo/total_prompts_processed": 21.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.208, | |
| "grad_norm": 0.017918387427926064, | |
| "kl": 0.00043332576751708984, | |
| "learning_rate": 1.2451664098030743e-07, | |
| "loss": 0.0782, | |
| "reward": 0.7308525424450636, | |
| "reward_std": 0.8988610878586769, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2368.312515258789, | |
| "dapo/avg_reward_std": 0.2227620858213176, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.40579710317694623, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 48.33333333333333, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.20914285714285713, | |
| "grad_norm": 0.01093615498393774, | |
| "kl": 0.0005226731300354004, | |
| "learning_rate": 1.220245676671809e-07, | |
| "loss": -0.0097, | |
| "reward": 0.6296821031719446, | |
| "reward_std": 0.9496165588498116, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1855.0486297607422, | |
| "dapo/avg_reward_std": 0.3308859848976135, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4133333384990692, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 41.666666666666664, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.2102857142857143, | |
| "grad_norm": 0.013805963099002838, | |
| "kl": 0.0004195570945739746, | |
| "learning_rate": 1.1966285981663407e-07, | |
| "loss": 0.0542, | |
| "reward": 0.8230033777654171, | |
| "reward_std": 0.9269852489233017, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2737.260452270508, | |
| "dapo/avg_reward_std": 0.3074522775908311, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.45138889489074546, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 49.37499999999999, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21142857142857144, | |
| "grad_norm": 0.01179632730782032, | |
| "kl": 0.0006718635559082031, | |
| "learning_rate": 1.1743223682775649e-07, | |
| "loss": 0.0529, | |
| "reward": 0.6228375509381294, | |
| "reward_std": 0.9775977432727814, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2526.899368286133, | |
| "dapo/avg_reward_std": 0.2964219942688942, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.48333334401249883, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 58.33333333333333, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21257142857142858, | |
| "grad_norm": 0.014796112664043903, | |
| "kl": 0.0005816221237182617, | |
| "learning_rate": 1.1533337816991931e-07, | |
| "loss": 0.088, | |
| "reward": 0.8448536917567253, | |
| "reward_std": 0.9608767181634903, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2288.274345397949, | |
| "dapo/avg_reward_std": 0.3166468055159957, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.34567901823255753, | |
| "dapo/num_sampling_attempts": 3.375, | |
| "dapo/sampling_efficiency": 40.972222222222214, | |
| "dapo/total_prompts_processed": 20.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21371428571428572, | |
| "grad_norm": 0.011898735538125038, | |
| "kl": 0.000521540641784668, | |
| "learning_rate": 1.1336692317580158e-07, | |
| "loss": 0.0415, | |
| "reward": 0.7687236070632935, | |
| "reward_std": 0.9334599822759628, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2432.531265258789, | |
| "dapo/avg_reward_std": 0.28751447051763535, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4513888979951541, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 53.33333333333333, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21485714285714286, | |
| "grad_norm": 0.010497819632291794, | |
| "kl": 0.0007112026214599609, | |
| "learning_rate": 1.1153347084664419e-07, | |
| "loss": 0.0185, | |
| "reward": 0.7899295631796122, | |
| "reward_std": 0.9512373134493828, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1948.9167022705078, | |
| "dapo/avg_reward_std": 0.30568089832862216, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46527779040237266, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 36.87499999999999, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.216, | |
| "grad_norm": 0.013562222942709923, | |
| "kl": 0.0006091594696044922, | |
| "learning_rate": 1.0983357966978745e-07, | |
| "loss": 0.0388, | |
| "reward": 0.6485428418964148, | |
| "reward_std": 0.9110815972089767, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2494.395866394043, | |
| "dapo/avg_reward_std": 0.27111421525478363, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3863636404275894, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 45.20833333333333, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21714285714285714, | |
| "grad_norm": 0.00931188277900219, | |
| "kl": 0.0006044209003448486, | |
| "learning_rate": 1.0826776744855121e-07, | |
| "loss": 0.0024, | |
| "reward": 0.5944220442324877, | |
| "reward_std": 0.9433802142739296, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2601.7569427490234, | |
| "dapo/avg_reward_std": 0.3233232215046883, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.49166667386889457, | |
| "dapo/num_sampling_attempts": 2.5, | |
| "dapo/sampling_efficiency": 49.375, | |
| "dapo/total_prompts_processed": 15.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21828571428571428, | |
| "grad_norm": 0.011869938112795353, | |
| "kl": 0.0006383061408996582, | |
| "learning_rate": 1.068365111445064e-07, | |
| "loss": 0.0221, | |
| "reward": 0.5644997656345367, | |
| "reward_std": 0.9473884925246239, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1624.8541564941406, | |
| "dapo/avg_reward_std": 0.33193936944007874, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.46969697692177514, | |
| "dapo/num_sampling_attempts": 2.75, | |
| "dapo/sampling_efficiency": 44.791666666666664, | |
| "dapo/total_prompts_processed": 16.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.21942857142857142, | |
| "grad_norm": 0.011828861199319363, | |
| "kl": 0.0003381967544555664, | |
| "learning_rate": 1.0554024673218806e-07, | |
| "loss": -0.0125, | |
| "reward": 0.7034952798858285, | |
| "reward_std": 0.9275326952338219, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2333.607650756836, | |
| "dapo/avg_reward_std": 0.4260722654206412, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.6309523891125407, | |
| "dapo/num_sampling_attempts": 1.75, | |
| "dapo/sampling_efficiency": 70.83333333333333, | |
| "dapo/total_prompts_processed": 10.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22057142857142858, | |
| "grad_norm": 0.010871903039515018, | |
| "kl": 0.0005550980567932129, | |
| "learning_rate": 1.0437936906629334e-07, | |
| "loss": -0.004, | |
| "reward": 0.4316184278577566, | |
| "reward_std": 0.9555172920227051, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2939.9097442626953, | |
| "dapo/avg_reward_std": 0.2783619257119986, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.3846153932122084, | |
| "dapo/num_sampling_attempts": 3.25, | |
| "dapo/sampling_efficiency": 47.39583333333333, | |
| "dapo/total_prompts_processed": 19.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22171428571428572, | |
| "grad_norm": 0.014206220395863056, | |
| "kl": 0.0007078647613525391, | |
| "learning_rate": 1.0335423176140511e-07, | |
| "loss": 0.0805, | |
| "reward": 0.7283875979483128, | |
| "reward_std": 0.9719515442848206, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 1945.9653244018555, | |
| "dapo/avg_reward_std": 0.3208765654187453, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.5438596551355562, | |
| "dapo/num_sampling_attempts": 2.375, | |
| "dapo/sampling_efficiency": 60.416666666666664, | |
| "dapo/total_prompts_processed": 14.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22285714285714286, | |
| "grad_norm": 0.015090257860720158, | |
| "kl": 0.000569462776184082, | |
| "learning_rate": 1.0246514708427701e-07, | |
| "loss": -0.021, | |
| "reward": 0.5579635920003057, | |
| "reward_std": 0.9634370356798172, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2212.5902709960938, | |
| "dapo/avg_reward_std": 0.23615881362382105, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.2696078498573864, | |
| "dapo/num_sampling_attempts": 4.25, | |
| "dapo/sampling_efficiency": 27.916666666666664, | |
| "dapo/total_prompts_processed": 25.5, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.224, | |
| "grad_norm": 0.012650169432163239, | |
| "kl": 0.0005346536636352539, | |
| "learning_rate": 1.017123858587145e-07, | |
| "loss": 0.0756, | |
| "reward": 0.6994661018252373, | |
| "reward_std": 0.9281085133552551, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2392.7742919921875, | |
| "dapo/avg_reward_std": 0.3088900530338287, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.406666676402092, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 45.3125, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22514285714285714, | |
| "grad_norm": 0.01346337329596281, | |
| "kl": 0.0006176233291625977, | |
| "learning_rate": 1.0109617738307911e-07, | |
| "loss": 0.0523, | |
| "reward": 0.6644653081893921, | |
| "reward_std": 0.9385305866599083, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2743.819465637207, | |
| "dapo/avg_reward_std": 0.3153854298591614, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4133333432674408, | |
| "dapo/num_sampling_attempts": 3.125, | |
| "dapo/sampling_efficiency": 43.75, | |
| "dapo/total_prompts_processed": 18.75, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22628571428571428, | |
| "grad_norm": 0.010797293856739998, | |
| "kl": 0.000672459602355957, | |
| "learning_rate": 1.0061670936044178e-07, | |
| "loss": 0.04, | |
| "reward": 0.5658168056979775, | |
| "reward_std": 0.9682240337133408, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2336.80558013916, | |
| "dapo/avg_reward_std": 0.3246711401835732, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.4855072530715362, | |
| "dapo/num_sampling_attempts": 2.875, | |
| "dapo/sampling_efficiency": 41.666666666666664, | |
| "dapo/total_prompts_processed": 17.25, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22742857142857142, | |
| "grad_norm": 0.011765834875404835, | |
| "kl": 0.00055694580078125, | |
| "learning_rate": 1.002741278414069e-07, | |
| "loss": 0.0308, | |
| "reward": 0.6460054386407137, | |
| "reward_std": 0.9711420610547066, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_fraction": 0.0, | |
| "completion_length": 2571.1875228881836, | |
| "dapo/avg_reward_std": 0.29997331152359646, | |
| "dapo/filter_reward_index": 0.0, | |
| "dapo/kept_prompts_ratio": 0.486111119389534, | |
| "dapo/num_sampling_attempts": 3.0, | |
| "dapo/sampling_efficiency": 39.285714285714285, | |
| "dapo/total_prompts_processed": 18.0, | |
| "dapo/valid_prompts_collected": 6.0, | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 0.009876573458313942, | |
| "kl": 0.0005443096160888672, | |
| "learning_rate": 1.0006853717962393e-07, | |
| "loss": 0.0268, | |
| "reward": 0.5957941338419914, | |
| "reward_std": 0.992652915418148, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "step": 200, | |
| "total_flos": 0.0, | |
| "train_loss": 0.01698429927288089, | |
| "train_runtime": 137940.7556, | |
| "train_samples_per_second": 0.07, | |
| "train_steps_per_second": 0.001 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |