{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22857142857142856, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_fraction": 0.0, "completion_length": 2216.625045776367, "dapo/avg_reward_std": 0.23920068350331536, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3735632248993578, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 33.86904761904762, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.001142857142857143, "grad_norm": 0.10874509066343307, "kl": 0.0, "learning_rate": 0.0, "loss": 0.0468, "reward": 0.6486758906394243, "reward_std": 0.9342863708734512, "step": 1 }, { "clip_fraction": 0.0, "completion_length": 2926.4757690429688, "dapo/avg_reward_std": 0.24011585204040303, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3284313836518456, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 26.874999999999993, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.002285714285714286, "grad_norm": 0.12814132869243622, "kl": 0.0, "learning_rate": 1e-07, "loss": 0.0508, "reward": 0.2922485675662756, "reward_std": 0.9327598959207535, "step": 2 }, { "clip_fraction": 0.0, "completion_length": 2888.1527709960938, "dapo/avg_reward_std": 0.2903491040070852, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36111111839612325, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 36.875, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.0034285714285714284, "grad_norm": 0.1155443787574768, "kl": 2.9146671295166016e-05, "learning_rate": 2e-07, "loss": 0.0647, "reward": 0.3509849710389972, "reward_std": 0.9315856546163559, "step": 3 }, { "clip_fraction": 0.0, "completion_length": 2535.718734741211, "dapo/avg_reward_std": 0.25628158891642533, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35802469595714853, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 41.56249999999999, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.004571428571428572, "grad_norm": 0.14338600635528564, "kl": 2.1044164896011353e-05, "learning_rate": 3e-07, "loss": 0.0536, "reward": 0.5615630690008402, "reward_std": 0.9670609682798386, "step": 4 }, { "clip_fraction": 0.0, "completion_length": 2548.916702270508, "dapo/avg_reward_std": 0.2889887053391029, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40804598814454573, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 36.875, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.005714285714285714, "grad_norm": 0.10121661424636841, "kl": 2.7820467948913574e-05, "learning_rate": 4e-07, "loss": 0.0263, "reward": 0.5986085031181574, "reward_std": 0.9444186091423035, "step": 5 }, { "clip_fraction": 0.0, "completion_length": 2357.579864501953, "dapo/avg_reward_std": 0.30308351665735245, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36309524306229185, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 37.5, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.006857142857142857, "grad_norm": 0.171969935297966, "kl": 2.6032328605651855e-05, "learning_rate": 5e-07, "loss": 0.0906, "reward": 0.4527070773765445, "reward_std": 0.9109365493059158, "step": 6 }, { "clip_fraction": 0.0, "completion_length": 2404.2534790039062, "dapo/avg_reward_std": 0.3077041815828394, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41975309506610586, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 37.916666666666664, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.008, "grad_norm": 0.12406504899263382, "kl": 1.9066035747528076e-05, "learning_rate": 6e-07, "loss": 0.0645, "reward": 0.5808906648308039, "reward_std": 0.9664968773722649, "step": 7 }, { "clip_fraction": 0.0, "completion_length": 2833.3056030273438, "dapo/avg_reward_std": 0.2214778729023472, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31182796435971416, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 36.577380952380956, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.009142857142857144, "grad_norm": 0.13480524718761444, "kl": 3.4965574741363525e-05, "learning_rate": 7e-07, "loss": 0.0738, "reward": 0.5177570842206478, "reward_std": 0.9147621840238571, "step": 8 }, { "clip_fraction": 0.0, "completion_length": 2965.6736450195312, "dapo/avg_reward_std": 0.2788830002148946, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3888888966154169, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 46.36904761904761, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.010285714285714285, "grad_norm": 0.08226096630096436, "kl": 1.4536082744598389e-05, "learning_rate": 8e-07, "loss": 0.0316, "reward": 0.5644797384738922, "reward_std": 0.9423079788684845, "step": 9 }, { "clip_fraction": 0.0, "completion_length": 2574.461814880371, "dapo/avg_reward_std": 0.3602010520065532, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.588235302883036, "dapo/num_sampling_attempts": 2.125, "dapo/sampling_efficiency": 61.45833333333333, "dapo/total_prompts_processed": 12.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.011428571428571429, "grad_norm": 0.1667146533727646, "kl": 2.9319897294044495e-05, "learning_rate": 9e-07, "loss": 0.0894, "reward": 0.6415909845381975, "reward_std": 0.9869548827409744, "step": 10 }, { "clip_fraction": 0.0, "completion_length": 2798.982666015625, "dapo/avg_reward_std": 0.15393146287117684, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.14880952797830105, "dapo/num_sampling_attempts": 7.0, "dapo/sampling_efficiency": 15.882936507936506, "dapo/total_prompts_processed": 42.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.012571428571428572, "grad_norm": 0.1166534572839737, "kl": 2.0567327737808228e-05, "learning_rate": 1e-06, "loss": 0.0207, "reward": 0.2987014357931912, "reward_std": 0.868266686797142, "step": 11 }, { "clip_fraction": 0.0, "completion_length": 2377.555595397949, "dapo/avg_reward_std": 0.21645361091941595, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2968750037252903, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 38.125, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.013714285714285714, "grad_norm": 0.23483960330486298, "kl": 3.6854296922683716e-05, "learning_rate": 9.997258721585931e-07, "loss": 0.0491, "reward": 0.6348252706229687, "reward_std": 0.9863902181386948, "step": 12 }, { "clip_fraction": 0.0, "completion_length": 2688.1111755371094, "dapo/avg_reward_std": 0.34906478971242905, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.49166667461395264, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 52.08333333333333, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.014857142857142857, "grad_norm": 0.09364266693592072, "kl": 3.152713179588318e-05, "learning_rate": 9.989038226169207e-07, "loss": 0.0431, "reward": 0.5878111608326435, "reward_std": 0.9752944633364677, "step": 13 }, { "clip_fraction": 0.0, "completion_length": 2029.9132270812988, "dapo/avg_reward_std": 0.25792322993278505, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36666666984558105, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 57.5, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.016, "grad_norm": 0.13894271850585938, "kl": 4.156678915023804e-05, "learning_rate": 9.975348529157229e-07, "loss": 0.0279, "reward": 0.5834919223561883, "reward_std": 0.9710095003247261, "step": 14 }, { "clip_fraction": 0.0, "completion_length": 2817.8576583862305, "dapo/avg_reward_std": 0.3106007158756256, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5333333484828472, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 52.08333333333333, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.017142857142857144, "grad_norm": 0.08778129518032074, "kl": 3.078579902648926e-05, "learning_rate": 9.956206309337066e-07, "loss": 0.0343, "reward": 0.6716702915728092, "reward_std": 0.99223193526268, "step": 15 }, { "clip_fraction": 0.0, "completion_length": 2570.2500076293945, "dapo/avg_reward_std": 0.244095021715531, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35897436336829114, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 44.49404761904762, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.018285714285714287, "grad_norm": 0.07460447400808334, "kl": 0.00025935471057891846, "learning_rate": 9.931634888554935e-07, "loss": 0.0146, "reward": 0.7213943339884281, "reward_std": 0.9671430364251137, "step": 16 }, { "clip_fraction": 0.0, "completion_length": 2483.413215637207, "dapo/avg_reward_std": 0.2672279636065165, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35000000496705375, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 29.166666666666664, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.019428571428571427, "grad_norm": 0.12397046387195587, "kl": 0.00022289156913757324, "learning_rate": 9.901664203302124e-07, "loss": 0.0624, "reward": 0.4952134042978287, "reward_std": 0.9074268043041229, "step": 17 }, { "clip_fraction": 0.0, "completion_length": 2537.8194580078125, "dapo/avg_reward_std": 0.34170445956681905, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5438596567040995, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 48.95833333333333, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.02057142857142857, "grad_norm": 0.1614188253879547, "kl": 0.0003694295883178711, "learning_rate": 9.866330768241983e-07, "loss": 0.1136, "reward": 0.6263789646327496, "reward_std": 0.9367138147354126, "step": 18 }, { "clip_fraction": 0.0, "completion_length": 2041.2916984558105, "dapo/avg_reward_std": 0.23441629879402393, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31818182224577124, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 38.36805555555556, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.021714285714285714, "grad_norm": 0.2115960717201233, "kl": 0.0005898326635360718, "learning_rate": 9.825677631722435e-07, "loss": 0.0603, "reward": 0.6228582374751568, "reward_std": 0.9455358982086182, "step": 19 }, { "clip_fraction": 0.0, "completion_length": 2392.7882385253906, "dapo/avg_reward_std": 0.22908216629709516, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2952381010566439, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 33.541666666666664, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.022857142857142857, "grad_norm": 0.20383711159229279, "kl": 0.0008958578109741211, "learning_rate": 9.779754323328192e-07, "loss": 0.1313, "reward": 0.41653589624911547, "reward_std": 0.9027180448174477, "step": 20 }, { "clip_fraction": 0.0, "completion_length": 2966.260452270508, "dapo/avg_reward_std": 0.16204138861762152, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.25555555986033546, "dapo/num_sampling_attempts": 5.625, "dapo/sampling_efficiency": 22.84722222222222, "dapo/total_prompts_processed": 33.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.024, "grad_norm": 0.1098903939127922, "kl": 0.0002017766237258911, "learning_rate": 9.728616793536587e-07, "loss": 0.0825, "reward": 0.43902475386857986, "reward_std": 0.9111825451254845, "step": 21 }, { "clip_fraction": 0.0, "completion_length": 3016.357696533203, "dapo/avg_reward_std": 0.28799043401427893, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.42028986371081806, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 52.20238095238095, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.025142857142857144, "grad_norm": 0.1315963715314865, "kl": 0.0005468130111694336, "learning_rate": 9.672327345550543e-07, "loss": 0.0657, "reward": 0.5281127206981182, "reward_std": 0.9846171587705612, "step": 22 }, { "clip_fraction": 0.0, "completion_length": 2408.8333282470703, "dapo/avg_reward_std": 0.24506365811383282, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3703703780968984, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 40.74404761904761, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.026285714285714287, "grad_norm": 0.12457310408353806, "kl": 0.001109391450881958, "learning_rate": 9.610954559391704e-07, "loss": 0.0304, "reward": 0.6419337540864944, "reward_std": 0.9689808040857315, "step": 23 }, { "clip_fraction": 0.0, "completion_length": 2891.7777709960938, "dapo/avg_reward_std": 0.2580765459848487, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4420289954413538, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 51.785714285714285, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.027428571428571427, "grad_norm": 0.09673310071229935, "kl": 0.0006018728017807007, "learning_rate": 9.54457320834625e-07, "loss": 0.0143, "reward": 0.4589955974370241, "reward_std": 0.9405186697840691, "step": 24 }, { "clip_fraction": 0.0, "completion_length": 2994.8159790039062, "dapo/avg_reward_std": 0.24148962597052256, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3777777850627899, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 34.99999999999999, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.02857142857142857, "grad_norm": 0.12189235538244247, "kl": 0.0021944642066955566, "learning_rate": 9.473264167865171e-07, "loss": 0.0869, "reward": 0.4214114509522915, "reward_std": 0.918621838092804, "step": 25 }, { "clip_fraction": 0.0, "completion_length": 3106.2743530273438, "dapo/avg_reward_std": 0.21211836412549018, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.23750000558793544, "dapo/num_sampling_attempts": 5.0, "dapo/sampling_efficiency": 25.729166666666664, "dapo/total_prompts_processed": 30.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.029714285714285714, "grad_norm": 0.11006143689155579, "kl": 0.002092994749546051, "learning_rate": 9.397114317029974e-07, "loss": 0.0617, "reward": 0.4296974149765447, "reward_std": 0.9136241301894188, "step": 26 }, { "clip_fraction": 0.0, "completion_length": 2601.388946533203, "dapo/avg_reward_std": 0.24121128850513035, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.28240741416811943, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 25.76388888888889, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.030857142857142857, "grad_norm": 0.11345893889665604, "kl": 0.003206908702850342, "learning_rate": 9.316216432703916e-07, "loss": 0.0926, "reward": 0.5876726619899273, "reward_std": 0.9382903277873993, "step": 27 }, { "clip_fraction": 0.0, "completion_length": 2861.6180839538574, "dapo/avg_reward_std": 0.23961352888080809, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3055555605226093, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 27.94642857142857, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.032, "grad_norm": 0.1445908397436142, "kl": 0.0031346678733825684, "learning_rate": 9.230669076497687e-07, "loss": 0.0852, "reward": 0.40619770623743534, "reward_std": 0.9506878778338432, "step": 28 }, { "clip_fraction": 0.0, "completion_length": 2729.1875, "dapo/avg_reward_std": 0.24243796567122142, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35555555919806164, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 39.93055555555555, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.03314285714285714, "grad_norm": 0.11093314737081528, "kl": 0.0027089565992355347, "learning_rate": 9.140576474687263e-07, "loss": 0.0604, "reward": 0.6693072468042374, "reward_std": 0.9926005378365517, "step": 29 }, { "clip_fraction": 0.0, "completion_length": 3155.7083740234375, "dapo/avg_reward_std": 0.222336781601752, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.311827961956301, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 42.93154761904761, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.03428571428571429, "grad_norm": 0.08208812773227692, "kl": 0.001552581787109375, "learning_rate": 9.046048391230247e-07, "loss": 0.0268, "reward": 0.521108225453645, "reward_std": 0.9469912871718407, "step": 30 }, { "clip_fraction": 0.0, "completion_length": 2657.559036254883, "dapo/avg_reward_std": 0.1865689324008094, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.259259263260497, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 51.076388888888886, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.03542857142857143, "grad_norm": 0.1316680908203125, "kl": 0.009428024291992188, "learning_rate": 8.9471999940354e-07, "loss": 0.0745, "reward": 0.6315789166837931, "reward_std": 0.9327967762947083, "step": 31 }, { "clip_fraction": 0.0, "completion_length": 3071.7535095214844, "dapo/avg_reward_std": 0.3048748767375946, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40000000298023225, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 55.104166666666664, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.036571428571428574, "grad_norm": 0.10442829132080078, "kl": 0.0021753311157226562, "learning_rate": 8.844151714648274e-07, "loss": 0.0567, "reward": 0.5447857324033976, "reward_std": 0.921301856637001, "step": 32 }, { "clip_fraction": 0.0, "completion_length": 3025.826416015625, "dapo/avg_reward_std": 0.23097028769552708, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3385416748933494, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 38.95833333333333, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.037714285714285714, "grad_norm": 0.09167502820491791, "kl": 0.003194093704223633, "learning_rate": 8.737029101523929e-07, "loss": 0.0612, "reward": 0.5547973131760955, "reward_std": 0.9730775579810143, "step": 33 }, { "clip_fraction": 0.0, "completion_length": 2558.7812423706055, "dapo/avg_reward_std": 0.2557758816650936, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3452381007373333, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 42.113095238095234, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.038857142857142854, "grad_norm": 0.11055821925401688, "kl": 0.019285082817077637, "learning_rate": 8.625962667065487e-07, "loss": 0.0831, "reward": 0.5826370492577553, "reward_std": 0.9168377369642258, "step": 34 }, { "clip_fraction": 0.0, "completion_length": 2909.2361602783203, "dapo/avg_reward_std": 0.22593376713414345, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30645161626800416, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 49.598214285714285, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04, "grad_norm": 0.09941194951534271, "kl": 0.004673004150390625, "learning_rate": 8.511087728614862e-07, "loss": 0.0581, "reward": 0.5392080545425415, "reward_std": 0.9793680757284164, "step": 35 }, { "clip_fraction": 0.0, "completion_length": 2629.3333435058594, "dapo/avg_reward_std": 0.2632370889186859, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.338541675824672, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 29.513888888888886, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04114285714285714, "grad_norm": 0.17353901267051697, "kl": 0.010207176208496094, "learning_rate": 8.392544243589427e-07, "loss": 0.0623, "reward": 0.5811682712519541, "reward_std": 0.9331383407115936, "step": 36 }, { "clip_fraction": 0.0, "completion_length": 3220.9409790039062, "dapo/avg_reward_std": 0.2187359256403787, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.29047619913305556, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 36.25, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04228571428571429, "grad_norm": 0.10708803683519363, "kl": 0.0023801326751708984, "learning_rate": 8.270476638965461e-07, "loss": 0.0657, "reward": 0.48440539091825485, "reward_std": 0.9014616012573242, "step": 37 }, { "clip_fraction": 0.0, "completion_length": 3233.420135498047, "dapo/avg_reward_std": 0.2624325007200241, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30208333721384406, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 30.119047619047617, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04342857142857143, "grad_norm": 0.0923333689570427, "kl": 0.0031156539916992188, "learning_rate": 8.145033635316128e-07, "loss": 0.053, "reward": 0.45120809972286224, "reward_std": 0.9732232913374901, "step": 38 }, { "clip_fraction": 0.0, "completion_length": 2787.031280517578, "dapo/avg_reward_std": 0.1930955442644301, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.23412698933056422, "dapo/num_sampling_attempts": 5.25, "dapo/sampling_efficiency": 23.244047619047617, "dapo/total_prompts_processed": 31.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.044571428571428574, "grad_norm": 0.12707453966140747, "kl": 0.006325244903564453, "learning_rate": 8.01636806561836e-07, "loss": 0.0905, "reward": 0.5048832832835615, "reward_std": 0.9330806732177734, "step": 39 }, { "clip_fraction": 0.0, "completion_length": 2921.6180572509766, "dapo/avg_reward_std": 0.25906160804960465, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3009259340663751, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 31.562499999999996, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.045714285714285714, "grad_norm": 0.1152920126914978, "kl": 0.004504203796386719, "learning_rate": 7.884636689049422e-07, "loss": 0.0443, "reward": 0.3671413380652666, "reward_std": 0.9126428663730621, "step": 40 }, { "clip_fraction": 0.0, "completion_length": 3100.8194732666016, "dapo/avg_reward_std": 0.26266304695087933, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3985507280930229, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 59.895833333333336, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.046857142857142854, "grad_norm": 0.1462322324514389, "kl": 0.0058536529541015625, "learning_rate": 7.75e-07, "loss": 0.0836, "reward": 0.6537042334675789, "reward_std": 0.9643120691180229, "step": 41 }, { "clip_fraction": 0.0, "completion_length": 3083.8541870117188, "dapo/avg_reward_std": 0.2028282030540354, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.27941177215646296, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 34.61309523809524, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.048, "grad_norm": 0.11620575189590454, "kl": 0.005963563919067383, "learning_rate": 7.612622032536507e-07, "loss": 0.0756, "reward": 0.6132493373006582, "reward_std": 0.9271278157830238, "step": 42 }, { "clip_fraction": 0.0, "completion_length": 2860.6840209960938, "dapo/avg_reward_std": 0.2537354379892349, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31666667262713116, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 27.916666666666664, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.04914285714285714, "grad_norm": 0.15706917643547058, "kl": 0.012288570404052734, "learning_rate": 7.472670160550848e-07, "loss": 0.0864, "reward": 0.4896182883530855, "reward_std": 0.9406783953309059, "step": 43 }, { "clip_fraction": 0.0, "completion_length": 3230.951416015625, "dapo/avg_reward_std": 0.2785276919603348, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4047619104385376, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 35.20833333333333, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05028571428571429, "grad_norm": 0.10281670838594437, "kl": 0.0028905868530273438, "learning_rate": 7.330314893841101e-07, "loss": 0.0474, "reward": 0.5266857808455825, "reward_std": 0.9769049882888794, "step": 44 }, { "clip_fraction": 0.0, "completion_length": 2598.888885498047, "dapo/avg_reward_std": 0.25520460651471066, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3205128231873879, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 54.61309523809524, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05142857142857143, "grad_norm": 0.20818237960338593, "kl": 0.0046825408935546875, "learning_rate": 7.185729670371604e-07, "loss": 0.111, "reward": 0.8208948634564877, "reward_std": 0.9365335553884506, "step": 45 }, { "clip_fraction": 0.0, "completion_length": 2529.66316986084, "dapo/avg_reward_std": 0.23859836988978916, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2916666753590107, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 25.535714285714285, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.052571428571428575, "grad_norm": 0.12924660742282867, "kl": 0.05440711975097656, "learning_rate": 7.039090644965509e-07, "loss": 0.058, "reward": 0.5307688321918249, "reward_std": 0.9391194358468056, "step": 46 }, { "clip_fraction": 0.0, "completion_length": 2737.288230895996, "dapo/avg_reward_std": 0.25754969901052016, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3678160998327979, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 41.14583333333333, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.053714285714285714, "grad_norm": 0.1452113687992096, "kl": 0.01877737045288086, "learning_rate": 6.890576474687263e-07, "loss": 0.0601, "reward": 0.5596560873091221, "reward_std": 0.9911476969718933, "step": 47 }, { "clip_fraction": 0.0, "completion_length": 2543.0694885253906, "dapo/avg_reward_std": 0.2434165603839434, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3717948794364929, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 34.37499999999999, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.054857142857142854, "grad_norm": 0.15664616227149963, "kl": 0.008816719055175781, "learning_rate": 6.740368101176495e-07, "loss": 0.0783, "reward": 0.7667456082999706, "reward_std": 0.9330208897590637, "step": 48 }, { "clip_fraction": 0.0, "completion_length": 3054.357666015625, "dapo/avg_reward_std": 0.16933719928448016, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.22222222693455526, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 26.5625, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.056, "grad_norm": 0.13884593546390533, "kl": 0.00569915771484375, "learning_rate": 6.588648530198504e-07, "loss": 0.0645, "reward": 0.7750914767384529, "reward_std": 0.9781928732991219, "step": 49 }, { "clip_fraction": 0.0, "completion_length": 3030.9652709960938, "dapo/avg_reward_std": 0.2089548914721518, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.28282828629016876, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 33.779761904761905, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05714285714285714, "grad_norm": 0.13095000386238098, "kl": 0.005908966064453125, "learning_rate": 6.435602608679916e-07, "loss": 0.0854, "reward": 0.7626989148557186, "reward_std": 0.9684056863188744, "step": 50 }, { "clip_fraction": 0.0, "completion_length": 3176.8819274902344, "dapo/avg_reward_std": 0.2258962235516972, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.29629630057348144, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 33.25892857142857, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05828571428571429, "grad_norm": 0.11041354387998581, "kl": 0.002262115478515625, "learning_rate": 6.281416799501187e-07, "loss": 0.0892, "reward": 0.6493857521563768, "reward_std": 0.9608959034085274, "step": 51 }, { "clip_fraction": 0.0, "completion_length": 2991.208366394043, "dapo/avg_reward_std": 0.23346692004374095, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3392857201397419, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 52.70833333333333, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.05942857142857143, "grad_norm": 0.13827170431613922, "kl": 0.014558792114257812, "learning_rate": 6.126278954320294e-07, "loss": 0.0435, "reward": 0.5274152141064405, "reward_std": 0.9937505125999451, "step": 52 }, { "clip_fraction": 0.0, "completion_length": 2921.013946533203, "dapo/avg_reward_std": 0.2715419438378564, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3390804626818361, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 51.5625, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.060571428571428575, "grad_norm": 0.09735170006752014, "kl": 0.009172439575195312, "learning_rate": 5.97037808470444e-07, "loss": 0.0541, "reward": 0.7217882052063942, "reward_std": 0.9594404622912407, "step": 53 }, { "clip_fraction": 0.0, "completion_length": 3133.46875, "dapo/avg_reward_std": 0.2624934350068753, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35256411077884525, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 41.041666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.061714285714285715, "grad_norm": 0.10414379835128784, "kl": 0.010915756225585938, "learning_rate": 5.813904131848564e-07, "loss": 0.061, "reward": 0.5302782151848078, "reward_std": 0.9707583636045456, "step": 54 }, { "clip_fraction": 0.0, "completion_length": 3010.5938110351562, "dapo/avg_reward_std": 0.21664191484451295, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.24444444941149818, "dapo/num_sampling_attempts": 5.625, "dapo/sampling_efficiency": 19.791666666666664, "dapo/total_prompts_processed": 33.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06285714285714286, "grad_norm": 0.11232081800699234, "kl": 0.012262344360351562, "learning_rate": 5.657047735161255e-07, "loss": 0.0561, "reward": 0.5284321270883083, "reward_std": 0.9165859594941139, "step": 55 }, { "clip_fraction": 0.0, "completion_length": 3144.951416015625, "dapo/avg_reward_std": 0.2279102834207671, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34523809807641165, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 45.32738095238095, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.064, "grad_norm": 0.13161872327327728, "kl": 0.007735252380371094, "learning_rate": 5.5e-07, "loss": 0.0717, "reward": 0.6519734226167202, "reward_std": 0.9642440155148506, "step": 56 }, { "clip_fraction": 0.0, "completion_length": 3222.6111450195312, "dapo/avg_reward_std": 0.2675224413042483, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4492753724689069, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 45.535714285714285, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06514285714285714, "grad_norm": 0.09332293272018433, "kl": 0.0064525604248046875, "learning_rate": 5.342952264838747e-07, "loss": 0.0302, "reward": 0.5501165799796581, "reward_std": 0.9585564360022545, "step": 57 }, { "clip_fraction": 0.0, "completion_length": 2679.9236907958984, "dapo/avg_reward_std": 0.17708626160254845, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.24358974741055414, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 28.91865079365079, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06628571428571428, "grad_norm": 0.16309793293476105, "kl": 0.01690673828125, "learning_rate": 5.186095868151436e-07, "loss": 0.0846, "reward": 0.8469000309705734, "reward_std": 0.9497043192386627, "step": 58 }, { "clip_fraction": 0.0, "completion_length": 2847.048629760742, "dapo/avg_reward_std": 0.2622834824282548, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3563218476443455, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 29.999999999999993, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06742857142857143, "grad_norm": 0.09638360142707825, "kl": 0.0057086944580078125, "learning_rate": 5.02962191529556e-07, "loss": 0.0634, "reward": 0.6089529246091843, "reward_std": 0.9450863003730774, "step": 59 }, { "clip_fraction": 0.0, "completion_length": 3119.9132385253906, "dapo/avg_reward_std": 0.19833819533503333, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2674418656631958, "dapo/num_sampling_attempts": 5.375, "dapo/sampling_efficiency": 29.563492063492063, "dapo/total_prompts_processed": 32.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06857142857142857, "grad_norm": 0.1252850890159607, "kl": 0.008715629577636719, "learning_rate": 4.873721045679706e-07, "loss": 0.0666, "reward": 0.5249154977500439, "reward_std": 0.947566568851471, "step": 60 }, { "clip_fraction": 0.0, "completion_length": 2844.795181274414, "dapo/avg_reward_std": 0.2648707001373686, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35632184610284606, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 39.791666666666664, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.06971428571428571, "grad_norm": 0.10366301238536835, "kl": 0.056069374084472656, "learning_rate": 4.7185832004988133e-07, "loss": 0.037, "reward": 0.5161248315125704, "reward_std": 0.9692364558577538, "step": 61 }, { "clip_fraction": 0.0, "completion_length": 3053.951446533203, "dapo/avg_reward_std": 0.21576767837679064, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.25225225574261434, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 37.013888888888886, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07085714285714285, "grad_norm": 0.14441759884357452, "kl": 0.009164810180664062, "learning_rate": 4.5643973913200837e-07, "loss": 0.0609, "reward": 0.6510533541440964, "reward_std": 0.9361515268683434, "step": 62 }, { "clip_fraction": 0.0, "completion_length": 3326.781280517578, "dapo/avg_reward_std": 0.2158982500885472, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3181818254066236, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 44.49404761904761, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.072, "grad_norm": 0.12127737700939178, "kl": 0.031108856201171875, "learning_rate": 4.4113514698014953e-07, "loss": 0.0463, "reward": 0.45860649459064007, "reward_std": 0.9209225550293922, "step": 63 }, { "clip_fraction": 0.0, "completion_length": 3208.6319885253906, "dapo/avg_reward_std": 0.28419332668699065, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3563218440475135, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 53.591269841269835, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07314285714285715, "grad_norm": 0.13326792418956757, "kl": 0.0061321258544921875, "learning_rate": 4.2596318988235037e-07, "loss": 0.0614, "reward": 0.5644803196191788, "reward_std": 0.9919605851173401, "step": 64 }, { "clip_fraction": 0.0, "completion_length": 2597.437530517578, "dapo/avg_reward_std": 0.2766759342380932, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3928571529686451, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 32.08333333333333, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07428571428571429, "grad_norm": 0.10434358566999435, "kl": 0.049472808837890625, "learning_rate": 4.1094235253127374e-07, "loss": 0.0312, "reward": 0.393868962302804, "reward_std": 0.9459580257534981, "step": 65 }, { "clip_fraction": 0.0, "completion_length": 2630.0833587646484, "dapo/avg_reward_std": 0.25837596147148695, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35802469595714853, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 31.666666666666664, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07542857142857143, "grad_norm": 0.11327924579381943, "kl": 0.23560714721679688, "learning_rate": 3.9609093550344907e-07, "loss": 0.0563, "reward": 0.674448698759079, "reward_std": 0.9591537117958069, "step": 66 }, { "clip_fraction": 0.0, "completion_length": 3343.3159790039062, "dapo/avg_reward_std": 0.2785816714167595, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41666667101283866, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 42.08333333333333, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07657142857142857, "grad_norm": 0.10341926664113998, "kl": 0.005463600158691406, "learning_rate": 3.8142703296283953e-07, "loss": 0.0653, "reward": 0.42072685062885284, "reward_std": 0.9649706333875656, "step": 67 }, { "clip_fraction": 0.0, "completion_length": 2880.0590438842773, "dapo/avg_reward_std": 0.2447407204243872, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2916666728754838, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 28.591269841269842, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07771428571428571, "grad_norm": 0.15764088928699493, "kl": 0.011991500854492188, "learning_rate": 3.6696851061588994e-07, "loss": 0.1004, "reward": 0.537701515480876, "reward_std": 0.9107673466205597, "step": 68 }, { "clip_fraction": 0.0, "completion_length": 2839.0069580078125, "dapo/avg_reward_std": 0.21828406437849388, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26495726865071517, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 32.39583333333333, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.07885714285714286, "grad_norm": 0.1426348239183426, "kl": 0.16588592529296875, "learning_rate": 3.5273298394491515e-07, "loss": 0.065, "reward": 0.5752462260425091, "reward_std": 0.9265653118491173, "step": 69 }, { "clip_fraction": 0.0, "completion_length": 3154.9479370117188, "dapo/avg_reward_std": 0.24686445650600253, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40476191185769583, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 58.75, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08, "grad_norm": 0.09042708575725555, "kl": 0.015224456787109375, "learning_rate": 3.387377967463493e-07, "loss": 0.0278, "reward": 0.5091124139726162, "reward_std": 0.9951601624488831, "step": 70 }, { "clip_fraction": 0.0, "completion_length": 2558.7118377685547, "dapo/avg_reward_std": 0.24922772922686168, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35119048452803064, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 41.979166666666664, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08114285714285714, "grad_norm": 0.18424691259860992, "kl": 0.012338638305664062, "learning_rate": 3.250000000000001e-07, "loss": 0.135, "reward": 0.80832345969975, "reward_std": 0.9256910160183907, "step": 71 }, { "clip_fraction": 0.0, "completion_length": 2797.5659790039062, "dapo/avg_reward_std": 0.3421325541677929, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4603174655210404, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 52.916666666666664, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08228571428571428, "grad_norm": 0.10505988448858261, "kl": 0.027385711669921875, "learning_rate": 3.115363310950578e-07, "loss": 0.0435, "reward": 0.5198174491524696, "reward_std": 0.932801865041256, "step": 72 }, { "clip_fraction": 0.0, "completion_length": 3024.5243225097656, "dapo/avg_reward_std": 0.26287247288611626, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333334038334506, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 40.0297619047619, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08342857142857144, "grad_norm": 0.09084703773260117, "kl": 0.09223747253417969, "learning_rate": 2.9836319343816397e-07, "loss": 0.0314, "reward": 0.3449883237481117, "reward_std": 0.9521737843751907, "step": 73 }, { "clip_fraction": 0.0, "completion_length": 2648.7257080078125, "dapo/avg_reward_std": 0.2678213362340574, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.38271605582148943, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 40.0, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08457142857142858, "grad_norm": 0.15155129134655, "kl": 1.0743579864501953, "learning_rate": 2.854966364683872e-07, "loss": 0.0851, "reward": 0.7227161657065153, "reward_std": 0.9239719212055206, "step": 74 }, { "clip_fraction": 0.0, "completion_length": 2659.388900756836, "dapo/avg_reward_std": 0.28101804742106684, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37037037699310865, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 34.791666666666664, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08571428571428572, "grad_norm": 0.1127755343914032, "kl": 0.02587890625, "learning_rate": 2.729523361034538e-07, "loss": 0.0523, "reward": 0.7372388476505876, "reward_std": 0.918749064207077, "step": 75 }, { "clip_fraction": 0.0, "completion_length": 2402.364585876465, "dapo/avg_reward_std": 0.26893362632164586, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36538462111583125, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 48.854166666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08685714285714285, "grad_norm": 0.14693324267864227, "kl": 0.12501144409179688, "learning_rate": 2.6074557564105724e-07, "loss": 0.0747, "reward": 0.6182113699615002, "reward_std": 0.9421844929456711, "step": 76 }, { "clip_fraction": 0.0, "completion_length": 2970.1146392822266, "dapo/avg_reward_std": 0.2118390180170536, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.25000000521540644, "dapo/num_sampling_attempts": 5.0, "dapo/sampling_efficiency": 30.53571428571428, "dapo/total_prompts_processed": 30.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.088, "grad_norm": 0.12072475999593735, "kl": 0.05495643615722656, "learning_rate": 2.488912271385139e-07, "loss": 0.0498, "reward": 0.46035338938236237, "reward_std": 0.9146044701337814, "step": 77 }, { "clip_fraction": 0.0, "completion_length": 2959.0972442626953, "dapo/avg_reward_std": 0.13832776496807733, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.1631944477558136, "dapo/num_sampling_attempts": 6.0, "dapo/sampling_efficiency": 30.868055555555557, "dapo/total_prompts_processed": 36.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.08914285714285715, "grad_norm": 0.14289411902427673, "kl": 0.23297691345214844, "learning_rate": 2.374037332934512e-07, "loss": 0.0742, "reward": 0.49553669430315495, "reward_std": 0.9023259580135345, "step": 78 }, { "clip_fraction": 0.0, "completion_length": 2935.8159942626953, "dapo/avg_reward_std": 0.2931290553374724, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.44696970080787485, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 58.854166666666664, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09028571428571429, "grad_norm": 0.13638050854206085, "kl": 0.03482818603515625, "learning_rate": 2.2629708984760706e-07, "loss": 0.0609, "reward": 0.4563083341345191, "reward_std": 0.9425384849309921, "step": 79 }, { "clip_fraction": 0.0, "completion_length": 3111.340301513672, "dapo/avg_reward_std": 0.22562272967518987, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3063063154349456, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 28.819444444444446, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09142857142857143, "grad_norm": 0.10739335417747498, "kl": 0.008031845092773438, "learning_rate": 2.1558482853517253e-07, "loss": 0.0574, "reward": 0.6980459969490767, "reward_std": 0.9673654958605766, "step": 80 }, { "clip_fraction": 0.0, "completion_length": 2921.6111450195312, "dapo/avg_reward_std": 0.2788313144239886, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333386429425, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 34.27083333333333, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09257142857142857, "grad_norm": 0.18038466572761536, "kl": 0.016963958740234375, "learning_rate": 2.0528000059645995e-07, "loss": 0.0958, "reward": 0.6405055914074183, "reward_std": 0.9560460075736046, "step": 81 }, { "clip_fraction": 0.0, "completion_length": 3220.687530517578, "dapo/avg_reward_std": 0.1744266465688363, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2307692349721224, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 26.666666666666664, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09371428571428571, "grad_norm": 0.12377161532640457, "kl": 0.009552001953125, "learning_rate": 1.9539516087697517e-07, "loss": 0.061, "reward": 0.5073397234082222, "reward_std": 0.9641925543546677, "step": 82 }, { "clip_fraction": 0.0, "completion_length": 2663.1597442626953, "dapo/avg_reward_std": 0.2496542421079451, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333413447103, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 40.451388888888886, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09485714285714286, "grad_norm": 0.1273493468761444, "kl": 0.04001617431640625, "learning_rate": 1.8594235253127372e-07, "loss": 0.0521, "reward": 0.49824655149132013, "reward_std": 0.9464590474963188, "step": 83 }, { "clip_fraction": 0.0, "completion_length": 3073.2986450195312, "dapo/avg_reward_std": 0.27911247177557513, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4318181892687624, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 60.3125, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.096, "grad_norm": 0.14399568736553192, "kl": 0.010408401489257812, "learning_rate": 1.7693309235023127e-07, "loss": 0.0657, "reward": 0.624765045940876, "reward_std": 0.954634428024292, "step": 84 }, { "clip_fraction": 0.0, "completion_length": 3073.7535095214844, "dapo/avg_reward_std": 0.17655213298024358, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.24324324888152046, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 29.82142857142857, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09714285714285714, "grad_norm": 0.12462300807237625, "kl": 0.007053375244140625, "learning_rate": 1.6837835672960831e-07, "loss": 0.062, "reward": 0.6820014184340835, "reward_std": 0.8695997595787048, "step": 85 }, { "clip_fraction": 0.0, "completion_length": 2741.204849243164, "dapo/avg_reward_std": 0.21997538357973098, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2666666731238365, "dapo/num_sampling_attempts": 5.0, "dapo/sampling_efficiency": 27.896825396825395, "dapo/total_prompts_processed": 30.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09828571428571428, "grad_norm": 0.14978615939617157, "kl": 0.025630950927734375, "learning_rate": 1.6028856829700258e-07, "loss": 0.0585, "reward": 0.5304304007440805, "reward_std": 0.9523463025689125, "step": 86 }, { "clip_fraction": 0.0, "completion_length": 3223.7257080078125, "dapo/avg_reward_std": 0.27104776600996655, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3666666716337204, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 43.333333333333336, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.09942857142857142, "grad_norm": 0.1086694523692131, "kl": 0.009660720825195312, "learning_rate": 1.5267358321348285e-07, "loss": 0.058, "reward": 0.5936380252242088, "reward_std": 0.919317290186882, "step": 87 }, { "clip_fraction": 0.0, "completion_length": 2934.5833740234375, "dapo/avg_reward_std": 0.23462909049001232, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333376152762, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 52.84722222222222, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10057142857142858, "grad_norm": 0.14571106433868408, "kl": 0.02588653564453125, "learning_rate": 1.4554267916537495e-07, "loss": 0.0741, "reward": 0.5716092269867659, "reward_std": 0.9475584626197815, "step": 88 }, { "clip_fraction": 0.0, "completion_length": 3017.2673950195312, "dapo/avg_reward_std": 0.22858241697152457, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333383003871, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 45.416666666666664, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10171428571428572, "grad_norm": 0.10647116601467133, "kl": 0.034389495849609375, "learning_rate": 1.3890454406082956e-07, "loss": 0.0586, "reward": 0.5356123449746519, "reward_std": 0.9426311627030373, "step": 89 }, { "clip_fraction": 0.0, "completion_length": 2540.9548950195312, "dapo/avg_reward_std": 0.16863613526026408, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.22592593100335862, "dapo/num_sampling_attempts": 5.625, "dapo/sampling_efficiency": 28.75, "dapo/total_prompts_processed": 33.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10285714285714286, "grad_norm": 0.1207195371389389, "kl": 0.7361793518066406, "learning_rate": 1.3276726544494571e-07, "loss": 0.0349, "reward": 0.750616230070591, "reward_std": 1.0088519006967545, "step": 90 }, { "clip_fraction": 0.0, "completion_length": 3054.5833435058594, "dapo/avg_reward_std": 0.2058313423767686, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.28645834047347307, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 41.36904761904762, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.104, "grad_norm": 0.10659411549568176, "kl": 0.009166717529296875, "learning_rate": 1.2713832064634125e-07, "loss": 0.06, "reward": 0.49192704539746046, "reward_std": 0.8957021087408066, "step": 91 }, { "clip_fraction": 0.0, "completion_length": 2958.43408203125, "dapo/avg_reward_std": 0.317311546076899, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.47101450160793634, "dapo/num_sampling_attempts": 2.875, "dapo/sampling_efficiency": 44.166666666666664, "dapo/total_prompts_processed": 17.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10514285714285715, "grad_norm": 0.1002211645245552, "kl": 0.00801849365234375, "learning_rate": 1.220245676671809e-07, "loss": 0.0508, "reward": 0.7598672257736325, "reward_std": 0.9218961223959923, "step": 92 }, { "clip_fraction": 0.0, "completion_length": 3257.7881774902344, "dapo/avg_reward_std": 0.2586492033941405, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36309524678758215, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 41.5625, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10628571428571429, "grad_norm": 0.12036111950874329, "kl": 0.01373291015625, "learning_rate": 1.1743223682775649e-07, "loss": 0.0459, "reward": 0.5575436241924763, "reward_std": 0.9431066736578941, "step": 93 }, { "clip_fraction": 0.0, "completion_length": 2740.1284942626953, "dapo/avg_reward_std": 0.2375115204241968, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35483871688765867, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 29.999999999999996, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10742857142857143, "grad_norm": 0.14863841235637665, "kl": 0.032642364501953125, "learning_rate": 1.1336692317580158e-07, "loss": 0.0742, "reward": 0.5738632343709469, "reward_std": 0.9468542039394379, "step": 94 }, { "clip_fraction": 0.0, "completion_length": 2899.937515258789, "dapo/avg_reward_std": 0.2901096656208947, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4206349246558689, "dapo/num_sampling_attempts": 2.625, "dapo/sampling_efficiency": 64.58333333333333, "dapo/total_prompts_processed": 15.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10857142857142857, "grad_norm": 0.13841120898723602, "kl": 0.012683868408203125, "learning_rate": 1.0983357966978745e-07, "loss": 0.0653, "reward": 0.6555321607738733, "reward_std": 0.9674765914678574, "step": 95 }, { "clip_fraction": 0.0, "completion_length": 2926.1910247802734, "dapo/avg_reward_std": 0.18252932499436772, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2696078485425781, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 37.82738095238095, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.10971428571428571, "grad_norm": 0.13530230522155762, "kl": 0.05282402038574219, "learning_rate": 1.068365111445064e-07, "loss": 0.0762, "reward": 0.5449853939935565, "reward_std": 0.952080488204956, "step": 96 }, { "clip_fraction": 0.0, "completion_length": 2798.031280517578, "dapo/avg_reward_std": 0.23633464597738707, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3461538478732109, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 38.541666666666664, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11085714285714286, "grad_norm": 0.1648494303226471, "kl": 0.025691986083984375, "learning_rate": 1.0437936906629334e-07, "loss": 0.0939, "reward": 0.673285935074091, "reward_std": 0.979133740067482, "step": 97 }, { "clip_fraction": 0.0, "completion_length": 3240.7361450195312, "dapo/avg_reward_std": 0.2805523918225215, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3653846193964665, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 54.513888888888886, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.112, "grad_norm": 0.12132810056209564, "kl": 0.01453399658203125, "learning_rate": 1.0246514708427701e-07, "loss": 0.0557, "reward": 0.5335402796044946, "reward_std": 0.9456770345568657, "step": 98 }, { "clip_fraction": 0.0, "completion_length": 2497.9132499694824, "dapo/avg_reward_std": 0.2488528937101364, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3888888942698638, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 58.05555555555555, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11314285714285714, "grad_norm": 0.24999241530895233, "kl": 0.028301239013671875, "learning_rate": 1.0109617738307911e-07, "loss": 0.1037, "reward": 0.785055335611105, "reward_std": 0.9553829357028008, "step": 99 }, { "clip_fraction": 0.0, "completion_length": 3039.6284790039062, "dapo/avg_reward_std": 0.2903642791012923, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.38194445086022216, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 46.24999999999999, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11428571428571428, "grad_norm": 0.14126254618167877, "kl": 0.014410018920898438, "learning_rate": 1.002741278414069e-07, "loss": 0.0643, "reward": 0.4948624651879072, "reward_std": 0.9704382866621017, "step": 100 }, { "clip_fraction": 0.0, "completion_length": 3318.513916015625, "dapo/avg_reward_std": 0.22042016812733242, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.29523810063089645, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 28.645833333333332, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11542857142857142, "grad_norm": 0.22150926291942596, "kl": 0.011791229248046875, "learning_rate": 1e-07, "loss": 0.0631, "reward": 0.46524661034345627, "reward_std": 0.9665903598070145, "step": 101 }, { "clip_fraction": 0.0, "completion_length": 3083.875, "dapo/avg_reward_std": 0.21663353669232335, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3390804637095024, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 39.93055555555555, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11657142857142858, "grad_norm": 0.16289636492729187, "kl": 0.008695602416992188, "learning_rate": 6.203955092681039e-07, "loss": 0.098, "reward": 0.8642945289611816, "reward_std": 1.031830094754696, "step": 102 }, { "clip_fraction": 0.0, "completion_length": 3364.701446533203, "dapo/avg_reward_std": 0.24887267331923207, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3172043090866458, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 31.69642857142857, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11771428571428572, "grad_norm": 0.08825232833623886, "kl": 0.009820938110351562, "learning_rate": 6.126278954320294e-07, "loss": 0.0178, "reward": 0.3627179069444537, "reward_std": 0.8941863179206848, "step": 103 }, { "clip_fraction": 0.0, "completion_length": 3255.3055725097656, "dapo/avg_reward_std": 0.24808817549988074, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33950618074999916, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 38.95833333333333, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.11885714285714286, "grad_norm": 0.13638561964035034, "kl": 0.011318206787109375, "learning_rate": 6.048412045323164e-07, "loss": 0.0643, "reward": 0.5508436523377895, "reward_std": 0.9409585371613503, "step": 104 }, { "clip_fraction": 0.0, "completion_length": 3270.4930419921875, "dapo/avg_reward_std": 0.23700118958950042, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3166666706403097, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 61.07142857142857, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12, "grad_norm": 0.10357476025819778, "kl": 0.0117034912109375, "learning_rate": 5.97037808470444e-07, "loss": 0.0278, "reward": 0.4137148158624768, "reward_std": 0.9205853268504143, "step": 105 }, { "clip_fraction": 0.0, "completion_length": 3118.9584045410156, "dapo/avg_reward_std": 0.22452521603554487, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333395421505, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 28.869047619047613, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12114285714285715, "grad_norm": 0.11885393410921097, "kl": 0.011783599853515625, "learning_rate": 5.892200842364462e-07, "loss": 0.0786, "reward": 0.673494272865355, "reward_std": 0.9388571679592133, "step": 106 }, { "clip_fraction": 0.0, "completion_length": 3183.666717529297, "dapo/avg_reward_std": 0.23609773551716523, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30882353467099805, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 37.74305555555556, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12228571428571429, "grad_norm": 0.13629400730133057, "kl": 0.0092010498046875, "learning_rate": 5.813904131848564e-07, "loss": 0.0615, "reward": 0.5680118557065725, "reward_std": 0.8982010260224342, "step": 107 }, { "clip_fraction": 0.0, "completion_length": 3170.263916015625, "dapo/avg_reward_std": 0.21017570431168014, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3018018079770578, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 30.625, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12342857142857143, "grad_norm": 0.1134539544582367, "kl": 0.010692596435546875, "learning_rate": 5.735511803093248e-07, "loss": 0.0433, "reward": 0.6368884779512882, "reward_std": 0.9655679985880852, "step": 108 }, { "clip_fraction": 0.0, "completion_length": 2938.5243530273438, "dapo/avg_reward_std": 0.30796096875117374, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3974359052685591, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 38.95833333333333, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12457142857142857, "grad_norm": 0.16064728796482086, "kl": 0.014812469482421875, "learning_rate": 5.657047735161255e-07, "loss": 0.0874, "reward": 0.4405923653393984, "reward_std": 0.899710550904274, "step": 109 }, { "clip_fraction": 0.0, "completion_length": 3333.5556030273438, "dapo/avg_reward_std": 0.17683410130698105, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.28735632475080164, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 40.104166666666664, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12571428571428572, "grad_norm": 0.1374766230583191, "kl": 0.00823211669921875, "learning_rate": 5.578535828967777e-07, "loss": 0.0525, "reward": 0.6373127717524767, "reward_std": 0.949370414018631, "step": 110 }, { "clip_fraction": 0.0, "completion_length": 3404.166717529297, "dapo/avg_reward_std": 0.2707539377734065, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3437500074505806, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 28.124999999999996, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12685714285714286, "grad_norm": 0.09096160531044006, "kl": 0.0152435302734375, "learning_rate": 5.5e-07, "loss": 0.0286, "reward": 0.4166172882542014, "reward_std": 0.9417606145143509, "step": 111 }, { "clip_fraction": 0.0, "completion_length": 3306.263946533203, "dapo/avg_reward_std": 0.17227381931410896, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.21481482055452134, "dapo/num_sampling_attempts": 5.625, "dapo/sampling_efficiency": 27.395833333333332, "dapo/total_prompts_processed": 33.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.128, "grad_norm": 0.11950567364692688, "kl": 0.01320648193359375, "learning_rate": 5.421464171032224e-07, "loss": 0.0449, "reward": 0.4937558462843299, "reward_std": 0.9720155894756317, "step": 112 }, { "clip_fraction": 0.0, "completion_length": 3117.1979064941406, "dapo/avg_reward_std": 0.30339551545106447, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3846153886272357, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 38.95833333333333, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.12914285714285714, "grad_norm": 0.15823398530483246, "kl": 0.01418304443359375, "learning_rate": 5.342952264838747e-07, "loss": 0.0743, "reward": 0.5596551271155477, "reward_std": 0.8979872986674309, "step": 113 }, { "clip_fraction": 0.0, "completion_length": 3239.031280517578, "dapo/avg_reward_std": 0.24120492219924927, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34000000298023225, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 56.770833333333336, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13028571428571428, "grad_norm": 0.20106364786624908, "kl": 0.01206207275390625, "learning_rate": 5.264488196906752e-07, "loss": 0.0817, "reward": 0.697497084736824, "reward_std": 0.9489930346608162, "step": 114 }, { "clip_fraction": 0.0, "completion_length": 3197.2430725097656, "dapo/avg_reward_std": 0.20663932577157632, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26495727056112045, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 38.4375, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13142857142857142, "grad_norm": 0.15399962663650513, "kl": 0.015567779541015625, "learning_rate": 5.186095868151436e-07, "loss": 0.0667, "reward": 0.5802914081141353, "reward_std": 0.9295158162713051, "step": 115 }, { "clip_fraction": 0.0, "completion_length": 3272.6007080078125, "dapo/avg_reward_std": 0.22710687816143035, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3166666701436043, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 37.61904761904762, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13257142857142856, "grad_norm": 0.140142023563385, "kl": 0.01934814453125, "learning_rate": 5.107799157635538e-07, "loss": 0.0611, "reward": 0.6176847349852324, "reward_std": 0.944318100810051, "step": 116 }, { "clip_fraction": 0.0, "completion_length": 3268.4305725097656, "dapo/avg_reward_std": 0.23266587586238466, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.344827591345228, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 38.125, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1337142857142857, "grad_norm": 0.1582440286874771, "kl": 0.01198577880859375, "learning_rate": 5.02962191529556e-07, "loss": 0.0556, "reward": 0.5785031230188906, "reward_std": 0.954645112156868, "step": 117 }, { "clip_fraction": 0.0, "completion_length": 2941.9722595214844, "dapo/avg_reward_std": 0.24969401342027328, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3284313814604984, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 27.20238095238095, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13485714285714287, "grad_norm": 0.1869765818119049, "kl": 0.01676177978515625, "learning_rate": 4.951587954676837e-07, "loss": 0.1063, "reward": 0.6486848145723343, "reward_std": 0.9332743212580681, "step": 118 }, { "clip_fraction": 0.0, "completion_length": 3206.982635498047, "dapo/avg_reward_std": 0.20580977627209254, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.26666667333671024, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 41.28472222222222, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.136, "grad_norm": 0.13004696369171143, "kl": 0.015842437744140625, "learning_rate": 4.873721045679706e-07, "loss": 0.0453, "reward": 0.4798949249088764, "reward_std": 0.9390313774347305, "step": 119 }, { "clip_fraction": 0.0, "completion_length": 3015.545135498047, "dapo/avg_reward_std": 0.22217401381461852, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3548387149649282, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 28.95833333333333, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13714285714285715, "grad_norm": 0.229897141456604, "kl": 0.02198028564453125, "learning_rate": 4.79604490731896e-07, "loss": 0.0749, "reward": 0.7311479561030865, "reward_std": 0.9607837572693825, "step": 120 }, { "clip_fraction": 0.0, "completion_length": 3098.656280517578, "dapo/avg_reward_std": 0.22588159143924713, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.32777778506278993, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 44.613095238095234, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1382857142857143, "grad_norm": 0.13800247013568878, "kl": 0.014202117919921875, "learning_rate": 4.7185832004988133e-07, "loss": 0.0814, "reward": 0.8461479842662811, "reward_std": 0.9660850539803505, "step": 121 }, { "clip_fraction": 0.0, "completion_length": 3064.3924255371094, "dapo/avg_reward_std": 0.16500467896461488, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.19666667193174361, "dapo/num_sampling_attempts": 6.25, "dapo/sampling_efficiency": 21.07142857142857, "dapo/total_prompts_processed": 37.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.13942857142857143, "grad_norm": 0.1680934727191925, "kl": 0.01361083984375, "learning_rate": 4.641359520805548e-07, "loss": 0.066, "reward": 0.7812346797436476, "reward_std": 0.9529108256101608, "step": 122 }, { "clip_fraction": 0.0, "completion_length": 3097.4861755371094, "dapo/avg_reward_std": 0.22939075000824466, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333334038334506, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 33.75, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14057142857142857, "grad_norm": 0.18081900477409363, "kl": 0.014842987060546875, "learning_rate": 4.5643973913200837e-07, "loss": 0.0877, "reward": 0.7531900368630886, "reward_std": 0.9868133068084717, "step": 123 }, { "clip_fraction": 0.0, "completion_length": 3203.888885498047, "dapo/avg_reward_std": 0.24352495979379724, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35185185737080044, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 43.05555555555556, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1417142857142857, "grad_norm": 0.16807734966278076, "kl": 0.0139007568359375, "learning_rate": 4.4877202554526084e-07, "loss": 0.0612, "reward": 0.715996683575213, "reward_std": 0.9595553278923035, "step": 124 }, { "clip_fraction": 0.0, "completion_length": 2885.5625610351562, "dapo/avg_reward_std": 0.2548297820612788, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31770833814516664, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 27.20238095238095, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14285714285714285, "grad_norm": 0.16355834901332855, "kl": 0.02027130126953125, "learning_rate": 4.4113514698014953e-07, "loss": 0.0597, "reward": 0.8311022147536278, "reward_std": 0.9600836709141731, "step": 125 }, { "clip_fraction": 0.0, "completion_length": 3250.843780517578, "dapo/avg_reward_std": 0.2203440727858708, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.32758621152105005, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 46.770833333333336, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.144, "grad_norm": 0.18190248310565948, "kl": 0.0158843994140625, "learning_rate": 4.3353142970386557e-07, "loss": 0.068, "reward": 0.7400151332840323, "reward_std": 0.9569809287786484, "step": 126 }, { "clip_fraction": 0.0, "completion_length": 3264.420166015625, "dapo/avg_reward_std": 0.25137073759521755, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.41666667429464205, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 40.11904761904761, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14514285714285713, "grad_norm": 0.17950685322284698, "kl": 0.0223236083984375, "learning_rate": 4.2596318988235037e-07, "loss": 0.0528, "reward": 0.5194851458072662, "reward_std": 0.9414050430059433, "step": 127 }, { "clip_fraction": 0.0, "completion_length": 2892.9132690429688, "dapo/avg_reward_std": 0.2416491061449051, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2631579002267436, "dapo/num_sampling_attempts": 4.75, "dapo/sampling_efficiency": 26.9047619047619, "dapo/total_prompts_processed": 28.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1462857142857143, "grad_norm": 0.25602471828460693, "kl": 0.02016448974609375, "learning_rate": 4.1843273287476854e-07, "loss": 0.0933, "reward": 0.8592288717627525, "reward_std": 0.9212958365678787, "step": 128 }, { "clip_fraction": 0.0, "completion_length": 3146.6944580078125, "dapo/avg_reward_std": 0.22558308675371366, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3218390854268238, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 54.07738095238095, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14742857142857144, "grad_norm": 0.21352027356624603, "kl": 0.0198211669921875, "learning_rate": 4.1094235253127374e-07, "loss": 0.0679, "reward": 0.5732525363564491, "reward_std": 0.9645283669233322, "step": 129 }, { "clip_fraction": 0.0, "completion_length": 3248.4236450195312, "dapo/avg_reward_std": 0.35807471639580196, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5000000066227384, "dapo/num_sampling_attempts": 2.25, "dapo/sampling_efficiency": 51.041666666666664, "dapo/total_prompts_processed": 13.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14857142857142858, "grad_norm": 0.1599435657262802, "kl": 0.0216827392578125, "learning_rate": 4.034943304942796e-07, "loss": 0.0443, "reward": 0.5955070666968822, "reward_std": 0.9924386888742447, "step": 130 }, { "clip_fraction": 0.0, "completion_length": 2958.5347595214844, "dapo/avg_reward_std": 0.18185590389298228, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.23170731998071437, "dapo/num_sampling_attempts": 5.125, "dapo/sampling_efficiency": 24.945436507936506, "dapo/total_prompts_processed": 30.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.14971428571428572, "grad_norm": 0.21188445389270782, "kl": 0.02074432373046875, "learning_rate": 3.9609093550344907e-07, "loss": 0.0628, "reward": 0.8608505353331566, "reward_std": 0.9059992283582687, "step": 131 }, { "clip_fraction": 0.0, "completion_length": 3019.888931274414, "dapo/avg_reward_std": 0.3038036392794715, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.36419753785486575, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 38.33333333333333, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15085714285714286, "grad_norm": 0.19752100110054016, "kl": 0.024078369140625, "learning_rate": 3.8873442270461485e-07, "loss": 0.0698, "reward": 0.7191393785178661, "reward_std": 0.9548436179757118, "step": 132 }, { "clip_fraction": 0.0, "completion_length": 3251.6909790039062, "dapo/avg_reward_std": 0.17617152915114448, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.22222222494227545, "dapo/num_sampling_attempts": 5.25, "dapo/sampling_efficiency": 31.369047619047613, "dapo/total_prompts_processed": 31.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.152, "grad_norm": 0.1220565065741539, "kl": 0.01824951171875, "learning_rate": 3.8142703296283953e-07, "loss": 0.0249, "reward": 0.3546891317819245, "reward_std": 0.9377138167619705, "step": 133 }, { "clip_fraction": 0.0, "completion_length": 3146.545196533203, "dapo/avg_reward_std": 0.2565364229679108, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.32666667103767394, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 47.08333333333333, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15314285714285714, "grad_norm": 0.15810362994670868, "kl": 0.03081512451171875, "learning_rate": 3.7417099217982686e-07, "loss": 0.0306, "reward": 0.5206232005730271, "reward_std": 0.9619846642017365, "step": 134 }, { "clip_fraction": 0.0, "completion_length": 3085.5972900390625, "dapo/avg_reward_std": 0.30491976333515985, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.40476191469601225, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 31.666666666666664, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15428571428571428, "grad_norm": 0.2133372277021408, "kl": 0.0204620361328125, "learning_rate": 3.6696851061588994e-07, "loss": 0.0681, "reward": 0.7713347226381302, "reward_std": 0.9403144493699074, "step": 135 }, { "clip_fraction": 0.0, "completion_length": 3326.295196533203, "dapo/avg_reward_std": 0.22884555886953306, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.24358974817471626, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 25.868055555555557, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15542857142857142, "grad_norm": 0.18792302906513214, "kl": 0.029754638671875, "learning_rate": 3.5982178221668533e-07, "loss": 0.0468, "reward": 0.5651950668543577, "reward_std": 0.9934203922748566, "step": 136 }, { "clip_fraction": 0.0, "completion_length": 3265.2882080078125, "dapo/avg_reward_std": 0.304972759137551, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.43055556155741215, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 54.375, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15657142857142858, "grad_norm": 0.13081717491149902, "kl": 0.0223846435546875, "learning_rate": 3.5273298394491515e-07, "loss": 0.0443, "reward": 0.5535581167787313, "reward_std": 0.9467164501547813, "step": 137 }, { "clip_fraction": 0.0, "completion_length": 2895.8646545410156, "dapo/avg_reward_std": 0.2690910736719767, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333387970924, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 32.82738095238095, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15771428571428572, "grad_norm": 0.18165208399295807, "kl": 0.032073974609375, "learning_rate": 3.45704275117204e-07, "loss": 0.0288, "reward": 0.5253790076822042, "reward_std": 0.9247673749923706, "step": 138 }, { "clip_fraction": 0.0, "completion_length": 3049.8507080078125, "dapo/avg_reward_std": 0.2440622321196965, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33928572067192625, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 40.11904761904761, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.15885714285714286, "grad_norm": 0.19676071405410767, "kl": 0.03052520751953125, "learning_rate": 3.387377967463493e-07, "loss": 0.0477, "reward": 0.6778539270162582, "reward_std": 0.9344745948910713, "step": 139 }, { "clip_fraction": 0.0, "completion_length": 3029.0486450195312, "dapo/avg_reward_std": 0.3111469969153404, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4916666768491268, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 41.666666666666664, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16, "grad_norm": 0.18594416975975037, "kl": 0.0277557373046875, "learning_rate": 3.3183567088914833e-07, "loss": 0.0431, "reward": 0.5210836753249168, "reward_std": 0.9851464107632637, "step": 140 }, { "clip_fraction": 0.0, "completion_length": 3151.5486755371094, "dapo/avg_reward_std": 0.23511080997330802, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3095238127878734, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 26.18055555555555, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16114285714285714, "grad_norm": 0.17807213962078094, "kl": 0.0266265869140625, "learning_rate": 3.250000000000001e-07, "loss": 0.0498, "reward": 0.5591800361871719, "reward_std": 0.9730060175061226, "step": 141 }, { "clip_fraction": 0.0, "completion_length": 2963.59033203125, "dapo/avg_reward_std": 0.19928012508898973, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2812500069849193, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 38.02083333333333, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16228571428571428, "grad_norm": 0.24388359487056732, "kl": 0.0318603515625, "learning_rate": 3.182328662904756e-07, "loss": 0.0567, "reward": 0.7148469444364309, "reward_std": 0.9495278596878052, "step": 142 }, { "clip_fraction": 0.0, "completion_length": 3157.791717529297, "dapo/avg_reward_std": 0.23966079843895777, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3214285767504147, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 39.166666666666664, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16342857142857142, "grad_norm": 0.20528583228588104, "kl": 0.041290283203125, "learning_rate": 3.115363310950578e-07, "loss": 0.0443, "reward": 0.5249591246247292, "reward_std": 0.9509934857487679, "step": 143 }, { "clip_fraction": 0.0, "completion_length": 3030.187530517578, "dapo/avg_reward_std": 0.30880050485332805, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4375000099341075, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 41.04166666666666, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16457142857142856, "grad_norm": 0.15082307159900665, "kl": 0.02729034423828125, "learning_rate": 3.0491243424323783e-07, "loss": 0.0511, "reward": 0.5894143544137478, "reward_std": 0.954010546207428, "step": 144 }, { "clip_fraction": 0.0, "completion_length": 2973.3993225097656, "dapo/avg_reward_std": 0.32683228328824043, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4236111181477706, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 48.66071428571428, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1657142857142857, "grad_norm": 0.2588576078414917, "kl": 0.038238525390625, "learning_rate": 2.9836319343816397e-07, "loss": 0.0611, "reward": 0.6702784113585949, "reward_std": 0.9678368121385574, "step": 145 }, { "clip_fraction": 0.0, "completion_length": 3289.8368530273438, "dapo/avg_reward_std": 0.29686578666722335, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.34567901823255753, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 51.57738095238095, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16685714285714287, "grad_norm": 0.2035798877477646, "kl": 0.0394744873046875, "learning_rate": 2.918906036420294e-07, "loss": 0.0576, "reward": 0.4602743685245514, "reward_std": 0.9194413796067238, "step": 146 }, { "clip_fraction": 0.0, "completion_length": 3068.7604064941406, "dapo/avg_reward_std": 0.27814541943371296, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3437500069849193, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 36.666666666666664, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.168, "grad_norm": 0.22469140589237213, "kl": 0.030426025390625, "learning_rate": 2.854966364683872e-07, "loss": 0.0696, "reward": 0.6243265215307474, "reward_std": 0.9174878597259521, "step": 147 }, { "clip_fraction": 0.0, "completion_length": 3041.357635498047, "dapo/avg_reward_std": 0.2907161459326744, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.458333346247673, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 57.70833333333333, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.16914285714285715, "grad_norm": 0.3123789429664612, "kl": 0.0328521728515625, "learning_rate": 2.791832395815782e-07, "loss": 0.0819, "reward": 0.8250775411725044, "reward_std": 0.9233218431472778, "step": 148 }, { "clip_fraction": 0.0, "completion_length": 2433.0694732666016, "dapo/avg_reward_std": 0.22243764168686336, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2777777839865949, "dapo/num_sampling_attempts": 4.5, "dapo/sampling_efficiency": 35.75892857142857, "dapo/total_prompts_processed": 27.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1702857142857143, "grad_norm": 0.2827485203742981, "kl": 0.0386505126953125, "learning_rate": 2.729523361034538e-07, "loss": 0.0784, "reward": 0.6995697831735015, "reward_std": 0.9434132054448128, "step": 149 }, { "clip_fraction": 0.0, "completion_length": 3096.59033203125, "dapo/avg_reward_std": 0.347408726811409, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.541666672565043, "dapo/num_sampling_attempts": 2.0, "dapo/sampling_efficiency": 63.541666666666664, "dapo/total_prompts_processed": 12.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17142857142857143, "grad_norm": 0.30529579520225525, "kl": 0.03045654296875, "learning_rate": 2.6680582402757324e-07, "loss": 0.0868, "reward": 0.7112221932038665, "reward_std": 0.9602288007736206, "step": 150 }, { "clip_fraction": 0.0, "completion_length": 3184.611083984375, "dapo/avg_reward_std": 0.1674806038115887, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.20212766528129578, "dapo/num_sampling_attempts": 5.875, "dapo/sampling_efficiency": 23.749999999999996, "dapo/total_prompts_processed": 35.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17257142857142857, "grad_norm": 0.19142813980579376, "kl": 0.037353515625, "learning_rate": 2.6074557564105724e-07, "loss": 0.045, "reward": 0.41017685225233436, "reward_std": 0.9152907580137253, "step": 151 }, { "clip_fraction": 0.0, "completion_length": 3437.3541564941406, "dapo/avg_reward_std": 0.208841644014631, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2571428622518267, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 40.416666666666664, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1737142857142857, "grad_norm": 0.15321692824363708, "kl": 0.03997802734375, "learning_rate": 2.547734369542718e-07, "loss": 0.0346, "reward": 0.34562894329428673, "reward_std": 0.856454074382782, "step": 152 }, { "clip_fraction": 0.0, "completion_length": 3008.1285095214844, "dapo/avg_reward_std": 0.3009934023022652, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5000000096857548, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 43.75, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17485714285714285, "grad_norm": 0.20332548022270203, "kl": 0.0509033203125, "learning_rate": 2.488912271385139e-07, "loss": 0.0536, "reward": 0.7641689777374268, "reward_std": 0.95648343116045, "step": 153 }, { "clip_fraction": 0.0, "completion_length": 3165.52783203125, "dapo/avg_reward_std": 0.2268627045246271, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.35256410905948055, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 40.625, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.176, "grad_norm": 0.2415708601474762, "kl": 0.032623291015625, "learning_rate": 2.4310073797187573e-07, "loss": 0.0658, "reward": 0.6375892572104931, "reward_std": 0.9544621706008911, "step": 154 }, { "clip_fraction": 0.0, "completion_length": 3226.4652709960938, "dapo/avg_reward_std": 0.2563069482644399, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.38333334078391396, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 31.249999999999996, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17714285714285713, "grad_norm": 0.2137623131275177, "kl": 0.0427093505859375, "learning_rate": 2.374037332934512e-07, "loss": 0.0533, "reward": 0.537381574511528, "reward_std": 0.9281218275427818, "step": 155 }, { "clip_fraction": 0.0, "completion_length": 2680.3090209960938, "dapo/avg_reward_std": 0.22888225678241614, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3181818226973216, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 31.29960317460317, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1782857142857143, "grad_norm": 0.3409210443496704, "kl": 0.03851318359375, "learning_rate": 2.3180194846605364e-07, "loss": 0.0962, "reward": 0.8820424377918243, "reward_std": 0.9246840327978134, "step": 156 }, { "clip_fraction": 0.0, "completion_length": 3045.3299255371094, "dapo/avg_reward_std": 0.2491180575810946, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3653846222620744, "dapo/num_sampling_attempts": 3.25, "dapo/sampling_efficiency": 45.83333333333332, "dapo/total_prompts_processed": 19.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.17942857142857144, "grad_norm": 0.23701035976409912, "kl": 0.0436248779296875, "learning_rate": 2.2629708984760706e-07, "loss": 0.0414, "reward": 0.6551959328353405, "reward_std": 0.9744707196950912, "step": 157 }, { "clip_fraction": 0.0, "completion_length": 2918.892364501953, "dapo/avg_reward_std": 0.22537656256130764, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333333730697634, "dapo/num_sampling_attempts": 4.375, "dapo/sampling_efficiency": 39.93055555555556, "dapo/total_prompts_processed": 26.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18057142857142858, "grad_norm": 0.3551786541938782, "kl": 0.0572357177734375, "learning_rate": 2.2089083427137329e-07, "loss": 0.0732, "reward": 0.5248121619224548, "reward_std": 0.9334831684827805, "step": 158 }, { "clip_fraction": 0.0, "completion_length": 2874.0729446411133, "dapo/avg_reward_std": 0.18832522351294756, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2812500046566129, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 38.69047619047618, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18171428571428572, "grad_norm": 0.25500980019569397, "kl": 0.03741455078125, "learning_rate": 2.1558482853517253e-07, "loss": 0.0537, "reward": 0.7963100634515285, "reward_std": 0.987776905298233, "step": 159 }, { "clip_fraction": 0.0, "completion_length": 2940.701385498047, "dapo/avg_reward_std": 0.16297742784023284, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.20000000536441803, "dapo/num_sampling_attempts": 6.25, "dapo/sampling_efficiency": 18.368055555555557, "dapo/total_prompts_processed": 37.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18285714285714286, "grad_norm": 0.2898014187812805, "kl": 0.058013916015625, "learning_rate": 2.1038068889975259e-07, "loss": 0.037, "reward": 0.5323189618065953, "reward_std": 0.9483579620718956, "step": 160 }, { "clip_fraction": 0.0, "completion_length": 3090.7882385253906, "dapo/avg_reward_std": 0.3046227526664734, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3733333414793015, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 43.45238095238095, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.184, "grad_norm": 0.28573325276374817, "kl": 0.040771484375, "learning_rate": 2.0528000059645995e-07, "loss": 0.0511, "reward": 0.6970310118049383, "reward_std": 0.9432796016335487, "step": 161 }, { "clip_fraction": 0.0, "completion_length": 3205.4270629882812, "dapo/avg_reward_std": 0.36972329020500183, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5438596621940011, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 55.625, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18514285714285714, "grad_norm": 0.390523225069046, "kl": 0.052459716796875, "learning_rate": 2.0028431734436308e-07, "loss": 0.0818, "reward": 0.6346883065998554, "reward_std": 0.9713371768593788, "step": 162 }, { "clip_fraction": 0.0, "completion_length": 3082.107635498047, "dapo/avg_reward_std": 0.2315557522158469, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3440860264724301, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 44.513888888888886, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18628571428571428, "grad_norm": 0.31898149847984314, "kl": 0.05328369140625, "learning_rate": 1.9539516087697517e-07, "loss": 0.0722, "reward": 0.6942785531282425, "reward_std": 0.9776681512594223, "step": 163 }, { "clip_fraction": 0.0, "completion_length": 3027.0243530273438, "dapo/avg_reward_std": 0.15836979811255997, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.22972973214613424, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 41.69642857142857, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18742857142857142, "grad_norm": 0.2931766211986542, "kl": 0.033111572265625, "learning_rate": 1.9061402047871833e-07, "loss": 0.0754, "reward": 0.944303285330534, "reward_std": 0.9451126903295517, "step": 164 }, { "clip_fraction": 0.0, "completion_length": 2894.260482788086, "dapo/avg_reward_std": 0.224585828371346, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2916666716337204, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 37.5, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18857142857142858, "grad_norm": 0.24178634583950043, "kl": 0.0533447265625, "learning_rate": 1.8594235253127372e-07, "loss": 0.0505, "reward": 0.6519163623452187, "reward_std": 0.9615699052810669, "step": 165 }, { "clip_fraction": 0.0, "completion_length": 3002.7882385253906, "dapo/avg_reward_std": 0.29886600477942105, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3160919598464308, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 35.416666666666664, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.18971428571428572, "grad_norm": 0.31221655011177063, "kl": 0.047943115234375, "learning_rate": 1.8138158006995363e-07, "loss": 0.066, "reward": 0.6383479349315166, "reward_std": 0.9029820337891579, "step": 166 }, { "clip_fraction": 0.0, "completion_length": 2927.295150756836, "dapo/avg_reward_std": 0.34752671499001353, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5438596621940011, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 48.95833333333333, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19085714285714286, "grad_norm": 0.2697528600692749, "kl": 0.045745849609375, "learning_rate": 1.7693309235023127e-07, "loss": 0.0483, "reward": 0.8266985702211969, "reward_std": 0.9544429406523705, "step": 167 }, { "clip_fraction": 0.0, "completion_length": 3212.857666015625, "dapo/avg_reward_std": 0.263968757220677, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3690476247242519, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 41.388888888888886, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.192, "grad_norm": 0.27940821647644043, "kl": 0.05059814453125, "learning_rate": 1.7259824442455923e-07, "loss": 0.0415, "reward": 0.7715255841612816, "reward_std": 0.95072440803051, "step": 168 }, { "clip_fraction": 0.0, "completion_length": 3112.5799255371094, "dapo/avg_reward_std": 0.22730760558231458, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3153153222960395, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 26.249999999999996, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19314285714285714, "grad_norm": 0.4339730143547058, "kl": 0.06396484375, "learning_rate": 1.6837835672960831e-07, "loss": 0.0777, "reward": 0.5262689627707005, "reward_std": 0.9779800549149513, "step": 169 }, { "clip_fraction": 0.0, "completion_length": 3088.6632385253906, "dapo/avg_reward_std": 0.2333034286275506, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333333721384406, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 38.263888888888886, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19428571428571428, "grad_norm": 0.48384836316108704, "kl": 0.0555419921875, "learning_rate": 1.6427471468404952e-07, "loss": 0.0974, "reward": 0.7407102398574352, "reward_std": 0.9568767622113228, "step": 170 }, { "clip_fraction": 0.0, "completion_length": 3099.347198486328, "dapo/avg_reward_std": 0.17301563743282766, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.27941177127992406, "dapo/num_sampling_attempts": 4.25, "dapo/sampling_efficiency": 31.874999999999996, "dapo/total_prompts_processed": 25.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19542857142857142, "grad_norm": 0.42263394594192505, "kl": 0.0595703125, "learning_rate": 1.6028856829700258e-07, "loss": 0.0812, "reward": 0.4282900430262089, "reward_std": 0.914498083293438, "step": 171 }, { "clip_fraction": 0.0, "completion_length": 3111.232696533203, "dapo/avg_reward_std": 0.2433939976617694, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333334093913436, "dapo/num_sampling_attempts": 4.0, "dapo/sampling_efficiency": 36.80555555555555, "dapo/total_prompts_processed": 24.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19657142857142856, "grad_norm": 0.4814501404762268, "kl": 0.05926513671875, "learning_rate": 1.5642113178727193e-07, "loss": 0.0843, "reward": 0.6843680012971163, "reward_std": 0.8743765726685524, "step": 172 }, { "clip_fraction": 0.0, "completion_length": 3008.6563110351562, "dapo/avg_reward_std": 0.25363275137814606, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.31818182811592566, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 38.78472222222222, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.1977142857142857, "grad_norm": 0.285697877407074, "kl": 0.05755615234375, "learning_rate": 1.5267358321348285e-07, "loss": 0.0456, "reward": 0.5798944532871246, "reward_std": 0.984041191637516, "step": 173 }, { "clip_fraction": 0.0, "completion_length": 3067.9791870117188, "dapo/avg_reward_std": 0.3438388824462891, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4500000044703484, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 48.33333333333333, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.19885714285714284, "grad_norm": 0.43520498275756836, "kl": 0.07098388671875, "learning_rate": 1.4904706411523448e-07, "loss": 0.0716, "reward": 0.5646946905180812, "reward_std": 0.9460153579711914, "step": 174 }, { "clip_fraction": 0.0, "completion_length": 3223.2916870117188, "dapo/avg_reward_std": 0.2690600073337555, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4133333420753479, "dapo/num_sampling_attempts": 3.125, "dapo/sampling_efficiency": 41.45833333333333, "dapo/total_prompts_processed": 18.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2, "grad_norm": 0.35144945979118347, "kl": 0.06170654296875, "learning_rate": 1.4554267916537495e-07, "loss": 0.0348, "reward": 0.556399748660624, "reward_std": 0.9192204177379608, "step": 175 }, { "clip_fraction": 0.0, "completion_length": 2946.0799102783203, "dapo/avg_reward_std": 0.25316954652468365, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37500000558793545, "dapo/num_sampling_attempts": 3.0, "dapo/sampling_efficiency": 43.75, "dapo/total_prompts_processed": 18.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20114285714285715, "grad_norm": 0.46807849407196045, "kl": 0.063018798828125, "learning_rate": 1.4216149583350755e-07, "loss": 0.0796, "reward": 0.6736351866275072, "reward_std": 0.9649264737963676, "step": 176 }, { "clip_fraction": 0.0, "completion_length": 3096.829864501953, "dapo/avg_reward_std": 0.31567848042437907, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.482456142965116, "dapo/num_sampling_attempts": 2.375, "dapo/sampling_efficiency": 55.625, "dapo/total_prompts_processed": 14.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2022857142857143, "grad_norm": 0.31731271743774414, "kl": 0.055938720703125, "learning_rate": 1.3890454406082956e-07, "loss": 0.0386, "reward": 0.681073285639286, "reward_std": 0.9661536440253258, "step": 177 }, { "clip_fraction": 0.0, "completion_length": 3235.8056030273438, "dapo/avg_reward_std": 0.24198689542967697, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3448275898037286, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 47.08333333333333, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20342857142857143, "grad_norm": 0.4640950560569763, "kl": 0.072052001953125, "learning_rate": 1.3577281594640182e-07, "loss": 0.0702, "reward": 0.5520291309803724, "reward_std": 0.9967257082462311, "step": 178 }, { "clip_fraction": 0.0, "completion_length": 3237.77783203125, "dapo/avg_reward_std": 0.30828417566689575, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4242424341765317, "dapo/num_sampling_attempts": 2.75, "dapo/sampling_efficiency": 52.82738095238095, "dapo/total_prompts_processed": 16.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20457142857142857, "grad_norm": 0.4502318203449249, "kl": 0.07550048828125, "learning_rate": 1.3276726544494571e-07, "loss": 0.0614, "reward": 0.6213867999613285, "reward_std": 0.9431608989834785, "step": 179 }, { "clip_fraction": 0.0, "completion_length": 2887.9236450195312, "dapo/avg_reward_std": 0.2488611958645008, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3518518612340645, "dapo/num_sampling_attempts": 3.375, "dapo/sampling_efficiency": 48.035714285714285, "dapo/total_prompts_processed": 20.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2057142857142857, "grad_norm": 0.44646504521369934, "kl": 0.073760986328125, "learning_rate": 1.2988880807625927e-07, "loss": 0.0683, "reward": 0.5839751102030277, "reward_std": 0.9090578481554985, "step": 180 }, { "clip_fraction": 0.0, "completion_length": 3021.2916870117188, "dapo/avg_reward_std": 0.20883248069069602, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2878787942004926, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 39.632936507936506, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20685714285714285, "grad_norm": 0.36042678356170654, "kl": 0.07421875, "learning_rate": 1.2713832064634125e-07, "loss": 0.054, "reward": 0.5517729418352246, "reward_std": 0.9483400657773018, "step": 181 }, { "clip_fraction": 0.0, "completion_length": 3249.2118530273438, "dapo/avg_reward_std": 0.2615335573043142, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.33333333847778185, "dapo/num_sampling_attempts": 3.5, "dapo/sampling_efficiency": 46.785714285714285, "dapo/total_prompts_processed": 21.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.208, "grad_norm": 0.4518042504787445, "kl": 0.072021484375, "learning_rate": 1.2451664098030743e-07, "loss": 0.0654, "reward": 0.686168298125267, "reward_std": 0.9350233674049377, "step": 182 }, { "clip_fraction": 0.0, "completion_length": 3221.6631774902344, "dapo/avg_reward_std": 0.27866364789731574, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3686868738044392, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 28.4375, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.20914285714285713, "grad_norm": 0.32408109307289124, "kl": 0.062255859375, "learning_rate": 1.220245676671809e-07, "loss": 0.0384, "reward": 0.6384344138205051, "reward_std": 0.9783304929733276, "step": 183 }, { "clip_fraction": 0.0, "completion_length": 3199.1354370117188, "dapo/avg_reward_std": 0.2816663732131322, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.316666671137015, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 45.55555555555555, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.2102857142857143, "grad_norm": 0.2197091430425644, "kl": 0.07550048828125, "learning_rate": 1.1966285981663407e-07, "loss": 0.0211, "reward": 0.45471471454948187, "reward_std": 0.9136239141225815, "step": 184 }, { "clip_fraction": 0.0, "completion_length": 3037.420166015625, "dapo/avg_reward_std": 0.17516983683044846, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.2657657728807346, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 25.729166666666664, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21142857142857144, "grad_norm": 0.4012245535850525, "kl": 0.091796875, "learning_rate": 1.1743223682775649e-07, "loss": 0.0442, "reward": 0.7168623730540276, "reward_std": 0.9515729621052742, "step": 185 }, { "clip_fraction": 0.0, "completion_length": 3222.767364501953, "dapo/avg_reward_std": 0.2550514280796051, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3444444512327512, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 43.64583333333333, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21257142857142858, "grad_norm": 0.4945845305919647, "kl": 0.083465576171875, "learning_rate": 1.1533337816991931e-07, "loss": 0.0667, "reward": 0.5391142014414072, "reward_std": 0.9342528805136681, "step": 186 }, { "clip_fraction": 0.0, "completion_length": 2858.5659942626953, "dapo/avg_reward_std": 0.23423856112264818, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3118279609949358, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 40.0297619047619, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21371428571428572, "grad_norm": 0.4291866421699524, "kl": 0.091796875, "learning_rate": 1.1336692317580158e-07, "loss": 0.0384, "reward": 0.7481220848858356, "reward_std": 0.9474795907735825, "step": 187 }, { "clip_fraction": 0.0, "completion_length": 3123.170166015625, "dapo/avg_reward_std": 0.1988734739857751, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.25675675997862946, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 31.875, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21485714285714286, "grad_norm": 0.30453264713287354, "kl": 0.080657958984375, "learning_rate": 1.1153347084664419e-07, "loss": 0.0273, "reward": 0.6236942922696471, "reward_std": 0.9715093299746513, "step": 188 }, { "clip_fraction": 0.0, "completion_length": 2872.9618530273438, "dapo/avg_reward_std": 0.21385114904372923, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333365378841, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 39.18154761904762, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.216, "grad_norm": 0.5780288577079773, "kl": 0.08612060546875, "learning_rate": 1.0983357966978745e-07, "loss": 0.0607, "reward": 0.7514887787401676, "reward_std": 1.0098591819405556, "step": 189 }, { "clip_fraction": 0.0, "completion_length": 2937.093780517578, "dapo/avg_reward_std": 0.1677520631575117, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.21895425284610076, "dapo/num_sampling_attempts": 6.375, "dapo/sampling_efficiency": 20.689484126984123, "dapo/total_prompts_processed": 38.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21714285714285714, "grad_norm": 0.3947860896587372, "kl": 0.076263427734375, "learning_rate": 1.0826776744855121e-07, "loss": 0.0487, "reward": 0.6180934552103281, "reward_std": 0.9050487726926804, "step": 190 }, { "clip_fraction": 0.0, "completion_length": 3252.090301513672, "dapo/avg_reward_std": 0.24265852073828378, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3055555621782939, "dapo/num_sampling_attempts": 3.75, "dapo/sampling_efficiency": 38.020833333333336, "dapo/total_prompts_processed": 22.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21828571428571428, "grad_norm": 0.48333072662353516, "kl": 0.09661865234375, "learning_rate": 1.068365111445064e-07, "loss": 0.0584, "reward": 0.4759152363985777, "reward_std": 0.9479196071624756, "step": 191 }, { "clip_fraction": 0.0, "completion_length": 3074.420166015625, "dapo/avg_reward_std": 0.2189681170315578, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.3333333371014431, "dapo/num_sampling_attempts": 3.625, "dapo/sampling_efficiency": 46.45833333333333, "dapo/total_prompts_processed": 21.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.21942857142857142, "grad_norm": 0.5536202192306519, "kl": 0.09814453125, "learning_rate": 1.0554024673218806e-07, "loss": 0.0731, "reward": 0.48804986744653434, "reward_std": 0.9367131069302559, "step": 192 }, { "clip_fraction": 0.0, "completion_length": 3026.9097595214844, "dapo/avg_reward_std": 0.21337791310774312, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.256756762395034, "dapo/num_sampling_attempts": 4.625, "dapo/sampling_efficiency": 30.3125, "dapo/total_prompts_processed": 27.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22057142857142858, "grad_norm": 0.5239105224609375, "kl": 0.0985107421875, "learning_rate": 1.0437936906629334e-07, "loss": 0.0561, "reward": 0.45341441221535206, "reward_std": 0.8912393003702164, "step": 193 }, { "clip_fraction": 0.0, "completion_length": 2896.656280517578, "dapo/avg_reward_std": 0.31374274492263793, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.4333333417773247, "dapo/num_sampling_attempts": 2.5, "dapo/sampling_efficiency": 46.875, "dapo/total_prompts_processed": 15.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22171428571428572, "grad_norm": 0.6310634016990662, "kl": 0.108062744140625, "learning_rate": 1.0335423176140511e-07, "loss": 0.0809, "reward": 0.6844924800097942, "reward_std": 0.9649646729230881, "step": 194 }, { "clip_fraction": 0.0, "completion_length": 3319.7048950195312, "dapo/avg_reward_std": 0.21983732057340216, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.30303031251286017, "dapo/num_sampling_attempts": 4.125, "dapo/sampling_efficiency": 29.479166666666664, "dapo/total_prompts_processed": 24.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22285714285714286, "grad_norm": 0.47936248779296875, "kl": 0.0997314453125, "learning_rate": 1.0246514708427701e-07, "loss": 0.0479, "reward": 0.3993752491660416, "reward_std": 0.9481607303023338, "step": 195 }, { "clip_fraction": 0.0, "completion_length": 3298.1736450195312, "dapo/avg_reward_std": 0.2514548934996128, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.28333333916962145, "dapo/num_sampling_attempts": 5.0, "dapo/sampling_efficiency": 33.13988095238095, "dapo/total_prompts_processed": 30.0, "dapo/valid_prompts_collected": 6.0, "epoch": 0.224, "grad_norm": 0.36350947618484497, "kl": 0.1043701171875, "learning_rate": 1.017123858587145e-07, "loss": 0.0389, "reward": 0.31427645590156317, "reward_std": 0.8980218172073364, "step": 196 }, { "clip_fraction": 0.0, "completion_length": 3260.4861450195312, "dapo/avg_reward_std": 0.1836753969009106, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.24786325486806723, "dapo/num_sampling_attempts": 4.875, "dapo/sampling_efficiency": 28.154761904761905, "dapo/total_prompts_processed": 29.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22514285714285714, "grad_norm": 0.3354601562023163, "kl": 0.0946044921875, "learning_rate": 1.0109617738307911e-07, "loss": 0.0301, "reward": 0.5015182960778475, "reward_std": 0.9334053322672844, "step": 197 }, { "clip_fraction": 0.0, "completion_length": 3031.3958129882812, "dapo/avg_reward_std": 0.3008538554696476, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.5196078463512308, "dapo/num_sampling_attempts": 2.125, "dapo/sampling_efficiency": 76.5625, "dapo/total_prompts_processed": 12.75, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22628571428571428, "grad_norm": 0.48223650455474854, "kl": 0.10247802734375, "learning_rate": 1.0061670936044178e-07, "loss": 0.0648, "reward": 0.573589576408267, "reward_std": 0.9578919112682343, "step": 198 }, { "clip_fraction": 0.0, "completion_length": 2948.3854064941406, "dapo/avg_reward_std": 0.43072181940078735, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.8333333373069763, "dapo/num_sampling_attempts": 1.25, "dapo/sampling_efficiency": 87.5, "dapo/total_prompts_processed": 7.5, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22742857142857142, "grad_norm": 0.6141620874404907, "kl": 0.09808349609375, "learning_rate": 1.002741278414069e-07, "loss": 0.0827, "reward": 0.7053878791630268, "reward_std": 0.9694960787892342, "step": 199 }, { "clip_fraction": 0.0, "completion_length": 2714.482666015625, "dapo/avg_reward_std": 0.26207208441149804, "dapo/filter_reward_index": 0.0, "dapo/kept_prompts_ratio": 0.37096774914572317, "dapo/num_sampling_attempts": 3.875, "dapo/sampling_efficiency": 30.624999999999993, "dapo/total_prompts_processed": 23.25, "dapo/valid_prompts_collected": 6.0, "epoch": 0.22857142857142856, "grad_norm": 0.2072688341140747, "kl": 0.1064453125, "learning_rate": 1.0006853717962393e-07, "loss": 0.0122, "reward": 0.5771910101175308, "reward_std": 0.9156405553221703, "step": 200 }, { "epoch": 0.22857142857142856, "step": 200, "total_flos": 0.0, "train_loss": 0.02940896774176508, "train_runtime": 83918.4654, "train_samples_per_second": 0.114, "train_steps_per_second": 0.002 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }