| exp_name,global_step,json_relpath,mbpp_base_pass@1,mbpp_plus_pass@1 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,5,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_5--actor--huggingface_vllm_temp_1.0.eval_results.json,53.7,45.9 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,10,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_10--actor--huggingface_vllm_temp_1.0.eval_results.json,53.4,44.7 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,15,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_15--actor--huggingface_vllm_temp_1.0.eval_results.json,55.3,45.8 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,20,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_20--actor--huggingface_vllm_temp_1.0.eval_results.json,54.1,45.0 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,25,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_25--actor--huggingface_vllm_temp_1.0.eval_results.json,55.8,46.6 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,30,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_30--actor--huggingface_vllm_temp_1.0.eval_results.json,56.0,47.5 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,35,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_35--actor--huggingface_vllm_temp_1.0.eval_results.json,55.3,45.9 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,40,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_40--actor--huggingface_vllm_temp_1.0.eval_results.json,56.3,46.8 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,45,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_45--actor--huggingface_vllm_temp_1.0.eval_results.json,57.6,48.1 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,50,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_50--actor--huggingface_vllm_temp_1.0.eval_results.json,55.9,47.2 | |