| exp_name,global_step,json_relpath,mbpp_base_pass@1,mbpp_plus_pass@1 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,0,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_0--actor--huggingface_vllm_temp_1.0.eval_results.json,24.7,21.0 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,10,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_10--actor--huggingface_vllm_temp_1.0.eval_results.json,54.3,45.3 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,20,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_20--actor--huggingface_vllm_temp_1.0.eval_results.json,55.7,47.5 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,30,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_30--actor--huggingface_vllm_temp_1.0.eval_results.json,56.8,47.2 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,40,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_40--actor--huggingface_vllm_temp_1.0.eval_results.json,57.6,48.6 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,50,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_50--actor--huggingface_vllm_temp_1.0.eval_results.json,58.4,49.6 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,60,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_60--actor--huggingface_vllm_temp_1.0.eval_results.json,60.0,50.3 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,70,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_70--actor--huggingface_vllm_temp_1.0.eval_results.json,60.8,50.9 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,80,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_80--actor--huggingface_vllm_temp_1.0.eval_results.json,61.6,51.2 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,90,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_90--actor--huggingface_vllm_temp_1.0.eval_results.json,60.5,51.5 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,100,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_100--actor--huggingface_vllm_temp_1.0.eval_results.json,60.6,52.2 | |