| exp_name,global_step,json_relpath,mbpp_base_pass@1,mbpp_plus_pass@1 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,5,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_5--actor--huggingface_vllm_temp_1.0.eval_results.json,54.4,45.6 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,10,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_10--actor--huggingface_vllm_temp_1.0.eval_results.json,55.6,47.0 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,15,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_15--actor--huggingface_vllm_temp_1.0.eval_results.json,55.0,46.1 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,20,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_20--actor--huggingface_vllm_temp_1.0.eval_results.json,56.0,47.2 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,25,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_25--actor--huggingface_vllm_temp_1.0.eval_results.json,57.1,48.2 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,30,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_30--actor--huggingface_vllm_temp_1.0.eval_results.json,57.1,48.3 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,35,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_35--actor--huggingface_vllm_temp_1.0.eval_results.json,54.6,46.5 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,40,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_40--actor--huggingface_vllm_temp_1.0.eval_results.json,57.9,49.3 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,45,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_45--actor--huggingface_vllm_temp_1.0.eval_results.json,56.7,48.3 | |
| verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,50,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_50--actor--huggingface_vllm_temp_1.0.eval_results.json,56.0,47.4 | |