bensondccnqwc's picture
Add files using upload-large-folder tool
d568d68 verified
exp_name,global_step,json_relpath,mbpp_base_pass@1,mbpp_plus_pass@1
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,0,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_0--actor--huggingface_vllm_temp_1.0.eval_results.json,24.7,21.0
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,10,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_10--actor--huggingface_vllm_temp_1.0.eval_results.json,54.3,45.3
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,20,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_20--actor--huggingface_vllm_temp_1.0.eval_results.json,55.7,47.5
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,30,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_30--actor--huggingface_vllm_temp_1.0.eval_results.json,56.8,47.2
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,40,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_40--actor--huggingface_vllm_temp_1.0.eval_results.json,57.6,48.6
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,50,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_50--actor--huggingface_vllm_temp_1.0.eval_results.json,58.4,49.6
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,60,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_60--actor--huggingface_vllm_temp_1.0.eval_results.json,60.0,50.3
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,70,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_70--actor--huggingface_vllm_temp_1.0.eval_results.json,60.8,50.9
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,80,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_80--actor--huggingface_vllm_temp_1.0.eval_results.json,61.6,51.2
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,90,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_90--actor--huggingface_vllm_temp_1.0.eval_results.json,60.5,51.5
verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,100,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/evalplus_results/mbpp/home--work--minzijun_rl_output_2--checkpoints--verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle--global_step_100--actor--huggingface_vllm_temp_1.0.eval_results.json,60.6,52.2