bensondccnqwc's picture
Add files using upload-large-folder tool
e10392c verified
exp_name,global_step,json_relpath,mbpp_base_pass@1,mbpp_plus_pass@1
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,5,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_5--actor--huggingface_vllm_temp_1.0.eval_results.json,53.7,45.9
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,10,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_10--actor--huggingface_vllm_temp_1.0.eval_results.json,53.4,44.7
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,15,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_15--actor--huggingface_vllm_temp_1.0.eval_results.json,55.3,45.8
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,20,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_20--actor--huggingface_vllm_temp_1.0.eval_results.json,54.1,45.0
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,25,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_25--actor--huggingface_vllm_temp_1.0.eval_results.json,55.8,46.6
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,30,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_30--actor--huggingface_vllm_temp_1.0.eval_results.json,56.0,47.5
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,35,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_35--actor--huggingface_vllm_temp_1.0.eval_results.json,55.3,45.9
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,40,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_40--actor--huggingface_vllm_temp_1.0.eval_results.json,56.3,46.8
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,45,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_45--actor--huggingface_vllm_temp_1.0.eval_results.json,57.6,48.1
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo,50,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_align_dapo--global_step_50--actor--huggingface_vllm_temp_1.0.eval_results.json,55.9,47.2