bensondccnqwc's picture
Add files using upload-large-folder tool
11c074c verified
exp_name,global_step,json_relpath,mbpp_base_pass@1,mbpp_plus_pass@1
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,5,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_5--actor--huggingface_vllm_temp_1.0.eval_results.json,54.4,45.6
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,10,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_10--actor--huggingface_vllm_temp_1.0.eval_results.json,55.6,47.0
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,15,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_15--actor--huggingface_vllm_temp_1.0.eval_results.json,55.0,46.1
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,20,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_20--actor--huggingface_vllm_temp_1.0.eval_results.json,56.0,47.2
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,25,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_25--actor--huggingface_vllm_temp_1.0.eval_results.json,57.1,48.2
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,30,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_30--actor--huggingface_vllm_temp_1.0.eval_results.json,57.1,48.3
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,35,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_35--actor--huggingface_vllm_temp_1.0.eval_results.json,54.6,46.5
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,40,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_40--actor--huggingface_vllm_temp_1.0.eval_results.json,57.9,49.3
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,45,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_45--actor--huggingface_vllm_temp_1.0.eval_results.json,56.7,48.3
verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15,50,verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15/evalplus_results/mbpp/home--work--compass_innovation--minzijun--checkpoints--verl_role_sft_dapo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.15--global_step_50--actor--huggingface_vllm_temp_1.0.eval_results.json,56.0,47.4