bensondccnqwc's picture
Add files using upload-large-folder tool
30ef604 verified
exp_name,global_step,json_relpath,mbpp_base_pass@1,mbpp_plus_pass@1
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,10,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_10--actor--huggingface_vllm_temp_1.0.eval_results.json,55.4,45.2
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,20,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_20--actor--huggingface_vllm_temp_1.0.eval_results.json,55.4,46.5
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,30,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_30--actor--huggingface_vllm_temp_1.0.eval_results.json,57.9,48.5
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,40,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_40--actor--huggingface_vllm_temp_1.0.eval_results.json,58.6,49.4
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,50,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_50--actor--huggingface_vllm_temp_1.0.eval_results.json,59.6,50.9
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,60,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_60--actor--huggingface_vllm_temp_1.0.eval_results.json,59.6,50.9
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,70,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_70--actor--huggingface_vllm_temp_1.0.eval_results.json,61.5,52.2
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,80,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_80--actor--huggingface_vllm_temp_1.0.eval_results.json,61.7,52.4
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,90,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_90--actor--huggingface_vllm_temp_1.0.eval_results.json,62.3,54.0
ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2,100,ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2/evalplus_results/mbpp/home--work--minzijun_rl_output--checkpoints--ppo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_v2--global_step_100--actor--huggingface_vllm_temp_1.0.eval_results.json,62.3,53.0