| exp_name,global_step,model_name,json_relpath,prompt_level_strict_acc,prompt_level_strict_acc_stderr,inst_level_strict_acc,inst_level_strict_acc_stderr,prompt_level_loose_acc,prompt_level_loose_acc_stderr,inst_level_loose_acc,inst_level_loose_acc_stderr,gpqa_pass@1:1_samples,gpqa_pass@1:1_samples_stderr | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,0,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_0/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/results_2025-08-22T21-59-53.252655.json,0.1645101663585952,0.01595401792671804,0.29856115107913667,0.00043588321487573415,0.17744916820702403,0.016440744379699793,0.31894484412470026,0.00045164438595463155,0.25252525252525254,0.030954055470365897 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,0,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_0/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/results_2025-08-24T06-10-51.524843.json,0.21072088724584104,0.017549801883664246,0.328537170263789,0.000503330427109403,0.2365988909426987,0.01828882758262565,0.35731414868105515,0.0005103196928875978,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,10,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_10/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/results_2025-08-22T21-51-40.106242.json,0.1977818853974122,0.017141254719084918,0.32014388489208634,0.0004811224139885245,0.2088724584103512,0.017493107347793312,0.3405275779376499,0.0004826159386477645,0.23232323232323232,0.030088629490217487 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,10,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_10/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/results_2025-08-24T06-01-33.728040.json,0.2033271719038817,0.017319718641834722,0.30455635491606714,0.0004891508609155334,0.22181146025878004,0.01787876540794438,0.3345323741007194,0.0004970114680774834,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,20,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_20/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/results_2025-08-22T21-50-15.704473.json,0.20147874306839186,0.017260802262371477,0.32973621103117506,0.00047828591594576884,0.22920517560073936,0.018087757424955338,0.3597122302158273,0.0005054167480426688,0.2676767676767677,0.03154449888270286 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,20,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_20/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/results_2025-08-24T05-55-03.541337.json,0.2033271719038817,0.017319718641834715,0.33093525179856115,0.0004970975763198274,0.22920517560073936,0.018087757424955338,0.35851318944844124,0.0005180255871960584,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,30,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_30_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_30/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_30_actor_huggingface/results_2025-08-22T21-38-21.345863.json,0.19963031423290203,0.017201316264889934,0.3261390887290168,0.0004753111490466111,0.22920517560073936,0.018087757424955345,0.3609112709832134,0.00048533443925998963,0.24242424242424243,0.03053289223393202 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,30,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_30_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_30/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_30_actor_huggingface/results_2025-08-24T05-44-37.776773.json,0.2088724584103512,0.017493107347793305,0.3333333333333333,0.0004959379902298786,0.22181146025878004,0.017878765407944384,0.34772182254196643,0.0004939708551979106,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,40,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_40_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_40/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_40_actor_huggingface/results_2025-08-22T21-39-31.356295.json,0.2365988909426987,0.018288827582625646,0.35611510791366907,0.00048250671612914405,0.25693160813308685,0.018802962575636854,0.38369304556354916,0.0005038077287645109,0.26262626262626265,0.03135305009533086 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,40,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_40_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_40/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_40_actor_huggingface/results_2025-08-24T05-54-37.431353.json,0.21256931608133087,0.017605954482106632,0.3345323741007194,0.0005075026511821061,0.24399260628465805,0.018482234430967866,0.3669064748201439,0.0005210201629222579,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,50,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_50_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_50/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_50_actor_huggingface/results_2025-08-22T21-39-22.119596.json,0.2088724584103512,0.017493107347793305,0.34292565947242204,0.0004921466594951312,0.23105360443622922,0.01813875717052341,0.3657074340527578,0.0005053491766972938,0.2474747474747475,0.03074630074212451 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,50,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_50_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_50/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_50_actor_huggingface/results_2025-08-24T05-51-56.876297.json,0.2513863216266174,0.018668216152240413,0.38968824940047964,0.0005126877872718119,0.2828096118299446,0.019380609595892766,0.42206235011990406,0.0005288278619212021,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,60,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_60_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_60/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_60_actor_huggingface/results_2025-08-22T21-37-55.750769.json,0.21256931608133087,0.01760595448210661,0.34772182254196643,0.0004922071621813319,0.25508317929759705,0.01875849195041414,0.3872901678657074,0.0005125725892120684,0.31313131313131315,0.033042050878136525 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,60,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_60_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_60/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_60_actor_huggingface/results_2025-08-24T05-41-15.558475.json,0.24029574861367836,0.01838647358148708,0.3597122302158273,0.0005157263887185362,0.26247689463955637,0.018933742876044622,0.38369304556354916,0.0005251815784520928,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,70,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_70_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_70/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_70_actor_huggingface/results_2025-08-22T22-37-40.349268.json,0.24399260628465805,0.018482234430967866,0.36330935251798563,0.0005284860476845682,0.2846580406654344,0.019418769106486003,0.4040767386091127,0.0005432433187540159,0.3181818181818182,0.03318477333845331 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,70,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_70_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_70/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_70_actor_huggingface/results_2025-08-24T05-44-33.516844.json,0.26247689463955637,0.018933742876044626,0.3800959232613909,0.0005167800893147942,0.29944547134935307,0.019709834029672916,0.4172661870503597,0.0005251551037494576,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,80,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_80_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_80/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_80_actor_huggingface/results_2025-08-22T22-42-52.264172.json,0.2476894639556377,0.018576139285185197,0.3752997601918465,0.0005230934509498333,0.2809611829944547,0.019342047683777005,0.4088729016786571,0.0005356056764533407,0.26262626262626265,0.031353050095330855 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,80,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_80_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_80/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_80_actor_huggingface/results_2025-08-24T06-43-29.911737.json,0.24953789279112754,0.018622404509805863,0.3800959232613909,0.0005510759898975793,0.29390018484288355,0.019603612015637092,0.42326139088729015,0.0005641190661908537,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,90,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_90_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_90/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_90_actor_huggingface/results_2025-08-22T22-35-57.252501.json,0.24214417744916822,0.018434587800223095,0.3920863309352518,0.0004963906750273186,0.2809611829944547,0.019342047683777005,0.42805755395683454,0.0005082678849371766,0.21717171717171718,0.029376616484945633 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,90,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_90_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_90/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_90_actor_huggingface/results_2025-08-24T06-44-12.807078.json,0.24399260628465805,0.018482234430967866,0.36930455635491605,0.0004881164791481993,0.27911275415896486,0.019303080958497216,0.4052757793764988,0.0004927891481844729,, | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,100,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_100/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface/results_2025-08-22T22-32-53.893348.json,0.23475046210720887,0.018239288213433784,0.35611510791366907,0.00047752346118073573,0.27171903881700554,0.019143116099594026,0.39448441247002397,0.0004960211655333147,0.26262626262626265,0.03135305009533085 | |
| verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle,100,_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface,verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle/eval_results_ood/global_step_100/results/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface/results_2025-08-24T06-36-23.774981.json,0.24214417744916822,0.018434587800223085,0.3800959232613909,0.00048439015286744333,0.28650646950092423,0.019456528583211702,0.42685851318944845,0.0005018331714116503,, | |