Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +50 -0
- eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +3 -0
- eval_results/plots/eval_results_acc_keywords.png +3 -0
- eval_results/plots/eval_results_acc_pass_acc.png +3 -0
- eval_results/plots/eval_results_acc_tokens.png +3 -0
- eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
- eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_clip_ratio.png +3 -0
- eval_results/plots/eval_results_correct_tokens.png +3 -0
- eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_tokens_keywords.png +3 -0
- eval_results/plots/eval_results_wrong_tokens.png +3 -0
- eval_results_avg16/plots/eval_results_avg16_repeat_ratio_and_token_length.png +3 -0
- eval_results_avg16/plots/eval_results_avg16_wrong_tokens.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_acc_keywords.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_acc_pass_acc.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_acc_tokens.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_avg_stop_tokens.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_box_ratio_and_token_length.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_clip_ratio.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_correct_tokens.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_repeat_ratio_and_token_length.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_tokens_keywords.png +3 -0
- eval_results_avg32/plots/eval_results_avg32_wrong_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_clip_ratio.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_correct_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png +3 -0
- eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-22T21-59-53.252655/details_extended|ifeval|0_2025-08-22T21-59-53.252655.csv +3 -0
- eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-22T21-59-53.252655/details_extended|ifeval|0_2025-08-22T21-59-53.252655.parquet +3 -0
- eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-22T21-59-53.252655/details_lighteval|gpqa:diamond|0_2025-08-22T21-59-53.252655.parquet +3 -0
- eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-24T06-10-51.524843/details_extended|ifeval|0_2025-08-24T06-10-51.524843.csv +3 -0
- eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-24T06-10-51.524843/details_extended|ifeval|0_2025-08-24T06-10-51.524843.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-22T21-51-40.106242/details_extended|ifeval|0_2025-08-22T21-51-40.106242.csv +3 -0
- eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-22T21-51-40.106242/details_extended|ifeval|0_2025-08-22T21-51-40.106242.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-22T21-51-40.106242/details_lighteval|gpqa:diamond|0_2025-08-22T21-51-40.106242.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-24T06-01-33.728040/details_extended|ifeval|0_2025-08-24T06-01-33.728040.csv +3 -0
- eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-24T06-01-33.728040/details_extended|ifeval|0_2025-08-24T06-01-33.728040.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface/2025-08-22T22-32-53.893348/details_extended|ifeval|0_2025-08-22T22-32-53.893348.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface/2025-08-22T22-32-53.893348/details_lighteval|gpqa:diamond|0_2025-08-22T22-32-53.893348.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface/2025-08-24T06-36-23.774981/details_extended|ifeval|0_2025-08-24T06-36-23.774981.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/2025-08-22T21-50-15.704473/details_extended|ifeval|0_2025-08-22T21-50-15.704473.csv +3 -0
- eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/2025-08-22T21-50-15.704473/details_extended|ifeval|0_2025-08-22T21-50-15.704473.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/2025-08-22T21-50-15.704473/details_lighteval|gpqa:diamond|0_2025-08-22T21-50-15.704473.parquet +3 -0
.gitattributes
CHANGED
|
@@ -43,3 +43,53 @@ global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -t
|
|
| 43 |
global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 44 |
global_step_20/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 45 |
global_step_30/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 44 |
global_step_20/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 45 |
global_step_30/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
eval_results_avg16/plots/eval_results_avg16_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
eval_results_avg16/plots/eval_results_avg16_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
eval_results_avg32/plots/eval_results_avg32_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
eval_results_avg32/plots/eval_results_avg32_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
eval_results_avg32/plots/eval_results_avg32_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
eval_results_avg32/plots/eval_results_avg32_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
eval_results_avg32/plots/eval_results_avg32_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
eval_results_avg32/plots/eval_results_avg32_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
eval_results_avg32/plots/eval_results_avg32_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
eval_results_avg32/plots/eval_results_avg32_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
eval_results_avg32/plots/eval_results_avg32_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
eval_results_avg32/plots/eval_results_avg32_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-24T06-10-51.524843/details_extended|ifeval|0_2025-08-24T06-10-51.524843.csv filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-22T21-59-53.252655/details_extended|ifeval|0_2025-08-22T21-59-53.252655.csv filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-22T21-51-40.106242/details_extended|ifeval|0_2025-08-22T21-51-40.106242.csv filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-24T06-01-33.728040/details_extended|ifeval|0_2025-08-24T06-01-33.728040.csv filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/2025-08-24T05-55-03.541337/details_extended|ifeval|0_2025-08-24T05-55-03.541337.csv filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/2025-08-22T21-50-15.704473/details_extended|ifeval|0_2025-08-22T21-50-15.704473.csv filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
eval_results_ood/global_step_30/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_30_actor_huggingface/2025-08-24T05-44-37.776773/details_extended|ifeval|0_2025-08-24T05-44-37.776773.csv filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
eval_results_ood/global_step_30/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_30_actor_huggingface/2025-08-22T21-38-21.345863/details_extended|ifeval|0_2025-08-22T21-38-21.345863.csv filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
eval_results_ood/global_step_40/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_40_actor_huggingface/2025-08-22T21-39-31.356295/details_extended|ifeval|0_2025-08-22T21-39-31.356295.csv filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
eval_results_ood/global_step_50/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_50_actor_huggingface/2025-08-22T21-39-22.119596/details_extended|ifeval|0_2025-08-22T21-39-22.119596.csv filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
eval_results_ood/global_step_50/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_50_actor_huggingface/2025-08-24T05-51-56.876297/details_extended|ifeval|0_2025-08-24T05-51-56.876297.csv filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
eval_results_ood/global_step_40/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_40_actor_huggingface/2025-08-24T05-54-37.431353/details_extended|ifeval|0_2025-08-24T05-54-37.431353.csv filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
eval_results_ood/global_step_60/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_60_actor_huggingface/2025-08-22T21-37-55.750769/details_extended|ifeval|0_2025-08-22T21-37-55.750769.csv filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
eval_results_ood/global_step_70/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_70_actor_huggingface/2025-08-22T22-37-40.349268/details_extended|ifeval|0_2025-08-22T22-37-40.349268.csv filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
eval_results_ood/global_step_70/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_70_actor_huggingface/2025-08-24T05-44-33.516844/details_extended|ifeval|0_2025-08-24T05-44-33.516844.csv filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
eval_results_ood/global_step_80/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_80_actor_huggingface/2025-08-22T22-42-52.264172/details_extended|ifeval|0_2025-08-22T22-42-52.264172.csv filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
eval_results_ood/global_step_90/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_90_actor_huggingface/2025-08-24T06-44-12.807078/details_extended|ifeval|0_2025-08-24T06-44-12.807078.csv filter=lfs diff=lfs merge=lfs -text
|
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ff9273f7375822df00a8822d8d2baf9d15e646641ddca406e49d399f41b0a3b
|
| 3 |
+
size 11883083
|
eval_results/plots/eval_results_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg16/plots/eval_results_avg16_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg16/plots/eval_results_avg16_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg32/plots/eval_results_avg32_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-22T21-59-53.252655/details_extended|ifeval|0_2025-08-22T21-59-53.252655.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9800b44e313740bb47d6b2173f893b1a920e59b74224c8fb2c11fa4b2fcc2d6a
|
| 3 |
+
size 19121550
|
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-22T21-59-53.252655/details_extended|ifeval|0_2025-08-22T21-59-53.252655.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9593b94d437284733b033513af89f919c25eb4382eea1d3904bf70ce47596ca
|
| 3 |
+
size 2883389
|
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-22T21-59-53.252655/details_lighteval|gpqa:diamond|0_2025-08-22T21-59-53.252655.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be1ed464355dc0c52d62483bc62dcd9195068359dafbe13fe6a9969ec92abea9
|
| 3 |
+
size 496418
|
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-24T06-10-51.524843/details_extended|ifeval|0_2025-08-24T06-10-51.524843.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d20a05df2ee2bd81ee63cda2f85422fbcd6ce4865abe8d05340d819665446067
|
| 3 |
+
size 19994943
|
eval_results_ood/global_step_0/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_0_actor_huggingface/2025-08-24T06-10-51.524843/details_extended|ifeval|0_2025-08-24T06-10-51.524843.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d9bbb67c4a575fd02a483cb86d8c85faa77383b20135fc673cb238b67cd1249
|
| 3 |
+
size 3245950
|
eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-22T21-51-40.106242/details_extended|ifeval|0_2025-08-22T21-51-40.106242.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b55eef8612ff417e53075228c7f898ab2665e6211a81752a38b1449a69d493b8
|
| 3 |
+
size 18066599
|
eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-22T21-51-40.106242/details_extended|ifeval|0_2025-08-22T21-51-40.106242.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67918ede7592f1ec0f12a907f018975edb932b7da888785103ff31e25e5c6b80
|
| 3 |
+
size 2783976
|
eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-22T21-51-40.106242/details_lighteval|gpqa:diamond|0_2025-08-22T21-51-40.106242.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69aeee0323f4544c38630ee5a8383944c6867782144ab14333b22895a5c27d41
|
| 3 |
+
size 600158
|
eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-24T06-01-33.728040/details_extended|ifeval|0_2025-08-24T06-01-33.728040.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff26a8d3aed192b4c07acfe742ec88acf199702fec683282964badf1883baea3
|
| 3 |
+
size 17528529
|
eval_results_ood/global_step_10/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_10_actor_huggingface/2025-08-24T06-01-33.728040/details_extended|ifeval|0_2025-08-24T06-01-33.728040.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b1bb209088f341de8656bc4293bd967e80b37e0fe87122af5850c00cc03e7e7
|
| 3 |
+
size 2976719
|
eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface/2025-08-22T22-32-53.893348/details_extended|ifeval|0_2025-08-22T22-32-53.893348.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3360d7a2b39ef6455a5efa3d06de2002566c852186f99fbefe18e4c92078180e
|
| 3 |
+
size 1491028
|
eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface/2025-08-22T22-32-53.893348/details_lighteval|gpqa:diamond|0_2025-08-22T22-32-53.893348.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31248d7d49794a2e63f7f59128354fdaea3e4ab35f82e3f0fbc30ccf605b8391
|
| 3 |
+
size 531581
|
eval_results_ood/global_step_100/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_100_actor_huggingface/2025-08-24T06-36-23.774981/details_extended|ifeval|0_2025-08-24T06-36-23.774981.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c86d7e783cd257a33d71aed2f889b0600c538aa7a76a9f2f31bb9febd4772b6
|
| 3 |
+
size 1520595
|
eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/2025-08-22T21-50-15.704473/details_extended|ifeval|0_2025-08-22T21-50-15.704473.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9708b743e2cb74cd0a117aa3f771f71c46c4147928d745573c3f0e81821308d9
|
| 3 |
+
size 16977649
|
eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/2025-08-22T21-50-15.704473/details_extended|ifeval|0_2025-08-22T21-50-15.704473.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a469d26aac2e448511d3cce0294051e36f2065aff3c27adcd373cc3ac3c76bb
|
| 3 |
+
size 2387399
|
eval_results_ood/global_step_20/details/_home_work_minzijun_rl_output_2_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_no_shuffle_global_step_20_actor_huggingface/2025-08-22T21-50-15.704473/details_lighteval|gpqa:diamond|0_2025-08-22T21-50-15.704473.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef0b54f3ae01ac98f2fe23157c71fc9c0562cc792fb83f1dfd5d5ade6fa5b0aa
|
| 3 |
+
size 627710
|