Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +33 -0
- eval_results/plots/eval_results_acc_keywords.png +3 -0
- eval_results/plots/eval_results_acc_pass_acc.png +3 -0
- eval_results/plots/eval_results_acc_tokens.png +3 -0
- eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
- eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_clip_ratio.png +3 -0
- eval_results/plots/eval_results_correct_tokens.png +3 -0
- eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_tokens_keywords.png +3 -0
- eval_results/plots/eval_results_wrong_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_clip_ratio.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_correct_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_0_actor_huggingface/2025-08-15T13-35-49.816341/details_extended|ifeval|0_2025-08-15T13-35-49.816341.csv +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_0_actor_huggingface/2025-08-15T13-35-49.816341/details_extended|ifeval|0_2025-08-15T13-35-49.816341.parquet +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_0_actor_huggingface/2025-08-15T13-35-49.816341/details_lighteval|gpqa:diamond|0_2025-08-15T13-35-49.816341.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_10_actor_huggingface/2025-08-15T13-31-29.716693/details_extended|ifeval|0_2025-08-15T13-31-29.716693.csv +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_10_actor_huggingface/2025-08-15T13-31-29.716693/details_extended|ifeval|0_2025-08-15T13-31-29.716693.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_10_actor_huggingface/2025-08-15T13-31-29.716693/details_lighteval|gpqa:diamond|0_2025-08-15T13-31-29.716693.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_100_actor_huggingface/2025-08-15T14-37-42.123261/details_extended|ifeval|0_2025-08-15T14-37-42.123261.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_100_actor_huggingface/2025-08-15T14-37-42.123261/details_lighteval|gpqa:diamond|0_2025-08-15T14-37-42.123261.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_20_actor_huggingface/2025-08-15T13-21-09.066792/details_extended|ifeval|0_2025-08-15T13-21-09.066792.csv +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_20_actor_huggingface/2025-08-15T13-21-09.066792/details_extended|ifeval|0_2025-08-15T13-21-09.066792.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_20_actor_huggingface/2025-08-15T13-21-09.066792/details_lighteval|gpqa:diamond|0_2025-08-15T13-21-09.066792.parquet +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_30_actor_huggingface/2025-08-15T13-17-38.270755/details_extended|ifeval|0_2025-08-15T13-17-38.270755.csv +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_30_actor_huggingface/2025-08-15T13-17-38.270755/details_extended|ifeval|0_2025-08-15T13-17-38.270755.parquet +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_30_actor_huggingface/2025-08-15T13-17-38.270755/details_lighteval|gpqa:diamond|0_2025-08-15T13-17-38.270755.parquet +3 -0
- eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_40_actor_huggingface/2025-08-15T14-12-48.635371/details_extended|ifeval|0_2025-08-15T14-12-48.635371.parquet +3 -0
- eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_40_actor_huggingface/2025-08-15T14-12-48.635371/details_lighteval|gpqa:diamond|0_2025-08-15T14-12-48.635371.parquet +3 -0
- eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_50_actor_huggingface/2025-08-15T14-05-24.807346/details_extended|ifeval|0_2025-08-15T14-05-24.807346.parquet +3 -0
- eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_50_actor_huggingface/2025-08-15T14-05-24.807346/details_lighteval|gpqa:diamond|0_2025-08-15T14-05-24.807346.parquet +3 -0
- eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_60_actor_huggingface/2025-08-15T14-09-25.201221/details_extended|ifeval|0_2025-08-15T14-09-25.201221.csv +3 -0
- eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_60_actor_huggingface/2025-08-15T14-09-25.201221/details_extended|ifeval|0_2025-08-15T14-09-25.201221.parquet +3 -0
- eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_60_actor_huggingface/2025-08-15T14-09-25.201221/details_lighteval|gpqa:diamond|0_2025-08-15T14-09-25.201221.parquet +3 -0
- eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_70_actor_huggingface/2025-08-15T14-07-38.503008/details_extended|ifeval|0_2025-08-15T14-07-38.503008.parquet +3 -0
- eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_70_actor_huggingface/2025-08-15T14-07-38.503008/details_lighteval|gpqa:diamond|0_2025-08-15T14-07-38.503008.parquet +3 -0
- eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_80_actor_huggingface/2025-08-15T14-34-26.588600/details_extended|ifeval|0_2025-08-15T14-34-26.588600.parquet +3 -0
- eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_80_actor_huggingface/2025-08-15T14-34-26.588600/details_lighteval|gpqa:diamond|0_2025-08-15T14-34-26.588600.parquet +3 -0
- eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_90_actor_huggingface/2025-08-15T14-39-05.603495/details_extended|ifeval|0_2025-08-15T14-39-05.603495.parquet +3 -0
- eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_90_actor_huggingface/2025-08-15T14-39-05.603495/details_lighteval|gpqa:diamond|0_2025-08-15T14-39-05.603495.parquet +3 -0
- global_step_10/actor/huggingface/tokenizer.json +3 -0
- global_step_10/data.pt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,36 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
global_step_40/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_20_actor_huggingface/2025-08-15T13-21-09.066792/details_extended|ifeval|0_2025-08-15T13-21-09.066792.csv filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_10_actor_huggingface/2025-08-15T13-31-29.716693/details_extended|ifeval|0_2025-08-15T13-31-29.716693.csv filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_0_actor_huggingface/2025-08-15T13-35-49.816341/details_extended|ifeval|0_2025-08-15T13-35-49.816341.csv filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_60_actor_huggingface/2025-08-15T14-09-25.201221/details_extended|ifeval|0_2025-08-15T14-09-25.201221.csv filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_30_actor_huggingface/2025-08-15T13-17-38.270755/details_extended|ifeval|0_2025-08-15T13-17-38.270755.csv filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
eval_results/plots/eval_results_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_0_actor_huggingface/2025-08-15T13-35-49.816341/details_extended|ifeval|0_2025-08-15T13-35-49.816341.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0704d46d5009fac35c1fb3f86bac1ac5343c750eaa4d2b24a4577234a095c383
|
| 3 |
+
size 20289652
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_0_actor_huggingface/2025-08-15T13-35-49.816341/details_extended|ifeval|0_2025-08-15T13-35-49.816341.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d181481b6db464baea906c7528104540f4dc9a94b635ed2985911599692fe0b5
|
| 3 |
+
size 2838029
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_0_actor_huggingface/2025-08-15T13-35-49.816341/details_lighteval|gpqa:diamond|0_2025-08-15T13-35-49.816341.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a3196fea0af31d935e8b69d265ac29e9fa9923f7fbee5158accdb553aa2260e
|
| 3 |
+
size 495364
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_10_actor_huggingface/2025-08-15T13-31-29.716693/details_extended|ifeval|0_2025-08-15T13-31-29.716693.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b6d59f9fa6ea129cf75b514d93e30fd56837184a9032591d1978a6409a8cfc3
|
| 3 |
+
size 16707831
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_10_actor_huggingface/2025-08-15T13-31-29.716693/details_extended|ifeval|0_2025-08-15T13-31-29.716693.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42dc9db56eddb8b16de3d8841b8afac20187d458b1576b1c56d39ff951b3054e
|
| 3 |
+
size 2659632
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_10_actor_huggingface/2025-08-15T13-31-29.716693/details_lighteval|gpqa:diamond|0_2025-08-15T13-31-29.716693.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6932a54dc0f412744d43922430d12bd59a6827b02dbb9cfaa52eeb276ac07552
|
| 3 |
+
size 600984
|
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_100_actor_huggingface/2025-08-15T14-37-42.123261/details_extended|ifeval|0_2025-08-15T14-37-42.123261.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b08819452d52b43de4bd8191d8aeaa25beca18af3e99f31b458153367708d5de
|
| 3 |
+
size 1746269
|
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_100_actor_huggingface/2025-08-15T14-37-42.123261/details_lighteval|gpqa:diamond|0_2025-08-15T14-37-42.123261.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3efe4d59a6e4abbcfa180a28054b463e917f56ae17d848e2f14334fe9557036
|
| 3 |
+
size 656145
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_20_actor_huggingface/2025-08-15T13-21-09.066792/details_extended|ifeval|0_2025-08-15T13-21-09.066792.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:171674c500c16f242298405937d5cc8d70ea1d2f84868b4067fe7b7ba3ea0ee6
|
| 3 |
+
size 13456111
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_20_actor_huggingface/2025-08-15T13-21-09.066792/details_extended|ifeval|0_2025-08-15T13-21-09.066792.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67f490ca2e17d149785c2d5834a5d078c97fe2a367b967b4afa26e978c263b3b
|
| 3 |
+
size 2142698
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_20_actor_huggingface/2025-08-15T13-21-09.066792/details_lighteval|gpqa:diamond|0_2025-08-15T13-21-09.066792.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6524e01a8d68ce7e34dcf85c9c2cbb9566535cf8d119106587ebc00b34fd3c6
|
| 3 |
+
size 619572
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_30_actor_huggingface/2025-08-15T13-17-38.270755/details_extended|ifeval|0_2025-08-15T13-17-38.270755.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23f3a7542756241cacdfbd1967a34848e591e73dffa483974fed5a8bc3cf2657
|
| 3 |
+
size 17174480
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_30_actor_huggingface/2025-08-15T13-17-38.270755/details_extended|ifeval|0_2025-08-15T13-17-38.270755.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:952cb93a977f67aa14d51eb747ec29eca8181f6771d843fe4a03796bbad64afb
|
| 3 |
+
size 2376015
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_30_actor_huggingface/2025-08-15T13-17-38.270755/details_lighteval|gpqa:diamond|0_2025-08-15T13-17-38.270755.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:221fe666e54e16190ccbf1f543272791d39193bd14e8dbd37a719ce4c848ba28
|
| 3 |
+
size 599110
|
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_40_actor_huggingface/2025-08-15T14-12-48.635371/details_extended|ifeval|0_2025-08-15T14-12-48.635371.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b41870e24e341ae28e8dea2513570fdd7861ad26a4a9d7462cad911e0076f4f5
|
| 3 |
+
size 1843318
|
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_40_actor_huggingface/2025-08-15T14-12-48.635371/details_lighteval|gpqa:diamond|0_2025-08-15T14-12-48.635371.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a0c8cfbcc72597d33d05ea48a0be5f28d45d50f77f2628efdced20efc586d9c
|
| 3 |
+
size 694745
|
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_50_actor_huggingface/2025-08-15T14-05-24.807346/details_extended|ifeval|0_2025-08-15T14-05-24.807346.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27b3fe934d86e1e4baebd389831dee58c350e041f808ed2acd75ef9391d60397
|
| 3 |
+
size 1986129
|
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_50_actor_huggingface/2025-08-15T14-05-24.807346/details_lighteval|gpqa:diamond|0_2025-08-15T14-05-24.807346.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e6c4fe7789e6515171161d34e70fc87b8a9309eb57e28a63c1662dd54c524e4
|
| 3 |
+
size 541560
|
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_60_actor_huggingface/2025-08-15T14-09-25.201221/details_extended|ifeval|0_2025-08-15T14-09-25.201221.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bba227a5e66dbf4e0431e625bc249cd702d54fc423ae4bbf0a2345301be80532
|
| 3 |
+
size 10635818
|
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_60_actor_huggingface/2025-08-15T14-09-25.201221/details_extended|ifeval|0_2025-08-15T14-09-25.201221.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73e41b17d53f38c647c98e37be86d76861e8f99ed27a48807ef0728b1a5e088c
|
| 3 |
+
size 1749733
|
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_60_actor_huggingface/2025-08-15T14-09-25.201221/details_lighteval|gpqa:diamond|0_2025-08-15T14-09-25.201221.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7626dd7b6de8a09d11ddc3674c337c2bc7f02d66a9769c75253db368d700ac01
|
| 3 |
+
size 621843
|
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_70_actor_huggingface/2025-08-15T14-07-38.503008/details_extended|ifeval|0_2025-08-15T14-07-38.503008.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94cb40b7161e4fbc3add9e9474ce9fe2b7a6addb3b75ed399d9d26f410cad9b0
|
| 3 |
+
size 1836298
|
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_70_actor_huggingface/2025-08-15T14-07-38.503008/details_lighteval|gpqa:diamond|0_2025-08-15T14-07-38.503008.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e49655eb911f41c42593e108616dcff63c5bcca377641db65fe45b0d88c8796a
|
| 3 |
+
size 582882
|
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_80_actor_huggingface/2025-08-15T14-34-26.588600/details_extended|ifeval|0_2025-08-15T14-34-26.588600.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1c39397eef73665f4bd7e79d708df091045007c7de6e71544129e345f151495
|
| 3 |
+
size 1563421
|
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_80_actor_huggingface/2025-08-15T14-34-26.588600/details_lighteval|gpqa:diamond|0_2025-08-15T14-34-26.588600.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5e62eca5ad3ff63c7fd9620000d2d5c5bfdcf7e29ddece8b20f1f1aae0c736f
|
| 3 |
+
size 632643
|
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_90_actor_huggingface/2025-08-15T14-39-05.603495/details_extended|ifeval|0_2025-08-15T14-39-05.603495.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa6bfc477e143a5c708bc71b0c734bc4c52acb9b847f0ed88bf47aba2ae18114
|
| 3 |
+
size 1779469
|
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.1_global_step_90_actor_huggingface/2025-08-15T14-39-05.603495/details_lighteval|gpqa:diamond|0_2025-08-15T14-39-05.603495.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41cf96edc7fe1d3c0e62292f767c7f4a4d1b67d1acbdd26ab80ad58b8b488337
|
| 3 |
+
size 643128
|
global_step_10/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_10/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f78cc225b4123c10f383e9e95ce34119e3088173444cbb8c9a610ca43cbac2cc
|
| 3 |
+
size 1947
|