Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +34 -0
- eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +3 -0
- eval_results/plots/eval_results_acc_keywords.png +3 -0
- eval_results/plots/eval_results_acc_pass_acc.png +3 -0
- eval_results/plots/eval_results_acc_tokens.png +3 -0
- eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
- eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_clip_ratio.png +3 -0
- eval_results/plots/eval_results_correct_tokens.png +3 -0
- eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_tokens_keywords.png +3 -0
- eval_results/plots/eval_results_wrong_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_clip_ratio.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_correct_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.csv +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.parquet +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_lighteval|gpqa:diamond|0_2025-08-18T16-21-32.723446.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.csv +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_lighteval|gpqa:diamond|0_2025-08-18T16-10-44.289594.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_100_actor_huggingface/2025-08-18T16-41-52.767998/details_extended|ifeval|0_2025-08-18T16-41-52.767998.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_100_actor_huggingface/2025-08-18T16-41-52.767998/details_lighteval|gpqa:diamond|0_2025-08-18T16-41-52.767998.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.csv +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_lighteval|gpqa:diamond|0_2025-08-18T15-50-59.925267.parquet +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.csv +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.parquet +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_lighteval|gpqa:diamond|0_2025-08-18T15-54-45.139937.parquet +3 -0
- eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.csv +3 -0
- eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.parquet +3 -0
- eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_lighteval|gpqa:diamond|0_2025-08-18T15-47-59.222617.parquet +3 -0
- eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_50_actor_huggingface/2025-08-18T15-45-31.395283/details_extended|ifeval|0_2025-08-18T15-45-31.395283.parquet +3 -0
- eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_50_actor_huggingface/2025-08-18T15-45-31.395283/details_lighteval|gpqa:diamond|0_2025-08-18T15-45-31.395283.parquet +3 -0
- eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_60_actor_huggingface/2025-08-18T15-39-35.865731/details_extended|ifeval|0_2025-08-18T15-39-35.865731.parquet +3 -0
- eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_60_actor_huggingface/2025-08-18T15-39-35.865731/details_lighteval|gpqa:diamond|0_2025-08-18T15-39-35.865731.parquet +3 -0
- eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_70_actor_huggingface/2025-08-18T15-45-24.707769/details_extended|ifeval|0_2025-08-18T15-45-24.707769.parquet +3 -0
- eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_70_actor_huggingface/2025-08-18T15-45-24.707769/details_lighteval|gpqa:diamond|0_2025-08-18T15-45-24.707769.parquet +3 -0
- eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_80_actor_huggingface/2025-08-18T16-44-03.891235/details_extended|ifeval|0_2025-08-18T16-44-03.891235.parquet +3 -0
- eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_80_actor_huggingface/2025-08-18T16-44-03.891235/details_lighteval|gpqa:diamond|0_2025-08-18T16-44-03.891235.parquet +3 -0
- eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_90_actor_huggingface/2025-08-18T16-39-21.271295/details_extended|ifeval|0_2025-08-18T16-39-21.271295.parquet +3 -0
- eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_90_actor_huggingface/2025-08-18T16-39-21.271295/details_lighteval|gpqa:diamond|0_2025-08-18T16-39-21.271295.parquet +3 -0
- global_step_10/actor/huggingface/tokenizer.json +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,37 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
global_step_40/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.csv filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.csv filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.csv filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.csv filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.csv filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11982e5fd8fef307ea51ecf60643f5b5b15567f6fddc200ad110306b45e88753
|
| 3 |
+
size 12914359
|
eval_results/plots/eval_results_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a68fabbf29c74825a3b25f47e1dd5d8b6ff9b365aadd3f7a6e958915eda8fc7
|
| 3 |
+
size 18334919
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_extended|ifeval|0_2025-08-18T16-21-32.723446.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34bacdac47e83c635d89bb64fe8fef71de883ea95f51e3a31cebe32903a5121a
|
| 3 |
+
size 3010355
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_0_actor_huggingface/2025-08-18T16-21-32.723446/details_lighteval|gpqa:diamond|0_2025-08-18T16-21-32.723446.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74cd417d66e222fd211285ab6d2f8bd72b026701512000e9724e100584212d26
|
| 3 |
+
size 570631
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10ec661a65f876511a3a009602fb1a39599d85c6834d28419f68da773bf583c2
|
| 3 |
+
size 16965722
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_extended|ifeval|0_2025-08-18T16-10-44.289594.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a3be47e2a30f7a41c34fd52b1c68ebba33b7824687f4e8b41dfde0d367030ac
|
| 3 |
+
size 2691645
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_10_actor_huggingface/2025-08-18T16-10-44.289594/details_lighteval|gpqa:diamond|0_2025-08-18T16-10-44.289594.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d6ec882fea65fb93295b7d6e8dcbd57bc683f42fc287ff9fc93540f1ea89227
|
| 3 |
+
size 519547
|
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_100_actor_huggingface/2025-08-18T16-41-52.767998/details_extended|ifeval|0_2025-08-18T16-41-52.767998.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e71386d57dead1b26499fd6672be698ca34d709def4d27d2f6db24db409c569
|
| 3 |
+
size 1498483
|
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_100_actor_huggingface/2025-08-18T16-41-52.767998/details_lighteval|gpqa:diamond|0_2025-08-18T16-41-52.767998.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6303acde7adf9472a18ae4fe112758c0ada25807dcd43eedf12cc4a456acaa9
|
| 3 |
+
size 658714
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0e7203de9136f0b1a47efa1c2f3ce858bc123da32bc3b0250e43b56bb9fffcd
|
| 3 |
+
size 11154595
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_extended|ifeval|0_2025-08-18T15-50-59.925267.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a80038e98338b97c4ebf7558d5a4a83fb67092a8080bf6e3c193e0c4a89da30b
|
| 3 |
+
size 2083800
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_20_actor_huggingface/2025-08-18T15-50-59.925267/details_lighteval|gpqa:diamond|0_2025-08-18T15-50-59.925267.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b37171406736aa94d9e04fbb2def7613bdaa00d122e874b8b9292d291c1ec629
|
| 3 |
+
size 574644
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c29af90c8aaa76611bc094ac0cc8984147e52486ba3bf67a0cfe62834eccdaf
|
| 3 |
+
size 11387185
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_extended|ifeval|0_2025-08-18T15-54-45.139937.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd33ede61db0fcdb173b8d4372d3dfb91e9278ca48135f90643daa04b31ed47d
|
| 3 |
+
size 2256722
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_30_actor_huggingface/2025-08-18T15-54-45.139937/details_lighteval|gpqa:diamond|0_2025-08-18T15-54-45.139937.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47d60bc54edbacbe4c84c236580fdd65d3d1c215b458eb5f605785cedcf90a77
|
| 3 |
+
size 654486
|
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24bcf48d7c49154b06cf56ced4567a07f47c9c4e4ecc28e08010c6396b6b45aa
|
| 3 |
+
size 11323716
|
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_extended|ifeval|0_2025-08-18T15-47-59.222617.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dae9f46f0f349f1c173e059b38ca13c1c52fc15d92162a13567d2134f3926ec
|
| 3 |
+
size 1945986
|
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_40_actor_huggingface/2025-08-18T15-47-59.222617/details_lighteval|gpqa:diamond|0_2025-08-18T15-47-59.222617.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0dd9c292fb8b29c3085e23e564321091d1196e06a4740f8d83ec8b3db215381
|
| 3 |
+
size 618770
|
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_50_actor_huggingface/2025-08-18T15-45-31.395283/details_extended|ifeval|0_2025-08-18T15-45-31.395283.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:279affbcfe2ceb7e0c9f60d4e6ee3a4ba1f9a84687eef9ed57f1d304c461b60e
|
| 3 |
+
size 1925213
|
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_50_actor_huggingface/2025-08-18T15-45-31.395283/details_lighteval|gpqa:diamond|0_2025-08-18T15-45-31.395283.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:927632d9deb938b57f4e558647f7f2b3de5bba51786fb40229663080dc094e56
|
| 3 |
+
size 608015
|
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_60_actor_huggingface/2025-08-18T15-39-35.865731/details_extended|ifeval|0_2025-08-18T15-39-35.865731.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:286ce3e196a6ac3176c7001f9a722f2b80849fbf79a9984fccd86274d979b0f9
|
| 3 |
+
size 1623068
|
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_60_actor_huggingface/2025-08-18T15-39-35.865731/details_lighteval|gpqa:diamond|0_2025-08-18T15-39-35.865731.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:337877be84c349305b4dde8a473bf6ea664a245f007bd57ea79cd3f2dd84d413
|
| 3 |
+
size 677607
|
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_70_actor_huggingface/2025-08-18T15-45-24.707769/details_extended|ifeval|0_2025-08-18T15-45-24.707769.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ac30c9873cdcc8d5d95ff31cfdcf37c76bf78e1b7bb85c086f86daccc739b7f
|
| 3 |
+
size 1929554
|
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_70_actor_huggingface/2025-08-18T15-45-24.707769/details_lighteval|gpqa:diamond|0_2025-08-18T15-45-24.707769.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28fa63b65deb15ee21e9ab6bd2235f4853494753d2c4194d11c419689287d81e
|
| 3 |
+
size 684473
|
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_80_actor_huggingface/2025-08-18T16-44-03.891235/details_extended|ifeval|0_2025-08-18T16-44-03.891235.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8812184b17fe91f50bcb740642982fd8e9dc31281bead5bb951a5be7761e0a3c
|
| 3 |
+
size 1675223
|
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_80_actor_huggingface/2025-08-18T16-44-03.891235/details_lighteval|gpqa:diamond|0_2025-08-18T16-44-03.891235.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67ec72c743a462249d9de35a86e01bfaba2c1f3b71fbfca71997583a80c42b7d
|
| 3 |
+
size 674531
|
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_90_actor_huggingface/2025-08-18T16-39-21.271295/details_extended|ifeval|0_2025-08-18T16-39-21.271295.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73b2c90673c7b839cccfce4d605bad54a7d4e01ebc5fb9e03b1eb8e7c777846d
|
| 3 |
+
size 1644351
|
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.2_global_step_90_actor_huggingface/2025-08-18T16-39-21.271295/details_lighteval|gpqa:diamond|0_2025-08-18T16-39-21.271295.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7e2440801cfffee42d282ea2fbfee5099158e7a34361a34a81df542acfc497d
|
| 3 |
+
size 633477
|
global_step_10/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|