Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +31 -0
- eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +3 -0
- eval_results/plots/eval_results_acc_keywords.png +3 -0
- eval_results/plots/eval_results_acc_pass_acc.png +3 -0
- eval_results/plots/eval_results_acc_tokens.png +3 -0
- eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
- eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_clip_ratio.png +3 -0
- eval_results/plots/eval_results_correct_tokens.png +3 -0
- eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_tokens_keywords.png +3 -0
- eval_results/plots/eval_results_wrong_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_clip_ratio.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_correct_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.csv +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.parquet +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_lighteval|gpqa:diamond|0_2025-08-22T07-57-51.872019.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.csv +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_lighteval|gpqa:diamond|0_2025-08-22T07-47-25.389710.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_100_actor_huggingface/2025-08-22T08-13-29.136616/details_extended|ifeval|0_2025-08-22T08-13-29.136616.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_100_actor_huggingface/2025-08-22T08-13-29.136616/details_lighteval|gpqa:diamond|0_2025-08-22T08-13-29.136616.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_20_actor_huggingface/2025-08-22T07-29-13.669606/details_extended|ifeval|0_2025-08-22T07-29-13.669606.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_20_actor_huggingface/2025-08-22T07-29-13.669606/details_lighteval|gpqa:diamond|0_2025-08-22T07-29-13.669606.parquet +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_30_actor_huggingface/2025-08-22T07-15-48.254808/details_extended|ifeval|0_2025-08-22T07-15-48.254808.parquet +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_30_actor_huggingface/2025-08-22T07-15-48.254808/details_lighteval|gpqa:diamond|0_2025-08-22T07-15-48.254808.parquet +3 -0
- eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_40_actor_huggingface/2025-08-22T07-12-09.643109/details_extended|ifeval|0_2025-08-22T07-12-09.643109.parquet +3 -0
- eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_40_actor_huggingface/2025-08-22T07-12-09.643109/details_lighteval|gpqa:diamond|0_2025-08-22T07-12-09.643109.parquet +3 -0
- eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_50_actor_huggingface/2025-08-22T07-09-28.251052/details_extended|ifeval|0_2025-08-22T07-09-28.251052.parquet +3 -0
- eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_50_actor_huggingface/2025-08-22T07-09-28.251052/details_lighteval|gpqa:diamond|0_2025-08-22T07-09-28.251052.parquet +3 -0
- eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_60_actor_huggingface/2025-08-22T07-09-44.265871/details_extended|ifeval|0_2025-08-22T07-09-44.265871.parquet +3 -0
- eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_60_actor_huggingface/2025-08-22T07-09-44.265871/details_lighteval|gpqa:diamond|0_2025-08-22T07-09-44.265871.parquet +3 -0
- eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_70_actor_huggingface/2025-08-22T08-09-53.250731/details_extended|ifeval|0_2025-08-22T08-09-53.250731.parquet +3 -0
- eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_70_actor_huggingface/2025-08-22T08-09-53.250731/details_lighteval|gpqa:diamond|0_2025-08-22T08-09-53.250731.parquet +3 -0
- eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_80_actor_huggingface/2025-08-22T08-08-44.641132/details_extended|ifeval|0_2025-08-22T08-08-44.641132.parquet +3 -0
- eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_80_actor_huggingface/2025-08-22T08-08-44.641132/details_lighteval|gpqa:diamond|0_2025-08-22T08-08-44.641132.parquet +3 -0
- eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_90_actor_huggingface/2025-08-22T08-09-51.132895/details_extended|ifeval|0_2025-08-22T08-09-51.132895.parquet +3 -0
- eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_90_actor_huggingface/2025-08-22T08-09-51.132895/details_lighteval|gpqa:diamond|0_2025-08-22T08-09-51.132895.parquet +3 -0
- global_step_10/actor/huggingface/tokenizer.json +3 -0
- global_step_10/data.pt +3 -0
- global_step_100/actor/huggingface/tokenizer.json +3 -0
- global_step_100/data.pt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,34 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.csv filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.csv filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
global_step_20/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e25369e899dd25ca782745e42fde9dc9526de1d22c7d6646308a664742ccd6d2
|
| 3 |
+
size 12120990
|
eval_results/plots/eval_results_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9da2b503d544d3adaf82d1bcdb74dd2338defd8f9934f88fce278f11d262b3c8
|
| 3 |
+
size 20285461
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_extended|ifeval|0_2025-08-22T07-57-51.872019.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df24775c74045785e614b2ced0830d6d6aeebcc119b8a428f716cc9ae0797a3d
|
| 3 |
+
size 3168296
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_0_actor_huggingface/2025-08-22T07-57-51.872019/details_lighteval|gpqa:diamond|0_2025-08-22T07-57-51.872019.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f635f006ad862d99fb9c834773b5214dbce21047eac6598d40ccd56cb3db143b
|
| 3 |
+
size 617970
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8170a4bec78c145a20d4b17154dba882cb4e283744c06a80846441628c0cab53
|
| 3 |
+
size 15610177
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_extended|ifeval|0_2025-08-22T07-47-25.389710.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4adb83e29ae5e2c6dface3cac1ef86ab82c826b1f9b5e83c8ed5248abe46a512
|
| 3 |
+
size 2843855
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_10_actor_huggingface/2025-08-22T07-47-25.389710/details_lighteval|gpqa:diamond|0_2025-08-22T07-47-25.389710.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:007353af2bd4da05eac818c1bb9af3caecffbbe18a8aeb046292d46b4e0210a9
|
| 3 |
+
size 543738
|
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_100_actor_huggingface/2025-08-22T08-13-29.136616/details_extended|ifeval|0_2025-08-22T08-13-29.136616.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:459b904b60dc662054534daeb60c655b68ed818b47b49a74aacb03cda7df0c0b
|
| 3 |
+
size 1306224
|
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_100_actor_huggingface/2025-08-22T08-13-29.136616/details_lighteval|gpqa:diamond|0_2025-08-22T08-13-29.136616.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b9a3a319adc75f19f2904b9c4df6bc0c3e1b6edc44f0ca587ba86fc737762ff
|
| 3 |
+
size 710733
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_20_actor_huggingface/2025-08-22T07-29-13.669606/details_extended|ifeval|0_2025-08-22T07-29-13.669606.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6af44d6bdc27afa78c75348219b7a97094c2fafbba367df73882639ee06ba71e
|
| 3 |
+
size 1852554
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_20_actor_huggingface/2025-08-22T07-29-13.669606/details_lighteval|gpqa:diamond|0_2025-08-22T07-29-13.669606.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c323901a555f9e2d4071903687f56b04721f7931c55c48b5a028dd6679e1375c
|
| 3 |
+
size 644713
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_30_actor_huggingface/2025-08-22T07-15-48.254808/details_extended|ifeval|0_2025-08-22T07-15-48.254808.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7614b496007aad888271cd710e3a5541eb9cc05a0e270d6d31f063f8a1294f2
|
| 3 |
+
size 1611319
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_30_actor_huggingface/2025-08-22T07-15-48.254808/details_lighteval|gpqa:diamond|0_2025-08-22T07-15-48.254808.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69ed92f5d51449b3c8b3f434205c9763fa11eb3b4caf597384837347b88c0128
|
| 3 |
+
size 649559
|
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_40_actor_huggingface/2025-08-22T07-12-09.643109/details_extended|ifeval|0_2025-08-22T07-12-09.643109.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a06b81ab020f2135899ed6a3721e4fdfa71ad9ae340e83ab482770405d70c01
|
| 3 |
+
size 1522928
|
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_40_actor_huggingface/2025-08-22T07-12-09.643109/details_lighteval|gpqa:diamond|0_2025-08-22T07-12-09.643109.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bbcb1345665e9f96896ee1503803f848b34af147250248c67572fc9b21a5923
|
| 3 |
+
size 680229
|
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_50_actor_huggingface/2025-08-22T07-09-28.251052/details_extended|ifeval|0_2025-08-22T07-09-28.251052.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e0c7e77a6adae7bf6ee41b947f353c2355f38fe524511ff3564af6f2876056f
|
| 3 |
+
size 1431840
|
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_50_actor_huggingface/2025-08-22T07-09-28.251052/details_lighteval|gpqa:diamond|0_2025-08-22T07-09-28.251052.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d872f2c877cd51909dcb2d623e9b7ffdbb965b3376675d2ab7bfe21f9ce7c972
|
| 3 |
+
size 669400
|
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_60_actor_huggingface/2025-08-22T07-09-44.265871/details_extended|ifeval|0_2025-08-22T07-09-44.265871.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d58b1de3823dcb594c613507928e6079866644c8f7fce2e7a90a97ba8b1ba09e
|
| 3 |
+
size 1416768
|
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_60_actor_huggingface/2025-08-22T07-09-44.265871/details_lighteval|gpqa:diamond|0_2025-08-22T07-09-44.265871.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9057e644306261ac22c2883c32cf3e4c042f319c9fd5ccae2cde6515ad117dbd
|
| 3 |
+
size 690886
|
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_70_actor_huggingface/2025-08-22T08-09-53.250731/details_extended|ifeval|0_2025-08-22T08-09-53.250731.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a159d4da06c7a15f52cafd6413fb71d1590b961b2ce077bdc938ac09c396f21
|
| 3 |
+
size 1284283
|
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_70_actor_huggingface/2025-08-22T08-09-53.250731/details_lighteval|gpqa:diamond|0_2025-08-22T08-09-53.250731.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8cb4519f5ccd8217e0276e1d7456bc3908ecb29e241628ae6a1ca1a934caef1
|
| 3 |
+
size 673612
|
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_80_actor_huggingface/2025-08-22T08-08-44.641132/details_extended|ifeval|0_2025-08-22T08-08-44.641132.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08e01921ee052d37f655863cea8cebe19e34632aa273de1bdb7cdb13a851f8fc
|
| 3 |
+
size 1292453
|
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_80_actor_huggingface/2025-08-22T08-08-44.641132/details_lighteval|gpqa:diamond|0_2025-08-22T08-08-44.641132.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09631eb71eaf3d81ca1b15b56e3eb565fc8d6e5b203d3ed6d6b0ccf3c21e1cd8
|
| 3 |
+
size 684884
|
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_90_actor_huggingface/2025-08-22T08-09-51.132895/details_extended|ifeval|0_2025-08-22T08-09-51.132895.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4f943a5855b2736779ef6b695e2d751107a67885b91201d58ff35b7c02fc783
|
| 3 |
+
size 1374059
|
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.9_global_step_90_actor_huggingface/2025-08-22T08-09-51.132895/details_lighteval|gpqa:diamond|0_2025-08-22T08-09-51.132895.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e4e06978ada198f9b4857089229e95c92433097332cf31f159cc3515411d17e
|
| 3 |
+
size 683583
|
global_step_10/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_10/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:361b3a2db9de44ab0f855170ee87f31505491e0a949c4376e2e334e64b147592
|
| 3 |
+
size 1947
|
global_step_100/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_100/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ace1ba6509c69d2d98913c7d0a01abe0da6c0896b8d6629782cbce5ffef4bc99
|
| 3 |
+
size 1947
|