Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +34 -0
- eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +3 -0
- eval_results/plots/eval_results_acc_keywords.png +3 -0
- eval_results/plots/eval_results_acc_pass_acc.png +3 -0
- eval_results/plots/eval_results_acc_tokens.png +3 -0
- eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
- eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_clip_ratio.png +3 -0
- eval_results/plots/eval_results_correct_tokens.png +3 -0
- eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
- eval_results/plots/eval_results_tokens_keywords.png +3 -0
- eval_results/plots/eval_results_wrong_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_acc_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_clip_ratio.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_correct_tokens.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png +3 -0
- eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.csv +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.parquet +3 -0
- eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_lighteval|gpqa:diamond|0_2025-08-19T03-03-31.558565.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.csv +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.parquet +3 -0
- eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_lighteval|gpqa:diamond|0_2025-08-19T02-59-44.015414.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_100_actor_huggingface/2025-08-19T03-23-25.775217/details_extended|ifeval|0_2025-08-19T03-23-25.775217.parquet +3 -0
- eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_100_actor_huggingface/2025-08-19T03-23-25.775217/details_lighteval|gpqa:diamond|0_2025-08-19T03-23-25.775217.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.csv +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.parquet +3 -0
- eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_lighteval|gpqa:diamond|0_2025-08-19T02-45-28.477295.parquet +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_30_actor_huggingface/2025-08-19T02-31-43.587146/details_extended|ifeval|0_2025-08-19T02-31-43.587146.parquet +3 -0
- eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_30_actor_huggingface/2025-08-19T02-31-43.587146/details_lighteval|gpqa:diamond|0_2025-08-19T02-31-43.587146.parquet +3 -0
- eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_40_actor_huggingface/2025-08-19T02-32-20.502116/details_extended|ifeval|0_2025-08-19T02-32-20.502116.parquet +3 -0
- eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_40_actor_huggingface/2025-08-19T02-32-20.502116/details_lighteval|gpqa:diamond|0_2025-08-19T02-32-20.502116.parquet +3 -0
- eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_50_actor_huggingface/2025-08-19T02-27-37.276111/details_extended|ifeval|0_2025-08-19T02-27-37.276111.parquet +3 -0
- eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_50_actor_huggingface/2025-08-19T02-27-37.276111/details_lighteval|gpqa:diamond|0_2025-08-19T02-27-37.276111.parquet +3 -0
- eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_60_actor_huggingface/2025-08-19T02-24-48.573923/details_extended|ifeval|0_2025-08-19T02-24-48.573923.parquet +3 -0
- eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_60_actor_huggingface/2025-08-19T02-24-48.573923/details_lighteval|gpqa:diamond|0_2025-08-19T02-24-48.573923.parquet +3 -0
- eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_70_actor_huggingface/2025-08-19T02-23-41.225302/details_extended|ifeval|0_2025-08-19T02-23-41.225302.parquet +3 -0
- eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_70_actor_huggingface/2025-08-19T02-23-41.225302/details_lighteval|gpqa:diamond|0_2025-08-19T02-23-41.225302.parquet +3 -0
- eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_80_actor_huggingface/2025-08-19T03-22-27.759640/details_extended|ifeval|0_2025-08-19T03-22-27.759640.parquet +3 -0
- eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_80_actor_huggingface/2025-08-19T03-22-27.759640/details_lighteval|gpqa:diamond|0_2025-08-19T03-22-27.759640.parquet +3 -0
- eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_90_actor_huggingface/2025-08-19T03-23-41.379521/details_extended|ifeval|0_2025-08-19T03-23-41.379521.parquet +3 -0
- eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_90_actor_huggingface/2025-08-19T03-23-41.379521/details_lighteval|gpqa:diamond|0_2025-08-19T03-23-41.379521.parquet +3 -0
- global_step_0/actor/huggingface/model.safetensors +3 -0
- global_step_10/actor/huggingface/tokenizer.json +3 -0
- global_step_10/data.pt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,37 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
global_step_40/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.csv filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.csv filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.csv filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
global_step_20/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
global_step_30/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
eval_results/global_step_0/mmlu_stem/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c128f44b9bd5065604edd55ea62d32b25dfd015eac122620303e55215bed515
|
| 3 |
+
size 12239207
|
eval_results/plots/eval_results_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results/plots/eval_results_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_acc_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_clip_ratio.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_correct_tokens.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png
ADDED
|
Git LFS Details
|
eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png
ADDED
|
Git LFS Details
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77f4cbd1d57baaa65840d41c4e3fdf408aa0a5d078c3ab233d451de97e99b727
|
| 3 |
+
size 18592855
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_extended|ifeval|0_2025-08-19T03-03-31.558565.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d611194575c562264b41400d3d9288f43e8c6320cb0e6d912a6489b1d85682f1
|
| 3 |
+
size 2789061
|
eval_results_ood/global_step_0/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_0_actor_huggingface/2025-08-19T03-03-31.558565/details_lighteval|gpqa:diamond|0_2025-08-19T03-03-31.558565.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4073c119302a1e9cd694f357f710227a8957303f7178eb8702a4bd2c42b5a7b
|
| 3 |
+
size 566842
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50f4fa9aca026027eac08f0ed0ddc2b00d926e18b7f88e330fc4293c392a73d0
|
| 3 |
+
size 17748213
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_extended|ifeval|0_2025-08-19T02-59-44.015414.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cea5449f92cc18f780f33cf1ab76f3769bac4d58eb2e93547daf41df8842abeb
|
| 3 |
+
size 3026523
|
eval_results_ood/global_step_10/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_10_actor_huggingface/2025-08-19T02-59-44.015414/details_lighteval|gpqa:diamond|0_2025-08-19T02-59-44.015414.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f310b31e7299ff4348cd8d9655f8334f9b76a8990f85a17be0445fc50aafe21c
|
| 3 |
+
size 611170
|
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_100_actor_huggingface/2025-08-19T03-23-25.775217/details_extended|ifeval|0_2025-08-19T03-23-25.775217.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81c4fdf2936ca8d92b10ccc2b75ceb72f551b3b3be588ede7604967537743b29
|
| 3 |
+
size 1526557
|
eval_results_ood/global_step_100/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_100_actor_huggingface/2025-08-19T03-23-25.775217/details_lighteval|gpqa:diamond|0_2025-08-19T03-23-25.775217.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a6c707d324d9afd6a1cfd752d177e675674484b761e2760fe7ce65b56324180
|
| 3 |
+
size 762665
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0309fa76751880316815296f3f5d9ea697b4bcf47aab8ff480b5b00cb1728aa9
|
| 3 |
+
size 17747706
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_extended|ifeval|0_2025-08-19T02-45-28.477295.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d101c8fa4c97ffd37e3554371ec26e987a62ad1b7d8cd2551e0cecae28d65d5
|
| 3 |
+
size 2445648
|
eval_results_ood/global_step_20/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_20_actor_huggingface/2025-08-19T02-45-28.477295/details_lighteval|gpqa:diamond|0_2025-08-19T02-45-28.477295.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1dffd5da7c2e6e280733a688198e964c5b843c36ac5e1187658f3a1da0467b32
|
| 3 |
+
size 604606
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_30_actor_huggingface/2025-08-19T02-31-43.587146/details_extended|ifeval|0_2025-08-19T02-31-43.587146.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e06532f2a4ca0d6fc447b2c01eaa81246f747d6e82cd3a415179fd94e01ab5c9
|
| 3 |
+
size 1997084
|
eval_results_ood/global_step_30/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_30_actor_huggingface/2025-08-19T02-31-43.587146/details_lighteval|gpqa:diamond|0_2025-08-19T02-31-43.587146.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49be0512c751e57b2bdc48244427efe32f3db3411839a30cc75027c2a0bff29a
|
| 3 |
+
size 659079
|
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_40_actor_huggingface/2025-08-19T02-32-20.502116/details_extended|ifeval|0_2025-08-19T02-32-20.502116.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccbccd663da32f237c308c7ce146a10b1403cd60ade911b955a8466c59aa4315
|
| 3 |
+
size 1933203
|
eval_results_ood/global_step_40/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_40_actor_huggingface/2025-08-19T02-32-20.502116/details_lighteval|gpqa:diamond|0_2025-08-19T02-32-20.502116.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72fe64ee7be245e4906a6f79dea95073c247311a70f7469a2e86d7ca3d721dc1
|
| 3 |
+
size 729678
|
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_50_actor_huggingface/2025-08-19T02-27-37.276111/details_extended|ifeval|0_2025-08-19T02-27-37.276111.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7d7fc6fb913cabb9c3bb4ab6faf1d413a69f88cb882fbd63f23a0ce21d85ad8
|
| 3 |
+
size 1887216
|
eval_results_ood/global_step_50/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_50_actor_huggingface/2025-08-19T02-27-37.276111/details_lighteval|gpqa:diamond|0_2025-08-19T02-27-37.276111.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4626ac4a1695080f6379b1712918e3c90c9c9b2d6bb41ce561ffc262554982e1
|
| 3 |
+
size 710992
|
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_60_actor_huggingface/2025-08-19T02-24-48.573923/details_extended|ifeval|0_2025-08-19T02-24-48.573923.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35ce730b988707e09af26e33ee0b669dc0b0eac8a56c63d1446260dd37bc6d1c
|
| 3 |
+
size 1633920
|
eval_results_ood/global_step_60/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_60_actor_huggingface/2025-08-19T02-24-48.573923/details_lighteval|gpqa:diamond|0_2025-08-19T02-24-48.573923.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b3eafdc4a5dc80faae8416428e649a5ea69b3720ab02eb6945c3455ccfd8379
|
| 3 |
+
size 717748
|
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_70_actor_huggingface/2025-08-19T02-23-41.225302/details_extended|ifeval|0_2025-08-19T02-23-41.225302.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4ff941a9a8eb14c97cd6a36795beb4e5cddcf45ac9977f605eb78a892b75e1b
|
| 3 |
+
size 1554610
|
eval_results_ood/global_step_70/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_70_actor_huggingface/2025-08-19T02-23-41.225302/details_lighteval|gpqa:diamond|0_2025-08-19T02-23-41.225302.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:896ae09b2e2167fe2443b2586ec718a9a92a7eb5a521b891fdf654b8b0a7f06a
|
| 3 |
+
size 711115
|
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_80_actor_huggingface/2025-08-19T03-22-27.759640/details_extended|ifeval|0_2025-08-19T03-22-27.759640.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d7564a8979cd22c865885d2905e4d6999907884b7c4139c2e8582799de3f041
|
| 3 |
+
size 1863422
|
eval_results_ood/global_step_80/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_80_actor_huggingface/2025-08-19T03-22-27.759640/details_lighteval|gpqa:diamond|0_2025-08-19T03-22-27.759640.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e3309afb1ba407363c2582463ed55d238c47b3f50af967b601c0a0bd54572b0
|
| 3 |
+
size 731554
|
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_90_actor_huggingface/2025-08-19T03-23-41.379521/details_extended|ifeval|0_2025-08-19T03-23-41.379521.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bdc3b985034af8c1a96535639333cd8fec96b045a5cd0147f1ccc7f09931fab
|
| 3 |
+
size 1712739
|
eval_results_ood/global_step_90/details/_home_work_compass_innovation_minzijun_checkpoints_verl_role_sft_grpo_deepmath_train_sample_6144_context_4k_Qwen3-1.7B-base_max_response4096_batch1024_rollout8_vllm_True_bias0.3_global_step_90_actor_huggingface/2025-08-19T03-23-41.379521/details_lighteval|gpqa:diamond|0_2025-08-19T03-23-41.379521.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a22384154eb28672a4c38af7f8b186cbaecdf854de6d2a9596b0bd3293aa4e5
|
| 3 |
+
size 761415
|
global_step_0/actor/huggingface/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6df85b39330e5a425ee36253d0f894e4387e4f0a15b9c53cb467d668e6b3a841
|
| 3 |
+
size 3441185608
|
global_step_10/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_10/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5d7cb574e77c186989ac15b0b93bfabe487df954f0d0061aeabf2d618cedd25
|
| 3 |
+
size 1947
|