Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +26 -0
- global_step_10/actor/huggingface/tokenizer.json +3 -0
- global_step_10/data.pt +3 -0
- global_step_100/actor/huggingface/tokenizer.json +3 -0
- global_step_100/data.pt +3 -0
- global_step_20/actor/huggingface/tokenizer.json +3 -0
- global_step_20/data.pt +3 -0
- global_step_30/actor/huggingface/tokenizer.json +3 -0
- global_step_30/data.pt +3 -0
- global_step_40/actor/huggingface/tokenizer.json +3 -0
- global_step_40/data.pt +3 -0
- global_step_50/actor/huggingface/tokenizer.json +3 -0
- global_step_50/data.pt +3 -0
- global_step_60/actor/huggingface/tokenizer.json +3 -0
- global_step_60/data.pt +3 -0
- global_step_70/actor/huggingface/tokenizer.json +3 -0
- global_step_70/data.pt +3 -0
- global_step_80/actor/huggingface/tokenizer.json +3 -0
- global_step_80/data.pt +3 -0
- global_step_90/actor/huggingface/tokenizer.json +3 -0
- global_step_90/data.pt +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_keywords.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_pass_acc.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_tokens.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_clip_ratio.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_correct_tokens.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_tokens_keywords.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_wrong_tokens.png +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_60/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,29 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
global_step_20/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
global_step_30/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
global_step_40/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
|
global_step_10/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_10/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d7c26916a0b5bf53242298f37baf627a9b0ed4e16fabfd78a877ab3cb35472c
|
| 3 |
+
size 1947
|
global_step_100/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_100/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95f3eaab16577c8b15e0e26c7e76ea89028ecdefaee47b9d4ad925c5a562a622
|
| 3 |
+
size 1947
|
global_step_20/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_20/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66ce7c51037d5f729b2a767511d5573796bed9a99711ff83eadb00f227957707
|
| 3 |
+
size 1947
|
global_step_30/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_30/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1dd0fee6dcd750e5c3b43665d8e8da560439ee4d1ed1ac4f0ced642981e02ca1
|
| 3 |
+
size 1947
|
global_step_40/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_40/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cd793991b9fe00986d0c1d3593ad31fac549dd780b01d5f6d9becd1a8161e69
|
| 3 |
+
size 1947
|
global_step_50/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_50/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c36aed214995f2c4f55495ab6ac7b5ec006bfed7a645a3f139c885d98ba2ed60
|
| 3 |
+
size 1947
|
global_step_60/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_60/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:386f4e1ce32469eb9649ae72e1114453155232915883c775bad93899892df882
|
| 3 |
+
size 1947
|
global_step_70/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_70/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f70670c9ecd962978294e131e8948e9bed7a8e05fd302cd1058ff4e962c7f189
|
| 3 |
+
size 1947
|
global_step_80/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_80/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2da91f2b0ee491fc6c032b29eb667535f053724d53bd035fe62753317a43ae00
|
| 3 |
+
size 1947
|
global_step_90/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
global_step_90/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4609719c846c982e5c9b89f6f8b8702de5c4ded573a84e527f87e33c095bdbc7
|
| 3 |
+
size 1947
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_keywords.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_pass_acc.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_tokens.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_avg_stop_tokens.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_box_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_clip_ratio.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_correct_tokens.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_repeat_ratio_and_token_length.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_tokens_keywords.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_wrong_tokens.png
ADDED
|
Git LFS Details
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_60/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 40,
|
| 3 |
+
"num_scores": 160,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 67.5,
|
| 7 |
+
"pass_acc": 85.0,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 67.5,
|
| 10 |
+
"2": 76.7,
|
| 11 |
+
"4": 85.0
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 162.94680261611938,
|
| 14 |
+
"time_use_in_minite": "2:42"
|
| 15 |
+
}
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 30,
|
| 3 |
+
"num_scores": 120,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 27.5,
|
| 7 |
+
"pass_acc": 36.7,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 27.5,
|
| 10 |
+
"2": 32.2,
|
| 11 |
+
"4": 36.7
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 419.3475561141968,
|
| 14 |
+
"time_use_in_minite": "6:59"
|
| 15 |
+
}
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 30,
|
| 3 |
+
"num_scores": 120,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 19.2,
|
| 7 |
+
"pass_acc": 30.0,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 19.2,
|
| 10 |
+
"2": 25.6,
|
| 11 |
+
"4": 30.0
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 375.0433328151703,
|
| 14 |
+
"time_use_in_minite": "6:15"
|
| 15 |
+
}
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 40,
|
| 3 |
+
"num_scores": 160,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 64.4,
|
| 7 |
+
"pass_acc": 80.0,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 64.4,
|
| 10 |
+
"2": 74.2,
|
| 11 |
+
"4": 80.0
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 195.60499334335327,
|
| 14 |
+
"time_use_in_minite": "3:15"
|
| 15 |
+
}
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 30,
|
| 3 |
+
"num_scores": 120,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 28.3,
|
| 7 |
+
"pass_acc": 46.7,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 28.3,
|
| 10 |
+
"2": 36.1,
|
| 11 |
+
"4": 46.7
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 549.4302027225494,
|
| 14 |
+
"time_use_in_minite": "9:09"
|
| 15 |
+
}
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 30,
|
| 3 |
+
"num_scores": 120,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 22.5,
|
| 7 |
+
"pass_acc": 36.7,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 22.5,
|
| 10 |
+
"2": 28.9,
|
| 11 |
+
"4": 36.7
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 381.60382318496704,
|
| 14 |
+
"time_use_in_minite": "6:21"
|
| 15 |
+
}
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 40,
|
| 3 |
+
"num_scores": 160,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 66.9,
|
| 7 |
+
"pass_acc": 80.0,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 66.9,
|
| 10 |
+
"2": 73.8,
|
| 11 |
+
"4": 80.0
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 233.23579001426697,
|
| 14 |
+
"time_use_in_minite": "3:53"
|
| 15 |
+
}
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 30,
|
| 3 |
+
"num_scores": 120,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 25.0,
|
| 7 |
+
"pass_acc": 43.3,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 25.0,
|
| 10 |
+
"2": 32.8,
|
| 11 |
+
"4": 43.3
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 509.6931164264679,
|
| 14 |
+
"time_use_in_minite": "8:29"
|
| 15 |
+
}
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 30,
|
| 3 |
+
"num_scores": 120,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 24.2,
|
| 7 |
+
"pass_acc": 33.3,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 24.2,
|
| 10 |
+
"2": 28.9,
|
| 11 |
+
"4": 33.3
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 440.93800044059753,
|
| 14 |
+
"time_use_in_minite": "7:20"
|
| 15 |
+
}
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_samples": 40,
|
| 3 |
+
"num_scores": 160,
|
| 4 |
+
"timeout_samples": 0,
|
| 5 |
+
"empty_samples": 0,
|
| 6 |
+
"acc": 68.8,
|
| 7 |
+
"pass_acc": 87.5,
|
| 8 |
+
"pass@k": {
|
| 9 |
+
"1": 68.8,
|
| 10 |
+
"2": 80.8,
|
| 11 |
+
"4": 87.5
|
| 12 |
+
},
|
| 13 |
+
"time_use_in_second": 134.2269902229309,
|
| 14 |
+
"time_use_in_minite": "2:14"
|
| 15 |
+
}
|