bensondccnqwc commited on
Commit
bda1814
·
verified ·
1 Parent(s): e7cbf59

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +26 -0
  2. global_step_10/actor/huggingface/tokenizer.json +3 -0
  3. global_step_10/data.pt +3 -0
  4. global_step_100/actor/huggingface/tokenizer.json +3 -0
  5. global_step_100/data.pt +3 -0
  6. global_step_20/actor/huggingface/tokenizer.json +3 -0
  7. global_step_20/data.pt +3 -0
  8. global_step_30/actor/huggingface/tokenizer.json +3 -0
  9. global_step_30/data.pt +3 -0
  10. global_step_40/actor/huggingface/tokenizer.json +3 -0
  11. global_step_40/data.pt +3 -0
  12. global_step_50/actor/huggingface/tokenizer.json +3 -0
  13. global_step_50/data.pt +3 -0
  14. global_step_60/actor/huggingface/tokenizer.json +3 -0
  15. global_step_60/data.pt +3 -0
  16. global_step_70/actor/huggingface/tokenizer.json +3 -0
  17. global_step_70/data.pt +3 -0
  18. global_step_80/actor/huggingface/tokenizer.json +3 -0
  19. global_step_80/data.pt +3 -0
  20. global_step_90/actor/huggingface/tokenizer.json +3 -0
  21. global_step_90/data.pt +3 -0
  22. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_keywords.png +3 -0
  23. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_pass_acc.png +3 -0
  24. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_tokens.png +3 -0
  25. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_avg_stop_tokens.png +3 -0
  26. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_box_ratio_and_token_length.png +3 -0
  27. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_clip_ratio.png +3 -0
  28. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_correct_tokens.png +3 -0
  29. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_repeat_ratio_and_token_length.png +3 -0
  30. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_tokens_keywords.png +3 -0
  31. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_wrong_tokens.png +3 -0
  32. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_60/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
  33. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  34. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
  35. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  36. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
  37. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  38. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
  39. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  40. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
  41. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  42. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
  43. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  44. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
  45. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  46. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
  47. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  48. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
  49. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  50. reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json +15 -0
.gitattributes CHANGED
@@ -33,3 +33,29 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ global_step_20/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ global_step_30/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ global_step_40/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ global_step_50/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
+ global_step_60/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
+ global_step_70/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
44
+ global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
45
+ global_step_90/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
46
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
47
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
48
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
49
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
50
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
51
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
52
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_correct_tokens.png filter=lfs diff=lfs merge=lfs -text
53
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_repeat_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
54
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
55
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
56
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_acc_keywords.png filter=lfs diff=lfs merge=lfs -text
57
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_acc_pass_acc.png filter=lfs diff=lfs merge=lfs -text
58
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_acc_tokens.png filter=lfs diff=lfs merge=lfs -text
59
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_avg_stop_tokens.png filter=lfs diff=lfs merge=lfs -text
60
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_clip_ratio.png filter=lfs diff=lfs merge=lfs -text
61
+ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_box_ratio_and_token_length.png filter=lfs diff=lfs merge=lfs -text
global_step_10/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_10/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d7c26916a0b5bf53242298f37baf627a9b0ed4e16fabfd78a877ab3cb35472c
3
+ size 1947
global_step_100/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_100/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95f3eaab16577c8b15e0e26c7e76ea89028ecdefaee47b9d4ad925c5a562a622
3
+ size 1947
global_step_20/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_20/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66ce7c51037d5f729b2a767511d5573796bed9a99711ff83eadb00f227957707
3
+ size 1947
global_step_30/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_30/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dd0fee6dcd750e5c3b43665d8e8da560439ee4d1ed1ac4f0ced642981e02ca1
3
+ size 1947
global_step_40/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_40/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd793991b9fe00986d0c1d3593ad31fac549dd780b01d5f6d9becd1a8161e69
3
+ size 1947
global_step_50/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_50/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c36aed214995f2c4f55495ab6ac7b5ec006bfed7a645a3f139c885d98ba2ed60
3
+ size 1947
global_step_60/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_60/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386f4e1ce32469eb9649ae72e1114453155232915883c775bad93899892df882
3
+ size 1947
global_step_70/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_70/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70670c9ecd962978294e131e8948e9bed7a8e05fd302cd1058ff4e962c7f189
3
+ size 1947
global_step_80/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_80/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2da91f2b0ee491fc6c032b29eb667535f053724d53bd035fe62753317a43ae00
3
+ size 1947
global_step_90/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
global_step_90/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4609719c846c982e5c9b89f6f8b8702de5c4ded573a84e527f87e33c095bdbc7
3
+ size 1947
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_keywords.png ADDED

Git LFS Details

  • SHA256: 869b7199b66cb682b0d1307617794927be6b272ec6a8185da112deb1667f33b3
  • Pointer size: 131 Bytes
  • Size of remote file: 418 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_pass_acc.png ADDED

Git LFS Details

  • SHA256: 16ff86a7da1e6a2cde216a8159c3b3b68df85a53a3d428656c8a1fc559d9b90f
  • Pointer size: 131 Bytes
  • Size of remote file: 302 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_acc_tokens.png ADDED

Git LFS Details

  • SHA256: d9c3e2966f4c71bb0b6176e28083977a297054fb5dda7c9f20e1d17c383a38be
  • Pointer size: 131 Bytes
  • Size of remote file: 431 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_avg_stop_tokens.png ADDED

Git LFS Details

  • SHA256: 02907aff5685e33b3dc99936933cc29ccbc805d810eba74b36b8f2d1efb57d2e
  • Pointer size: 131 Bytes
  • Size of remote file: 462 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_box_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: 5be548f30aea1bec76bc3df37e4234d1fd1bbed15576a7786c2329b00ae1ff6c
  • Pointer size: 131 Bytes
  • Size of remote file: 448 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_clip_ratio.png ADDED

Git LFS Details

  • SHA256: ad0e6606a2d2ec3275ee4c5e45d46e04671593fea6acf0dfc6d0c94937f88a57
  • Pointer size: 131 Bytes
  • Size of remote file: 411 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_correct_tokens.png ADDED

Git LFS Details

  • SHA256: 499beb04f3462210166caec3f519ad2abc35f39170cdb27036f13dfae557160e
  • Pointer size: 131 Bytes
  • Size of remote file: 420 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_repeat_ratio_and_token_length.png ADDED

Git LFS Details

  • SHA256: fc6047f88412b6958bffdc19fed506affabd9a9e55119fd5e0bc963cb3e8f912
  • Pointer size: 131 Bytes
  • Size of remote file: 473 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_tokens_keywords.png ADDED

Git LFS Details

  • SHA256: bf387a9155421ee88ed1b4a630afb60e9f329f01df4d70a58f6fc936cdcfc168
  • Pointer size: 131 Bytes
  • Size of remote file: 446 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results/plots/eval_results_wrong_tokens.png ADDED

Git LFS Details

  • SHA256: f3039230d66700f047cc17937d56f7e8d66e2a19e1815e7abe30033abe12269c
  • Pointer size: 131 Bytes
  • Size of remote file: 442 kB
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_60/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 40,
3
+ "num_scores": 160,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 67.5,
7
+ "pass_acc": 85.0,
8
+ "pass@k": {
9
+ "1": 67.5,
10
+ "2": 76.7,
11
+ "4": 85.0
12
+ },
13
+ "time_use_in_second": 162.94680261611938,
14
+ "time_use_in_minite": "2:42"
15
+ }
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 30,
3
+ "num_scores": 120,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 27.5,
7
+ "pass_acc": 36.7,
8
+ "pass@k": {
9
+ "1": 27.5,
10
+ "2": 32.2,
11
+ "4": 36.7
12
+ },
13
+ "time_use_in_second": 419.3475561141968,
14
+ "time_use_in_minite": "6:59"
15
+ }
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 30,
3
+ "num_scores": 120,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 19.2,
7
+ "pass_acc": 30.0,
8
+ "pass@k": {
9
+ "1": 19.2,
10
+ "2": 25.6,
11
+ "4": 30.0
12
+ },
13
+ "time_use_in_second": 375.0433328151703,
14
+ "time_use_in_minite": "6:15"
15
+ }
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_70/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 40,
3
+ "num_scores": 160,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 64.4,
7
+ "pass_acc": 80.0,
8
+ "pass@k": {
9
+ "1": 64.4,
10
+ "2": 74.2,
11
+ "4": 80.0
12
+ },
13
+ "time_use_in_second": 195.60499334335327,
14
+ "time_use_in_minite": "3:15"
15
+ }
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 30,
3
+ "num_scores": 120,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 28.3,
7
+ "pass_acc": 46.7,
8
+ "pass@k": {
9
+ "1": 28.3,
10
+ "2": 36.1,
11
+ "4": 46.7
12
+ },
13
+ "time_use_in_second": 549.4302027225494,
14
+ "time_use_in_minite": "9:09"
15
+ }
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 30,
3
+ "num_scores": 120,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 22.5,
7
+ "pass_acc": 36.7,
8
+ "pass@k": {
9
+ "1": 22.5,
10
+ "2": 28.9,
11
+ "4": 36.7
12
+ },
13
+ "time_use_in_second": 381.60382318496704,
14
+ "time_use_in_minite": "6:21"
15
+ }
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_80/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 40,
3
+ "num_scores": 160,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 66.9,
7
+ "pass_acc": 80.0,
8
+ "pass@k": {
9
+ "1": 66.9,
10
+ "2": 73.8,
11
+ "4": 80.0
12
+ },
13
+ "time_use_in_second": 233.23579001426697,
14
+ "time_use_in_minite": "3:53"
15
+ }
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime24/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 30,
3
+ "num_scores": 120,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 25.0,
7
+ "pass_acc": 43.3,
8
+ "pass@k": {
9
+ "1": 25.0,
10
+ "2": 32.8,
11
+ "4": 43.3
12
+ },
13
+ "time_use_in_second": 509.6931164264679,
14
+ "time_use_in_minite": "8:29"
15
+ }
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/aime25/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 30,
3
+ "num_scores": 120,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 24.2,
7
+ "pass_acc": 33.3,
8
+ "pass@k": {
9
+ "1": 24.2,
10
+ "2": 28.9,
11
+ "4": 33.3
12
+ },
13
+ "time_use_in_second": 440.93800044059753,
14
+ "time_use_in_minite": "7:20"
15
+ }
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/global_step_90/amc23/test_qwen-boxed_-1_seed0_t1.0_s0_e-1_metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 40,
3
+ "num_scores": 160,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 68.8,
7
+ "pass_acc": 87.5,
8
+ "pass@k": {
9
+ "1": 68.8,
10
+ "2": 80.8,
11
+ "4": 87.5
12
+ },
13
+ "time_use_in_second": 134.2269902229309,
14
+ "time_use_in_minite": "2:14"
15
+ }