Add files using upload-large-folder tool
Browse files- .gitattributes +2 -0
- global_step_90/actor/huggingface/model.safetensors +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_0/actor/huggingface/model-00001-of-00005.safetensors +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_0/actor/huggingface/model-00002-of-00005.safetensors +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_0/actor/huggingface/model-00003-of-00005.safetensors +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_0/actor/huggingface/model-00005-of-00005.safetensors +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_100/actor/huggingface/tokenizer.json +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_20/actor/huggingface/tokenizer.json +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_20/data.pt +3 -0
- reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_30/data.pt +3 -0
.gitattributes
CHANGED
|
@@ -64,3 +64,5 @@ reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response409
|
|
| 64 |
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 65 |
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 66 |
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 64 |
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_wrong_tokens.png filter=lfs diff=lfs merge=lfs -text
|
| 65 |
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/eval_results_avg4/plots/eval_results_avg4_tokens_keywords.png filter=lfs diff=lfs merge=lfs -text
|
| 66 |
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_10/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_100/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_20/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
global_step_90/actor/huggingface/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32b1e8b4a4e1ec5fc335e52f5b84642db7d4ae397203385ee224da8073c32823
|
| 3 |
+
size 4063515640
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_0/actor/huggingface/model-00001-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9983f1b9ef2f60e7c3730d9bc11ada914e6ec630639b5b020a89bd158cd0446b
|
| 3 |
+
size 3996250744
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_0/actor/huggingface/model-00002-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9aa12339835bf7a093d3d0b0a0d2d77f8538301cfc7f8e7ec7a585858ebf7a1f
|
| 3 |
+
size 3993160032
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_0/actor/huggingface/model-00003-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7dd5a191c8da555def0714550375b3c0117751add408fcdda0c2a79ac0186bc
|
| 3 |
+
size 3959604768
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_0/actor/huggingface/model-00005-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbf24915d47ea030bb68ab0b9488f4515a907185baa6dc26837c9c3f2326a550
|
| 3 |
+
size 1244659840
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_100/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_20/actor/huggingface/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_20/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9bdf362aff01ad7b93bc63313ec5786fe5a30d7f70b5b845b9d824c957a7be1
|
| 3 |
+
size 1947
|
reinforce_pp_deepmath_train_sample_6144_context_4k_Qwen3-8B-Base_max_response4096_batch1024_ppomini256_rollout8_vllm/global_step_30/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24a6147ae9c59fb95eaca2c041603aa399dddfe3bf9ef94e44b8fb0c114c9ee1
|
| 3 |
+
size 1947
|