Upload 5 files
Browse files
SofT-GRPO-deepscaler-8k-dir.sh
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 3 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m verl.trainer.main_ppo \
|
| 4 |
algorithm.adv_estimator=grpo \
|
| 5 |
-
data.train_files=********************************/
|
| 6 |
-
data.val_files=[********************************/
|
| 7 |
data.train_batch_size=64 \
|
| 8 |
data.val_batch_size=128 \
|
| 9 |
data.max_prompt_length=1024 \
|
|
|
|
| 2 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 3 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m verl.trainer.main_ppo \
|
| 4 |
algorithm.adv_estimator=grpo \
|
| 5 |
+
data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
|
| 6 |
+
data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/amc.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/math.parquet] \
|
| 7 |
data.train_batch_size=64 \
|
| 8 |
data.val_batch_size=128 \
|
| 9 |
data.max_prompt_length=1024 \
|
SofT-GRPO-deepscaler-8k-gau.sh
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 3 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m verl.trainer.main_ppo \
|
| 4 |
algorithm.adv_estimator=grpo \
|
| 5 |
-
data.train_files=********************************/
|
| 6 |
-
data.val_files=[********************************/
|
| 7 |
data.train_batch_size=64 \
|
| 8 |
data.val_batch_size=128 \
|
| 9 |
data.max_prompt_length=1024 \
|
|
|
|
| 2 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 3 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m verl.trainer.main_ppo \
|
| 4 |
algorithm.adv_estimator=grpo \
|
| 5 |
+
data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
|
| 6 |
+
data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/amc.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/math.parquet] \
|
| 7 |
data.train_batch_size=64 \
|
| 8 |
data.val_batch_size=128 \
|
| 9 |
data.max_prompt_length=1024 \
|
SofT-GRPO-deepscaler-8k-llama3.sh
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 3 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
|
| 4 |
algorithm.adv_estimator=grpo \
|
| 5 |
-
data.train_files=********************************/
|
| 6 |
-
data.val_files=[********************************/
|
| 7 |
data.train_batch_size=64 \
|
| 8 |
data.val_batch_size=128 \
|
| 9 |
data.max_prompt_length=1024 \
|
|
|
|
| 2 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 3 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
|
| 4 |
algorithm.adv_estimator=grpo \
|
| 5 |
+
data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
|
| 6 |
+
data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Baseline/GRPO/deepscaler/data/amc.parquet,********************************/Baseline/GRPO/deepscaler/data/math.parquet] \
|
| 7 |
data.train_batch_size=64 \
|
| 8 |
data.val_batch_size=128 \
|
| 9 |
data.max_prompt_length=1024 \
|
SofT-GRPO-deepscaler-8k-qwen7.sh
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 2 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
|
| 3 |
algorithm.adv_estimator=grpo \
|
| 4 |
-
data.train_files=********************************/
|
| 5 |
-
data.val_files=[********************************/
|
| 6 |
data.train_batch_size=64 \
|
| 7 |
data.val_batch_size=128 \
|
| 8 |
data.max_prompt_length=1024 \
|
|
|
|
| 1 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 2 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
|
| 3 |
algorithm.adv_estimator=grpo \
|
| 4 |
+
data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
|
| 5 |
+
data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/amc.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/math.parquet] \
|
| 6 |
data.train_batch_size=64 \
|
| 7 |
data.val_batch_size=128 \
|
| 8 |
data.max_prompt_length=1024 \
|
SofT-GRPO-deepscaler-8k.sh
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 2 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
|
| 3 |
algorithm.adv_estimator=grpo \
|
| 4 |
-
data.train_files=********************************/
|
| 5 |
-
data.val_files=[********************************/
|
| 6 |
data.train_batch_size=64 \
|
| 7 |
data.val_batch_size=128 \
|
| 8 |
data.max_prompt_length=1024 \
|
|
|
|
| 1 |
export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
|
| 2 |
PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
|
| 3 |
algorithm.adv_estimator=grpo \
|
| 4 |
+
data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
|
| 5 |
+
data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/amc.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/math.parquet] \
|
| 6 |
data.train_batch_size=64 \
|
| 7 |
data.val_batch_size=128 \
|
| 8 |
data.max_prompt_length=1024 \
|