zz1358m committed on
Commit
a58dbfa
·
verified ·
1 Parent(s): dc2835f

Upload 5 files

Browse files
SofT-GRPO-deepscaler-8k-dir.sh CHANGED
@@ -2,8 +2,8 @@
2
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
3
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m verl.trainer.main_ppo \
4
  algorithm.adv_estimator=grpo \
5
- data.train_files=********************************/Baseline/GRPO/deepscaler/data/train.parquet \
6
- data.val_files=[********************************/Baseline/GRPO/deepscaler/data/aime.parquet,********************************/Baseline/GRPO/deepscaler/data/amc.parquet,********************************/Baseline/GRPO/deepscaler/data/math.parquet] \
7
  data.train_batch_size=64 \
8
  data.val_batch_size=128 \
9
  data.max_prompt_length=1024 \
 
2
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
3
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m verl.trainer.main_ppo \
4
  algorithm.adv_estimator=grpo \
5
+ data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
6
+ data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/amc.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/math.parquet] \
7
  data.train_batch_size=64 \
8
  data.val_batch_size=128 \
9
  data.max_prompt_length=1024 \
SofT-GRPO-deepscaler-8k-gau.sh CHANGED
@@ -2,8 +2,8 @@
2
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
3
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m verl.trainer.main_ppo \
4
  algorithm.adv_estimator=grpo \
5
- data.train_files=********************************/Baseline/GRPO/deepscaler/data/train.parquet \
6
- data.val_files=[********************************/Baseline/GRPO/deepscaler/data/aime.parquet,********************************/Baseline/GRPO/deepscaler/data/amc.parquet,********************************/Baseline/GRPO/deepscaler/data/math.parquet] \
7
  data.train_batch_size=64 \
8
  data.val_batch_size=128 \
9
  data.max_prompt_length=1024 \
 
2
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
3
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m verl.trainer.main_ppo \
4
  algorithm.adv_estimator=grpo \
5
+ data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
6
+ data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/amc.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/math.parquet] \
7
  data.train_batch_size=64 \
8
  data.val_batch_size=128 \
9
  data.max_prompt_length=1024 \
SofT-GRPO-deepscaler-8k-llama3.sh CHANGED
@@ -2,8 +2,8 @@
2
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
3
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
4
  algorithm.adv_estimator=grpo \
5
- data.train_files=********************************/Baseline/GRPO/deepscaler/data/train.parquet \
6
- data.val_files=[********************************/Baseline/GRPO/deepscaler/data/aime.parquet,********************************/Baseline/GRPO/deepscaler/data/amc.parquet,********************************/Baseline/GRPO/deepscaler/data/math.parquet] \
7
  data.train_batch_size=64 \
8
  data.val_batch_size=128 \
9
  data.max_prompt_length=1024 \
 
2
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
3
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
4
  algorithm.adv_estimator=grpo \
5
+ data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
6
+ data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/amc.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/math.parquet] \
7
  data.train_batch_size=64 \
8
  data.val_batch_size=128 \
9
  data.max_prompt_length=1024 \
SofT-GRPO-deepscaler-8k-qwen7.sh CHANGED
@@ -1,8 +1,8 @@
1
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
2
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
3
  algorithm.adv_estimator=grpo \
4
- data.train_files=********************************/Baseline/GRPO/deepscaler/data/train.parquet \
5
- data.val_files=[********************************/Baseline/GRPO/deepscaler/data/aime.parquet,********************************/Baseline/GRPO/deepscaler/data/amc.parquet,********************************/Baseline/GRPO/deepscaler/data/math.parquet] \
6
  data.train_batch_size=64 \
7
  data.val_batch_size=128 \
8
  data.max_prompt_length=1024 \
 
1
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
2
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
3
  algorithm.adv_estimator=grpo \
4
+ data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
5
+ data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/amc.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/math.parquet] \
6
  data.train_batch_size=64 \
7
  data.val_batch_size=128 \
8
  data.max_prompt_length=1024 \
SofT-GRPO-deepscaler-8k.sh CHANGED
@@ -1,8 +1,8 @@
1
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
2
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
3
  algorithm.adv_estimator=grpo \
4
- data.train_files=********************************/Baseline/GRPO/deepscaler/data/train.parquet \
5
- data.val_files=[********************************/Baseline/GRPO/deepscaler/data/aime.parquet,********************************/Baseline/GRPO/deepscaler/data/amc.parquet,********************************/Baseline/GRPO/deepscaler/data/math.parquet] \
6
  data.train_batch_size=64 \
7
  data.val_batch_size=128 \
8
  data.max_prompt_length=1024 \
 
1
  export SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=True
2
  PYTHONUNBUFFERED=1 CUDA_LAUNCH_BLOCKING=1 HYDRA_FULL_ERROR=1 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m verl.trainer.main_ppo \
3
  algorithm.adv_estimator=grpo \
4
+ data.train_files=********************************/Soft-Thinking+noise+loss-main/datasets/train.parquet \
5
+ data.val_files=[********************************/Soft-Thinking+noise+loss-main/datasets/aime.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/amc.parquet,********************************/Soft-Thinking+noise+loss-main/datasets/math.parquet] \
6
  data.train_batch_size=64 \
7
  data.val_batch_size=128 \
8
  data.max_prompt_length=1024 \