Update train6.py
train6.py CHANGED
@@ -19,7 +19,7 @@ from openai import AsyncOpenAI
 MODEL_NAME = "55mvresearch/Qwen2.5-7B-Instruct-SFT-FT1-Merged"
 DATASET_NAME = "55mvresearch/sft-v1-singleturn-ads-creativity"
 OUTPUT_DIR = "./grpo_output"
-OUTPUT_REPO = "55mvresearch/Qwen2.5-7B-Instruct-GRPO-
+OUTPUT_REPO = "55mvresearch/Qwen2.5-7B-Instruct-GRPO-Emotion8"

 # Environment tokens
 HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
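For context, these module-level constants feed the Hub upload at the end of training. A minimal sketch of the typical wiring, assuming the login happens at startup and `model` stands in for the trained model object (the actual call site is outside this hunk):

```python
import os

from huggingface_hub import login

HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
OUTPUT_REPO = "55mvresearch/Qwen2.5-7B-Instruct-GRPO-Emotion8"

if HF_TOKEN:
    login(token=HF_TOKEN)  # authenticate once so pushes to the Hub succeed

# After training, the finished model can be uploaded under the new repo id:
# model.push_to_hub(OUTPUT_REPO)
```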
@@ -741,22 +741,24 @@ training_args = GRPOConfig(
     output_dir=OUTPUT_DIR,

     # Optimizer settings
-    learning_rate=
+    learning_rate=3e-6,
     adam_beta1=0.9,
     adam_beta2=0.99,
     weight_decay=0.0,
-    warmup_ratio=0.
+    warmup_ratio=0.05,
     lr_scheduler_type='cosine',
-    max_grad_norm=0.
+    max_grad_norm=0.2,
+    beta=0.02,
+

     # Generation settings
-    num_generations=
-    max_completion_length=
+    num_generations=16,  # Number of completions per prompt
+    max_completion_length=512,

     # Training settings
     per_device_train_batch_size=8,  # Must be divisible by num_generations
     gradient_accumulation_steps=4,
-    num_train_epochs=
+    num_train_epochs=2,

     # Logging
     logging_steps=10,
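For reference, a minimal sketch of how these hyperparameters plug into TRL's GRPOConfig/GRPOTrainer. The length-based reward below is a placeholder, and the dataset/model wiring is assumed from the constants at the top of train6.py:

```python
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

dataset = load_dataset(DATASET_NAME, split="train")

training_args = GRPOConfig(
    output_dir=OUTPUT_DIR,
    learning_rate=3e-6,
    warmup_ratio=0.05,
    max_grad_norm=0.2,
    beta=0.02,                  # KL coefficient against the reference policy
    num_generations=16,         # completions sampled per prompt
    max_completion_length=512,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
)

def reward_length(completions, **kwargs):
    # placeholder reward: prefer completions near 200 characters
    return [-abs(200 - len(c)) for c in completions]

trainer = GRPOTrainer(
    model=MODEL_NAME,           # TRL accepts a Hub model id here
    args=training_args,
    train_dataset=dataset,
    reward_funcs=reward_length,
)
trainer.train()
```

One caveat: in TRL's GRPO implementation the effective generation batch (per_device_train_batch_size times the number of processes) must be divisible by num_generations, so per_device_train_batch_size=8 with num_generations=16 implies at least a two-GPU run; on a single GPU one of the two values would need adjusting.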
@@ -778,7 +780,10 @@ peft_config = LoraConfig(
     r=32,
     lora_alpha=64,
     lora_dropout=0.05,
-    target_modules=[
+    target_modules=[
+        "q_proj", "k_proj", "v_proj", "o_proj",
+        "up_proj", "down_proj", "gate_proj",
+    ],
     bias="none",
     task_type="CAUSAL_LM",
 )
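The new target_modules list covers every linear projection in a Qwen2.5 transformer block (attention plus MLP) rather than the common q_proj/v_proj-only subset, which raises the trainable-parameter count. A minimal sketch of applying this config with peft to sanity-check the adapter footprint (the model loading here is an assumption, not shown in this hunk):

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "up_proj", "down_proj", "gate_proj",     # MLP projections
    ],
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # reports trainable vs. total parameters
```

In the script itself, passing peft_config straight to GRPOTrainer has the same effect; the explicit get_peft_model call here is just for inspection.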