abedk commited on
Commit
78604aa
·
verified ·
1 Parent(s): 7f81d7b

Update train6.py

Browse files
Files changed (1) hide show
  1. train6.py +13 -8
train6.py CHANGED
@@ -19,7 +19,7 @@ from openai import AsyncOpenAI
19
  MODEL_NAME = "55mvresearch/Qwen2.5-7B-Instruct-SFT-FT1-Merged"
20
  DATASET_NAME = "55mvresearch/sft-v1-singleturn-ads-creativity"
21
  OUTPUT_DIR = "./grpo_output"
22
- OUTPUT_REPO = "55mvresearch/Qwen2.5-7B-Instruct-GRPO-Emotion7"
23
 
24
  # Environment tokens
25
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
@@ -741,22 +741,24 @@ training_args = GRPOConfig(
741
  output_dir=OUTPUT_DIR,
742
 
743
  # Optimizer settings
744
- learning_rate=2e-6,
745
  adam_beta1=0.9,
746
  adam_beta2=0.99,
747
  weight_decay=0.0,
748
- warmup_ratio=0.03,
749
  lr_scheduler_type='cosine',
750
- max_grad_norm=0.5,
 
 
751
 
752
  # Generation settings
753
- num_generations=8, # Number of completions per prompt
754
- max_completion_length=320,
755
 
756
  # Training settings
757
  per_device_train_batch_size=8, # Must be divisible by num_generations
758
  gradient_accumulation_steps=4,
759
- num_train_epochs=3,
760
 
761
  # Logging
762
  logging_steps=10,
@@ -778,7 +780,10 @@ peft_config = LoraConfig(
778
  r=32,
779
  lora_alpha=64,
780
  lora_dropout=0.05,
781
- target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
 
 
 
782
  bias="none",
783
  task_type="CAUSAL_LM",
784
  )
 
19
  MODEL_NAME = "55mvresearch/Qwen2.5-7B-Instruct-SFT-FT1-Merged"
20
  DATASET_NAME = "55mvresearch/sft-v1-singleturn-ads-creativity"
21
  OUTPUT_DIR = "./grpo_output"
22
+ OUTPUT_REPO = "55mvresearch/Qwen2.5-7B-Instruct-GRPO-Emotion8"
23
 
24
  # Environment tokens
25
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
 
741
  output_dir=OUTPUT_DIR,
742
 
743
  # Optimizer settings
744
+ learning_rate=3e-6,
745
  adam_beta1=0.9,
746
  adam_beta2=0.99,
747
  weight_decay=0.0,
748
+ warmup_ratio=0.05,
749
  lr_scheduler_type='cosine',
750
+ max_grad_norm=0.2,
751
+ beta=0.02,  # KL penalty coefficient
752
+
753
 
754
  # Generation settings
755
+ num_generations=16, # Number of completions per prompt
756
+ max_completion_length=512,
757
 
758
  # Training settings
759
  per_device_train_batch_size=8, # Effective generation batch (8 × grad_accum 4 = 32) must be divisible by num_generations
760
  gradient_accumulation_steps=4,
761
+ num_train_epochs=2,
762
 
763
  # Logging
764
  logging_steps=10,
 
780
  r=32,
781
  lora_alpha=64,
782
  lora_dropout=0.05,
783
+ target_modules=[
784
+ "q_proj", "k_proj", "v_proj", "o_proj",
785
+ "up_proj", "down_proj", "gate_proj",
786
+ ],
787
  bias="none",
788
  task_type="CAUSAL_LM",
789
  )