ligaments-dev committed on
Commit
d21a432
·
verified ·
1 Parent(s): 5f0d2a1

Upload grpo_training.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. grpo_training.py +1 -5
grpo_training.py CHANGED
@@ -36,7 +36,6 @@ config = GRPOConfig(
36
  per_device_eval_batch_size=1,
37
  gradient_accumulation_steps=8, # Effective batch size = 8
38
  learning_rate=1e-6,
39
- max_length=1024,
40
 
41
  # Evaluation and logging
42
  eval_strategy="steps",
@@ -59,10 +58,6 @@ config = GRPOConfig(
59
  report_to="trackio",
60
  run_name="llama3.2-1b-sec-grpo-training",
61
  project="ligaments-sec-alignment",
62
-
63
- # GRPO specific parameters
64
- kl_penalty="kl", # KL penalty for policy regularization
65
- temperature=0.7,
66
  )
67
 
68
  # Initialize GRPO trainer
@@ -71,6 +66,7 @@ trainer = GRPOTrainer(
71
  tokenizer=tokenizer,
72
  train_dataset=train_dataset,
73
  eval_dataset=eval_dataset,
 
74
  peft_config=LoraConfig(
75
  r=16,
76
  lora_alpha=32,
 
36
  per_device_eval_batch_size=1,
37
  gradient_accumulation_steps=8, # Effective batch size = 8
38
  learning_rate=1e-6,
 
39
 
40
  # Evaluation and logging
41
  eval_strategy="steps",
 
58
  report_to="trackio",
59
  run_name="llama3.2-1b-sec-grpo-training",
60
  project="ligaments-sec-alignment",
 
 
 
 
61
  )
62
 
63
  # Initialize GRPO trainer
 
66
  tokenizer=tokenizer,
67
  train_dataset=train_dataset,
68
  eval_dataset=eval_dataset,
69
+ max_length=1024, # Sequence length control
70
  peft_config=LoraConfig(
71
  r=16,
72
  lora_alpha=32,