heavycoderhh committed on
Commit
2cc2ea0
·
verified ·
1 Parent(s): 49d3a8d

Fix run3 generation settings

Browse files
scripts/run_sft_grpo_training_job.py CHANGED
@@ -48,7 +48,7 @@ SFT_EPOCHS = float(os.getenv("COUNSEL_SFT_EPOCHS", "1"))
48
  SFT_LEARNING_RATE = float(os.getenv("COUNSEL_SFT_LEARNING_RATE", "1e-5"))
49
  MAX_STEPS = int(os.getenv("COUNSEL_MAX_STEPS", "500"))
50
  GRPO_DATASET_SIZE = int(os.getenv("COUNSEL_DATASET_SIZE", "320"))
51
- NUM_GENERATIONS = int(os.getenv("COUNSEL_NUM_GENERATIONS", "6"))
52
  MAX_COMPLETION_LENGTH = int(os.getenv("COUNSEL_MAX_COMPLETION_LENGTH", "256"))
53
  GRPO_LEARNING_RATE = float(os.getenv("COUNSEL_LEARNING_RATE", "3e-6"))
54
  EVIDENCE_PRESSURE = float(os.getenv("COUNSEL_EVIDENCE_PRESSURE", "2.0"))
 
48
  SFT_LEARNING_RATE = float(os.getenv("COUNSEL_SFT_LEARNING_RATE", "1e-5"))
49
  MAX_STEPS = int(os.getenv("COUNSEL_MAX_STEPS", "500"))
50
  GRPO_DATASET_SIZE = int(os.getenv("COUNSEL_DATASET_SIZE", "320"))
51
+ NUM_GENERATIONS = int(os.getenv("COUNSEL_NUM_GENERATIONS", "4"))
52
  MAX_COMPLETION_LENGTH = int(os.getenv("COUNSEL_MAX_COMPLETION_LENGTH", "256"))
53
  GRPO_LEARNING_RATE = float(os.getenv("COUNSEL_LEARNING_RATE", "3e-6"))
54
  EVIDENCE_PRESSURE = float(os.getenv("COUNSEL_EVIDENCE_PRESSURE", "2.0"))