Spaces:
Running
Running
Fix run3 generation settings
Browse files
scripts/run_sft_grpo_training_job.py
CHANGED
|
@@ -48,7 +48,7 @@ SFT_EPOCHS = float(os.getenv("COUNSEL_SFT_EPOCHS", "1"))
|
|
| 48 |
SFT_LEARNING_RATE = float(os.getenv("COUNSEL_SFT_LEARNING_RATE", "1e-5"))
|
| 49 |
MAX_STEPS = int(os.getenv("COUNSEL_MAX_STEPS", "500"))
|
| 50 |
GRPO_DATASET_SIZE = int(os.getenv("COUNSEL_DATASET_SIZE", "320"))
|
| 51 |
-
NUM_GENERATIONS = int(os.getenv("COUNSEL_NUM_GENERATIONS", "…"))  # previous default truncated in this diff view; the new default is "4"
|
| 52 |
MAX_COMPLETION_LENGTH = int(os.getenv("COUNSEL_MAX_COMPLETION_LENGTH", "256"))
|
| 53 |
GRPO_LEARNING_RATE = float(os.getenv("COUNSEL_LEARNING_RATE", "3e-6"))
|
| 54 |
EVIDENCE_PRESSURE = float(os.getenv("COUNSEL_EVIDENCE_PRESSURE", "2.0"))
|
|
|
|
| 48 |
SFT_LEARNING_RATE = float(os.getenv("COUNSEL_SFT_LEARNING_RATE", "1e-5"))
|
| 49 |
MAX_STEPS = int(os.getenv("COUNSEL_MAX_STEPS", "500"))
|
| 50 |
GRPO_DATASET_SIZE = int(os.getenv("COUNSEL_DATASET_SIZE", "320"))
|
| 51 |
+
NUM_GENERATIONS = int(os.getenv("COUNSEL_NUM_GENERATIONS", "4"))
|
| 52 |
MAX_COMPLETION_LENGTH = int(os.getenv("COUNSEL_MAX_COMPLETION_LENGTH", "256"))
|
| 53 |
GRPO_LEARNING_RATE = float(os.getenv("COUNSEL_LEARNING_RATE", "3e-6"))
|
| 54 |
EVIDENCE_PRESSURE = float(os.getenv("COUNSEL_EVIDENCE_PRESSURE", "2.0"))
|