Spaces:
Sleeping
Sleeping
natnael kahssay Claude Sonnet 4.6 committed on
Commit ·
7ba71bb
1
Parent(s): bd0f19a
fix: correct model name to unsloth/gpt-oss-20b (no -instruct suffix)
Browse files- training/train.py +2 -2
training/train.py
CHANGED
|
@@ -27,7 +27,7 @@ wandb.init(
|
|
| 27 |
project = "moa-rl-grpo",
|
| 28 |
mode = "online" if os.environ.get("WANDB_API_KEY") else "disabled",
|
| 29 |
config = {
|
| 30 |
-
"model": os.environ.get("MODEL_NAME", "unsloth/gpt-oss-20b-instruct"),
|
| 31 |
"env_url": os.environ.get("ENV_URL", "https://http--moa-rl-env--7b2fgcxb6gxp.code.run"),
|
| 32 |
"max_steps": 300,
|
| 33 |
"num_generations": 4,
|
|
@@ -36,7 +36,7 @@ wandb.init(
|
|
| 36 |
)
|
| 37 |
|
| 38 |
ENV_URL = os.environ.get("ENV_URL", "https://http--moa-rl-env--7b2fgcxb6gxp.code.run")
|
| 39 |
-
MODEL_NAME = os.environ.get("MODEL_NAME", "unsloth/gpt-oss-20b-instruct")
|
| 40 |
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "/output/moa-rl-grpo")
|
| 41 |
TIMEOUT = 120
|
| 42 |
MAX_STEPS = 8 # tool calls per episode
|
|
|
|
| 27 |
project = "moa-rl-grpo",
|
| 28 |
mode = "online" if os.environ.get("WANDB_API_KEY") else "disabled",
|
| 29 |
config = {
|
| 30 |
+
"model": os.environ.get("MODEL_NAME", "unsloth/gpt-oss-20b"),
|
| 31 |
"env_url": os.environ.get("ENV_URL", "https://http--moa-rl-env--7b2fgcxb6gxp.code.run"),
|
| 32 |
"max_steps": 300,
|
| 33 |
"num_generations": 4,
|
|
|
|
| 36 |
)
|
| 37 |
|
| 38 |
ENV_URL = os.environ.get("ENV_URL", "https://http--moa-rl-env--7b2fgcxb6gxp.code.run")
|
| 39 |
+
MODEL_NAME = os.environ.get("MODEL_NAME", "unsloth/gpt-oss-20b")
|
| 40 |
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "/output/moa-rl-grpo")
|
| 41 |
TIMEOUT = 120
|
| 42 |
MAX_STEPS = 8 # tool calls per episode
|