natnael kahssay Claude Sonnet 4.6 committed on
Commit
7ba71bb
·
1 Parent(s): bd0f19a

fix: correct model name to unsloth/gpt-oss-20b (no -instruct suffix)

Browse files
Files changed (1) hide show
  1. training/train.py +2 -2
training/train.py CHANGED
@@ -27,7 +27,7 @@ wandb.init(
27
  project = "moa-rl-grpo",
28
  mode = "online" if os.environ.get("WANDB_API_KEY") else "disabled",
29
  config = {
30
- "model": os.environ.get("MODEL_NAME", "unsloth/gpt-oss-20b-instruct"),
31
  "env_url": os.environ.get("ENV_URL", "https://http--moa-rl-env--7b2fgcxb6gxp.code.run"),
32
  "max_steps": 300,
33
  "num_generations": 4,
@@ -36,7 +36,7 @@ wandb.init(
36
  )
37
 
38
  ENV_URL = os.environ.get("ENV_URL", "https://http--moa-rl-env--7b2fgcxb6gxp.code.run")
39
- MODEL_NAME = os.environ.get("MODEL_NAME", "unsloth/gpt-oss-20b-instruct")
40
  OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "/output/moa-rl-grpo")
41
  TIMEOUT = 120
42
  MAX_STEPS = 8 # tool calls per episode
 
27
  project = "moa-rl-grpo",
28
  mode = "online" if os.environ.get("WANDB_API_KEY") else "disabled",
29
  config = {
30
+ "model": os.environ.get("MODEL_NAME", "unsloth/gpt-oss-20b"),
31
  "env_url": os.environ.get("ENV_URL", "https://http--moa-rl-env--7b2fgcxb6gxp.code.run"),
32
  "max_steps": 300,
33
  "num_generations": 4,
 
36
  )
37
 
38
  ENV_URL = os.environ.get("ENV_URL", "https://http--moa-rl-env--7b2fgcxb6gxp.code.run")
39
+ MODEL_NAME = os.environ.get("MODEL_NAME", "unsloth/gpt-oss-20b")
40
  OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "/output/moa-rl-grpo")
41
  TIMEOUT = 120
42
  MAX_STEPS = 8 # tool calls per episode