garvitsachdeva committed on
Commit
a5c7dd0
·
1 Parent(s): e8360fd

fix: eliminate API leaks in training loop, set 30k steps for A100

Browse files
Files changed (3) hide show
  1. app.py +16 -2
  2. configs/training_config.yaml +1 -1
  3. env/spindleflow_env.py +10 -0
app.py CHANGED
@@ -147,6 +147,20 @@ def _training_thread():
147
  TieredRewardScorer._get_openai_client = lambda self: None
148
  _log("TieredRewardScorer → Tier-1 only (LLM judge disabled for speed) ✓")
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  # ── Smoke test ──────────────────────────────────────
151
  _log("Running smoke test...")
152
  env = SpindleFlowEnv(
@@ -349,7 +363,7 @@ def _training_thread():
349
 
350
  _log(f"Training on : {model.device}")
351
  _log(f"Curriculum : Phase {curriculum.current_phase} — {curriculum.progress_str()}")
352
- total_steps = int(cfg.get("training", {}).get("total_timesteps", 500_000))
353
  _log(f"Total steps : {total_steps:,}")
354
  _log("Training started...\n")
355
  _write_status("training")
@@ -366,7 +380,7 @@ def _training_thread():
366
  )
367
  periodic_push = PeriodicHubPush(
368
  api=api, hf_repo=HF_REPO, hf_token=HF_TOKEN,
369
- vec_env=vec_env, reward_logger_ref=reward_logger, push_every=10_000,
370
  )
371
 
372
  model.learn(
 
147
  TieredRewardScorer._get_openai_client = lambda self: None
148
  _log("TieredRewardScorer → Tier-1 only (LLM judge disabled for speed) ✓")
149
 
150
+ # ── Patch generalist baseline → static (0 API calls per episode) ─────
151
+ from env.spindleflow_env import SpindleFlowEnv as _SFEnv
152
+ _STATIC_BASELINE = (
153
+ "General problem-solving approach:\n"
154
+ "1. Gather and clarify requirements\n"
155
+ "2. Research common solution patterns\n"
156
+ "3. Draft a high-level architecture\n"
157
+ "4. Implement in small, testable increments\n"
158
+ "5. Validate against acceptance criteria and deploy\n"
159
+ "No specialist domain expertise applied."
160
+ )
161
+ _SFEnv._generate_generalist_baseline = lambda self, task: _STATIC_BASELINE
162
+ _log("Generalist baseline → static simulation (0 API calls per episode) ✓")
163
+
164
  # ── Smoke test ──────────────────────────────────────
165
  _log("Running smoke test...")
166
  env = SpindleFlowEnv(
 
363
 
364
  _log(f"Training on : {model.device}")
365
  _log(f"Curriculum : Phase {curriculum.current_phase} — {curriculum.progress_str()}")
366
+ total_steps = 30_000 # ~45 min on A100 with simulation, produces clean reward curve
367
  _log(f"Total steps : {total_steps:,}")
368
  _log("Training started...\n")
369
  _write_status("training")
 
380
  )
381
  periodic_push = PeriodicHubPush(
382
  api=api, hf_repo=HF_REPO, hf_token=HF_TOKEN,
383
+ vec_env=vec_env, reward_logger_ref=reward_logger, push_every=5_000,
384
  )
385
 
386
  model.learn(
configs/training_config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  training:
2
  seed: 42
3
- total_timesteps: 100000
4
  n_envs: 1
5
  device: "auto" # "cuda" if available, else "cpu"
6
 
 
1
  training:
2
  seed: 42
3
+ total_timesteps: 30000
4
  n_envs: 1
5
  device: "auto" # "cuda" if available, else "cpu"
6
 
env/spindleflow_env.py CHANGED
@@ -1183,6 +1183,16 @@ class SpindleFlowEnv(gym.Env):
1183
  Falls back to a simulated template when no key is available.
1184
  """
1185
  import os
 
 
 
 
 
 
 
 
 
 
1186
  api_key = os.getenv("OPENAI_API_KEY")
1187
  if api_key:
1188
  try:
 
1183
  Falls back to a simulated template when no key is available.
1184
  """
1185
  import os
1186
+ if getattr(self, "simulate_specialists", False) or not os.getenv("OPENAI_API_KEY"):
1187
+ return (
1188
+ "General problem-solving approach:\n"
1189
+ "1. Gather and clarify requirements\n"
1190
+ "2. Research common solution patterns\n"
1191
+ "3. Draft a high-level architecture\n"
1192
+ "4. Implement in small, testable increments\n"
1193
+ "5. Validate against acceptance criteria and deploy\n"
1194
+ "No specialist domain expertise applied."
1195
+ )
1196
  api_key = os.getenv("OPENAI_API_KEY")
1197
  if api_key:
1198
  try: