Spaces:
Runtime error
Runtime error
Commit ·
a5c7dd0
1
Parent(s): e8360fd
fix: eliminate API leaks in training loop, set 30k steps for A100
Browse files
- app.py +16 -2
- configs/training_config.yaml +1 -1
- env/spindleflow_env.py +10 -0
app.py
CHANGED
|
@@ -147,6 +147,20 @@ def _training_thread():
|
|
| 147 |
TieredRewardScorer._get_openai_client = lambda self: None
|
| 148 |
_log("TieredRewardScorer β Tier-1 only (LLM judge disabled for speed) β")
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
# ── Smoke test ──────────────────────────────────────
|
| 151 |
_log("Running smoke test...")
|
| 152 |
env = SpindleFlowEnv(
|
|
@@ -349,7 +363,7 @@ def _training_thread():
|
|
| 349 |
|
| 350 |
_log(f"Training on : {model.device}")
|
| 351 |
_log(f"Curriculum : Phase {curriculum.current_phase} β {curriculum.progress_str()}")
|
| 352 |
-
total_steps =
|
| 353 |
_log(f"Total steps : {total_steps:,}")
|
| 354 |
_log("Training started...\n")
|
| 355 |
_write_status("training")
|
|
@@ -366,7 +380,7 @@ def _training_thread():
|
|
| 366 |
)
|
| 367 |
periodic_push = PeriodicHubPush(
|
| 368 |
api=api, hf_repo=HF_REPO, hf_token=HF_TOKEN,
|
| 369 |
-
vec_env=vec_env, reward_logger_ref=reward_logger, push_every=
|
| 370 |
)
|
| 371 |
|
| 372 |
model.learn(
|
|
|
|
| 147 |
TieredRewardScorer._get_openai_client = lambda self: None
|
| 148 |
_log("TieredRewardScorer β Tier-1 only (LLM judge disabled for speed) β")
|
| 149 |
|
| 150 |
+
# ── Patch generalist baseline → static (0 API calls per episode) ─────
|
| 151 |
+
from env.spindleflow_env import SpindleFlowEnv as _SFEnv
|
| 152 |
+
_STATIC_BASELINE = (
|
| 153 |
+
"General problem-solving approach:\n"
|
| 154 |
+
"1. Gather and clarify requirements\n"
|
| 155 |
+
"2. Research common solution patterns\n"
|
| 156 |
+
"3. Draft a high-level architecture\n"
|
| 157 |
+
"4. Implement in small, testable increments\n"
|
| 158 |
+
"5. Validate against acceptance criteria and deploy\n"
|
| 159 |
+
"No specialist domain expertise applied."
|
| 160 |
+
)
|
| 161 |
+
_SFEnv._generate_generalist_baseline = lambda self, task: _STATIC_BASELINE
|
| 162 |
+
_log("Generalist baseline β static simulation (0 API calls per episode) β")
|
| 163 |
+
|
| 164 |
# ── Smoke test ──────────────────────────────────────
|
| 165 |
_log("Running smoke test...")
|
| 166 |
env = SpindleFlowEnv(
|
|
|
|
| 363 |
|
| 364 |
_log(f"Training on : {model.device}")
|
| 365 |
_log(f"Curriculum : Phase {curriculum.current_phase} β {curriculum.progress_str()}")
|
| 366 |
+
total_steps = 30_000 # ~45 min on A100 with simulation, produces clean reward curve
|
| 367 |
_log(f"Total steps : {total_steps:,}")
|
| 368 |
_log("Training started...\n")
|
| 369 |
_write_status("training")
|
|
|
|
| 380 |
)
|
| 381 |
periodic_push = PeriodicHubPush(
|
| 382 |
api=api, hf_repo=HF_REPO, hf_token=HF_TOKEN,
|
| 383 |
+
vec_env=vec_env, reward_logger_ref=reward_logger, push_every=5_000,
|
| 384 |
)
|
| 385 |
|
| 386 |
model.learn(
|
configs/training_config.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
training:
|
| 2 |
seed: 42
|
| 3 |
-
total_timesteps:
|
| 4 |
n_envs: 1
|
| 5 |
device: "auto" # "cuda" if available, else "cpu"
|
| 6 |
|
|
|
|
| 1 |
training:
|
| 2 |
seed: 42
|
| 3 |
+
total_timesteps: 30000
|
| 4 |
n_envs: 1
|
| 5 |
device: "auto" # "cuda" if available, else "cpu"
|
| 6 |
|
env/spindleflow_env.py
CHANGED
|
@@ -1183,6 +1183,16 @@ class SpindleFlowEnv(gym.Env):
|
|
| 1183 |
Falls back to a simulated template when no key is available.
|
| 1184 |
"""
|
| 1185 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1186 |
api_key = os.getenv("OPENAI_API_KEY")
|
| 1187 |
if api_key:
|
| 1188 |
try:
|
|
|
|
| 1183 |
Falls back to a simulated template when no key is available.
|
| 1184 |
"""
|
| 1185 |
import os
|
| 1186 |
+
if getattr(self, "simulate_specialists", False) or not os.getenv("OPENAI_API_KEY"):
|
| 1187 |
+
return (
|
| 1188 |
+
"General problem-solving approach:\n"
|
| 1189 |
+
"1. Gather and clarify requirements\n"
|
| 1190 |
+
"2. Research common solution patterns\n"
|
| 1191 |
+
"3. Draft a high-level architecture\n"
|
| 1192 |
+
"4. Implement in small, testable increments\n"
|
| 1193 |
+
"5. Validate against acceptance criteria and deploy\n"
|
| 1194 |
+
"No specialist domain expertise applied."
|
| 1195 |
+
)
|
| 1196 |
api_key = os.getenv("OPENAI_API_KEY")
|
| 1197 |
if api_key:
|
| 1198 |
try:
|