Spaces:

garvitsachdeva
/

SpindleFlow-RL

Runtime error

App Files Community

garvitsachdeva committed 22 days ago

Commit

a5c7dd0

1 Parent(s): e8360fd

fix: eliminate API leaks in training loop, set 30k steps for A100

Browse files

Files changed (3) hide show

app.py +16 -2
configs/training_config.yaml +1 -1
env/spindleflow_env.py +10 -0

app.py CHANGED Viewed

@@ -147,6 +147,20 @@ def _training_thread():
         TieredRewardScorer._get_openai_client = lambda self: None
         _log("TieredRewardScorer → Tier-1 only (LLM judge disabled for speed) ✓")
         # ── Smoke test ──────────────────────────────────────
         _log("Running smoke test...")
         env = SpindleFlowEnv(
@@ -349,7 +363,7 @@ def _training_thread():
         _log(f"Training on : {model.device}")
         _log(f"Curriculum  : Phase {curriculum.current_phase} — {curriculum.progress_str()}")
-        total_steps = int(cfg.get("training", {}).get("total_timesteps", 500_000))
         _log(f"Total steps : {total_steps:,}")
         _log("Training started...\n")
         _write_status("training")
@@ -366,7 +380,7 @@ def _training_thread():
         )
         periodic_push  = PeriodicHubPush(
             api=api, hf_repo=HF_REPO, hf_token=HF_TOKEN,
-            vec_env=vec_env, reward_logger_ref=reward_logger, push_every=10_000,
         )
         model.learn(

         TieredRewardScorer._get_openai_client = lambda self: None
         _log("TieredRewardScorer → Tier-1 only (LLM judge disabled for speed) ✓")
+        # ── Patch generalist baseline → static (0 API calls per episode) ─────
+        from env.spindleflow_env import SpindleFlowEnv as _SFEnv
+        _STATIC_BASELINE = (
+            "General problem-solving approach:\n"
+            "1. Gather and clarify requirements\n"
+            "2. Research common solution patterns\n"
+            "3. Draft a high-level architecture\n"
+            "4. Implement in small, testable increments\n"
+            "5. Validate against acceptance criteria and deploy\n"
+            "No specialist domain expertise applied."
+        )
+        _SFEnv._generate_generalist_baseline = lambda self, task: _STATIC_BASELINE
+        _log("Generalist baseline → static simulation (0 API calls per episode) ✓")
         # ── Smoke test ──────────────────────────────────────
         _log("Running smoke test...")
         env = SpindleFlowEnv(
         _log(f"Training on : {model.device}")
         _log(f"Curriculum  : Phase {curriculum.current_phase} — {curriculum.progress_str()}")
+        total_steps = 30_000  # ~45 min on A100 with simulation, produces clean reward curve
         _log(f"Total steps : {total_steps:,}")
         _log("Training started...\n")
         _write_status("training")
         )
         periodic_push  = PeriodicHubPush(
             api=api, hf_repo=HF_REPO, hf_token=HF_TOKEN,
+            vec_env=vec_env, reward_logger_ref=reward_logger, push_every=5_000,
         )
         model.learn(

configs/training_config.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 training:
   seed: 42
-  total_timesteps: 100000
   n_envs: 1
   device: "auto"  # "cuda" if available, else "cpu"

 training:
   seed: 42
+  total_timesteps: 30000
   n_envs: 1
   device: "auto"  # "cuda" if available, else "cpu"

env/spindleflow_env.py CHANGED Viewed

@@ -1183,6 +1183,16 @@ class SpindleFlowEnv(gym.Env):
         Falls back to a simulated template when no key is available.
         """
         import os
         api_key = os.getenv("OPENAI_API_KEY")
         if api_key:
             try:

         Falls back to a simulated template when no key is available.
         """
         import os
+        if getattr(self, "simulate_specialists", False) or not os.getenv("OPENAI_API_KEY"):
+            return (
+                "General problem-solving approach:\n"
+                "1. Gather and clarify requirements\n"
+                "2. Research common solution patterns\n"
+                "3. Draft a high-level architecture\n"
+                "4. Implement in small, testable increments\n"
+                "5. Validate against acceptance criteria and deploy\n"
+                "No specialist domain expertise applied."
+            )
         api_key = os.getenv("OPENAI_API_KEY")
         if api_key:
             try: