Spaces:
Runtime error
Runtime error
Commit ·
3bce6e3
1
Parent(s): 21923ce
Bulletproof Cell 1: check .git dir, rm partial clone, correct REPO path
Browse files- colab/train_colab.py +82 -63
colab/train_colab.py
CHANGED
|
@@ -1,27 +1,27 @@
|
|
| 1 |
# ============================================================
|
| 2 |
# SpindleFlow RL — Colab Training Script
|
| 3 |
#
|
| 4 |
-
#
|
| 5 |
-
# Runtime → Change runtime type → T4 GPU
|
| 6 |
-
#
|
| 7 |
-
#
|
| 8 |
-
#
|
| 9 |
-
#
|
| 10 |
-
#
|
| 11 |
-
#
|
| 12 |
-
#
|
| 13 |
-
# into a separate code cell, run top to bottom.
|
| 14 |
# ============================================================
|
| 15 |
|
| 16 |
|
| 17 |
# ============================================================
|
| 18 |
-
# CELL 1 — Install packages + clone repo
|
| 19 |
# ============================================================
|
| 20 |
import subprocess, os, sys
|
| 21 |
|
| 22 |
-
print(f"Python {sys.version}")
|
| 23 |
|
| 24 |
-
#
|
|
|
|
| 25 |
packages = [
|
| 26 |
"openenv", "stable-baselines3", "sb3-contrib", "gymnasium",
|
| 27 |
"sentence-transformers", "openai", "pyyaml", "trl",
|
|
@@ -30,25 +30,36 @@ packages = [
|
|
| 30 |
if sys.version_info >= (3, 13):
|
| 31 |
packages.append("audioop-lts")
|
| 32 |
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
if result.returncode != 0:
|
| 35 |
print(result.stdout[-3000:])
|
| 36 |
print(result.stderr[-3000:])
|
| 37 |
raise RuntimeError("pip install failed — see output above")
|
| 38 |
-
print("Packages OK")
|
| 39 |
|
|
|
|
|
|
|
|
|
|
| 40 |
REPO = "/content/kuchbhi"
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
print("Repo cloned")
|
| 46 |
else:
|
|
|
|
| 47 |
subprocess.run(["git", "pull"], cwd=REPO, check=True)
|
| 48 |
print("Repo updated")
|
| 49 |
|
|
|
|
| 50 |
os.chdir(REPO)
|
| 51 |
-
|
|
|
|
|
|
|
| 52 |
os.makedirs("/content/demo/assets", exist_ok=True)
|
| 53 |
os.makedirs("/content/data", exist_ok=True)
|
| 54 |
os.makedirs("/content/checkpoints", exist_ok=True)
|
|
@@ -57,7 +68,7 @@ os.makedirs("/content/logs", exist_ok=True)
|
|
| 57 |
import importlib.metadata
|
| 58 |
print(f"OpenEnv : {importlib.metadata.version('openenv')}")
|
| 59 |
print(f"CWD : {os.getcwd()}")
|
| 60 |
-
print("
|
| 61 |
|
| 62 |
|
| 63 |
# ============================================================
|
|
@@ -72,19 +83,25 @@ OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")
|
|
| 72 |
if not HF_TOKEN:
|
| 73 |
raise RuntimeError(
|
| 74 |
"HF_TOKEN missing.\n"
|
| 75 |
-
"Key icon
|
|
|
|
|
|
|
|
|
|
| 76 |
)
|
| 77 |
if not OPENAI_API_KEY:
|
| 78 |
raise RuntimeError(
|
| 79 |
"OPENAI_API_KEY missing.\n"
|
| 80 |
-
"Key icon
|
|
|
|
|
|
|
|
|
|
| 81 |
)
|
| 82 |
|
| 83 |
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
| 84 |
|
| 85 |
print(f"HF_TOKEN : {HF_TOKEN[:8]}...{HF_TOKEN[-4:]}")
|
| 86 |
print(f"OPENAI_API_KEY : {OPENAI_API_KEY[:8]}...{OPENAI_API_KEY[-4:]}")
|
| 87 |
-
print("
|
| 88 |
|
| 89 |
|
| 90 |
# ============================================================
|
|
@@ -94,9 +111,8 @@ import os as _os
|
|
| 94 |
import numpy as np
|
| 95 |
from env.spindleflow_env import SpindleFlowEnv
|
| 96 |
|
| 97 |
-
# simulate_specialists
|
| 98 |
-
#
|
| 99 |
-
# and the finetuner that fires every 100 episodes.
|
| 100 |
if not getattr(SpindleFlowEnv, "_simulate_patched", False):
|
| 101 |
_orig_init = SpindleFlowEnv.__init__
|
| 102 |
|
|
@@ -112,7 +128,8 @@ if not getattr(SpindleFlowEnv, "_simulate_patched", False):
|
|
| 112 |
if getattr(self, "simulate_specialists", False):
|
| 113 |
_key = _os.environ.pop("OPENAI_API_KEY", None)
|
| 114 |
try:
|
| 115 |
-
return _orig_call(self, specialist_id, task,
|
|
|
|
| 116 |
finally:
|
| 117 |
if _key:
|
| 118 |
_os.environ["OPENAI_API_KEY"] = _key
|
|
@@ -137,7 +154,7 @@ _, reward, _, _, info2 = env.step(env.action_space.sample())
|
|
| 137 |
print(f"reward : {reward:.4f}")
|
| 138 |
print(f"action : {info2['action_name']}")
|
| 139 |
env.close()
|
| 140 |
-
print("
|
| 141 |
|
| 142 |
|
| 143 |
# ============================================================
|
|
@@ -153,23 +170,23 @@ if torch.cuda.is_available():
|
|
| 153 |
|
| 154 |
for _name in ("PPOConfig", "GRPOConfig", "SFTConfig"):
|
| 155 |
if getattr(trl, _name, None):
|
| 156 |
-
print(f"TRL config
|
| 157 |
break
|
| 158 |
else:
|
| 159 |
print("TRL imported (TrainingArguments-based version)")
|
| 160 |
|
| 161 |
-
print("
|
| 162 |
|
| 163 |
|
| 164 |
# ============================================================
|
| 165 |
# CELL 5 — Train RecurrentPPO (LSTM PPO)
|
| 166 |
#
|
| 167 |
-
# Per-step calls
|
| 168 |
-
# Task generation
|
| 169 |
-
# Finetuner
|
| 170 |
-
# Reward baseline
|
| 171 |
#
|
| 172 |
-
# Expected: ~20
|
| 173 |
# ============================================================
|
| 174 |
import time, yaml, torch, numpy as np
|
| 175 |
from sb3_contrib import RecurrentPPO
|
|
@@ -254,9 +271,9 @@ model = RecurrentPPO(
|
|
| 254 |
device="cuda" if torch.cuda.is_available() else "cpu",
|
| 255 |
)
|
| 256 |
|
| 257 |
-
_tlog(f"Device
|
| 258 |
-
_tlog(f"Timesteps
|
| 259 |
-
_tlog(f"Curriculum
|
| 260 |
_tlog("Training started...")
|
| 261 |
|
| 262 |
reward_logger = RewardLogger(curriculum)
|
|
@@ -279,9 +296,9 @@ model.save("/content/spindleflow_model")
|
|
| 279 |
vec_env.save("/content/vec_normalize.pkl")
|
| 280 |
|
| 281 |
_tlog(f"Done in {_elapsed/60:.1f} min")
|
| 282 |
-
_tlog(f"Episodes
|
| 283 |
_tlog(f"Curriculum final: {curriculum.progress_str()}")
|
| 284 |
-
print("
|
| 285 |
|
| 286 |
|
| 287 |
# ============================================================
|
|
@@ -293,11 +310,11 @@ import matplotlib.pyplot as plt
|
|
| 293 |
|
| 294 |
ep_rewards = reward_logger.episode_rewards
|
| 295 |
if not ep_rewards:
|
| 296 |
-
raise RuntimeError("No episodes
|
| 297 |
|
| 298 |
n_ep = len(ep_rewards)
|
| 299 |
episodes = list(range(n_ep))
|
| 300 |
-
window = max(30, n_ep // 20)
|
| 301 |
|
| 302 |
smoothed = [
|
| 303 |
float(np.mean(ep_rewards[max(0, i - window):i + 1]))
|
|
@@ -338,8 +355,8 @@ ax.set_title(
|
|
| 338 |
color="#f0f6fc", fontsize=13, fontweight="bold", pad=14,
|
| 339 |
)
|
| 340 |
ax.tick_params(colors="#8b949e")
|
| 341 |
-
for
|
| 342 |
-
|
| 343 |
ax.grid(color="#21262d", linewidth=0.8, alpha=0.9)
|
| 344 |
ax.legend(fontsize=10, framealpha=0.85,
|
| 345 |
facecolor="#161b22", edgecolor="#30363d", labelcolor="#c9d1d9")
|
|
@@ -354,11 +371,11 @@ fig.savefig("/content/reward_curve.png", dpi=180, bbox_inches="tight",
|
|
| 354 |
facecolor=fig.get_facecolor())
|
| 355 |
plt.show()
|
| 356 |
|
| 357 |
-
_tlog(f"Curve: early={early_mean:+.4f}
|
| 358 |
f"improvement={improvement:+.4f}")
|
| 359 |
-
print(f"
|
| 360 |
print(f"Improvement: {improvement:+.4f}")
|
| 361 |
-
print("
|
| 362 |
|
| 363 |
|
| 364 |
# ============================================================
|
|
@@ -367,11 +384,11 @@ print("CELL 6 done — reward curve saved")
|
|
| 367 |
import json
|
| 368 |
from pathlib import Path
|
| 369 |
|
| 370 |
-
print("="*52)
|
| 371 |
print("LEARNING FEATURES AUDIT")
|
| 372 |
-
print("="*52)
|
| 373 |
|
| 374 |
-
print(f"\nFeature 5 — Curriculum")
|
| 375 |
print(f" Phase : {curriculum.current_phase}/3")
|
| 376 |
print(f" Rolling mean : {curriculum.rolling_mean():.3f}")
|
| 377 |
print(f" {curriculum.progress_str()}")
|
|
@@ -382,12 +399,12 @@ print(f"\nFeature 2 — Specialist memory ({mem_path})")
|
|
| 382 |
if mem_path.exists():
|
| 383 |
data = json.loads(mem_path.read_text())
|
| 384 |
total = sum(len(v) for v in data.values())
|
| 385 |
-
print(f" {len(data)} specialists
|
| 386 |
for sid, entries in list(data.items())[:3]:
|
| 387 |
avg = sum(e["reward"] for e in entries) / len(entries)
|
| 388 |
print(f" {sid}: {len(entries)} entries, avg={avg:.3f}")
|
| 389 |
else:
|
| 390 |
-
print(" No file yet (finetuner fires after 100 episodes)")
|
| 391 |
|
| 392 |
spawn_path = Path(_cfg.get("environment", {}).get(
|
| 393 |
"spawn_memory_path", "data/spawn_memory.jsonl"))
|
|
@@ -395,6 +412,9 @@ print(f"\nFeature 3 — Spawn memory ({spawn_path})")
|
|
| 395 |
if spawn_path.exists():
|
| 396 |
lines = [l for l in spawn_path.read_text().splitlines() if l.strip()]
|
| 397 |
print(f" {len(lines)} spawn records")
|
|
|
|
|
|
|
|
|
|
| 398 |
else:
|
| 399 |
print(" No file yet")
|
| 400 |
|
|
@@ -407,8 +427,8 @@ if res_path.exists():
|
|
| 407 |
else:
|
| 408 |
print(" No file yet")
|
| 409 |
|
| 410 |
-
print("\n" + "="*52)
|
| 411 |
-
print("CELL 7 done")
|
| 412 |
|
| 413 |
|
| 414 |
# ============================================================
|
|
@@ -467,12 +487,12 @@ with open(readme_path, "w") as f:
|
|
| 467 |
f.write(readme)
|
| 468 |
|
| 469 |
candidates = [
|
| 470 |
-
("/content/spindleflow_model.zip",
|
| 471 |
-
("/content/vec_normalize.pkl",
|
| 472 |
-
("/content/reward_curve.png",
|
| 473 |
-
("/content/demo/assets/reward_curve.json",
|
| 474 |
-
("/content/logs/training_log.txt",
|
| 475 |
-
(readme_path,
|
| 476 |
]
|
| 477 |
|
| 478 |
ops = [
|
|
@@ -490,7 +510,6 @@ _tlog(f"Uploaded {len(ops)} files:")
|
|
| 490 |
for src, dst in candidates:
|
| 491 |
if os.path.exists(src):
|
| 492 |
_tlog(f" {dst}")
|
| 493 |
-
|
| 494 |
_tlog(f"Model live : https://huggingface.co/{HF_REPO}")
|
| 495 |
_tlog(f"Log : https://huggingface.co/{HF_REPO}/blob/main/training_log.txt")
|
| 496 |
-
print("
|
|
|
|
| 1 |
# ============================================================
|
| 2 |
# SpindleFlow RL — Colab Training Script
|
| 3 |
#
|
| 4 |
+
# BEFORE ANYTHING:
|
| 5 |
+
# 1. Runtime → Change runtime type → T4 GPU
|
| 6 |
+
# 2. Key icon (left sidebar) → Manage secrets → add:
|
| 7 |
+
# HF_TOKEN = hf_xxxx (write token: hf.co/settings/tokens)
|
| 8 |
+
# OPENAI_API_KEY = sk-xxxx
|
| 9 |
+
# Toggle "Notebook access" ON for both.
|
| 10 |
+
# 3. Create a new Colab notebook.
|
| 11 |
+
# 4. Copy each CELL block below into its own code cell.
|
| 12 |
+
# 5. Run cells top to bottom, one at a time.
|
|
|
|
| 13 |
# ============================================================
|
| 14 |
|
| 15 |
|
| 16 |
# ============================================================
|
| 17 |
+
# CELL 1 — Install packages + clone/update repo
|
| 18 |
# ============================================================
|
| 19 |
import subprocess, os, sys
|
| 20 |
|
| 21 |
+
print(f"Python {sys.version}\n")
|
| 22 |
|
| 23 |
+
# ── Install packages ─────────────────────────────────────────
|
| 24 |
+
# audioop-lts is only for Python 3.13+ (Colab uses 3.12)
|
| 25 |
packages = [
|
| 26 |
"openenv", "stable-baselines3", "sb3-contrib", "gymnasium",
|
| 27 |
"sentence-transformers", "openai", "pyyaml", "trl",
|
|
|
|
| 30 |
if sys.version_info >= (3, 13):
|
| 31 |
packages.append("audioop-lts")
|
| 32 |
|
| 33 |
+
print("Installing packages...")
|
| 34 |
+
result = subprocess.run(["pip", "install"] + packages,
|
| 35 |
+
capture_output=True, text=True)
|
| 36 |
if result.returncode != 0:
|
| 37 |
print(result.stdout[-3000:])
|
| 38 |
print(result.stderr[-3000:])
|
| 39 |
raise RuntimeError("pip install failed — see output above")
|
| 40 |
+
print("Packages OK\n")
|
| 41 |
|
| 42 |
+
# ── Clone or update repo ─────────────────────────────────────
|
| 43 |
+
# The GitHub repo IS the spindleflow-rl project root.
|
| 44 |
+
# It clones to /content/kuchbhi/ — that IS the working directory.
|
| 45 |
REPO = "/content/kuchbhi"
|
| 46 |
+
GIT_URL = "https://github.com/garvitsachdevaa/kuchbhi.git"
|
| 47 |
+
|
| 48 |
+
if not os.path.isdir(os.path.join(REPO, ".git")):
|
| 49 |
+
# Not cloned yet — do a fresh clone
|
| 50 |
+
subprocess.run(["rm", "-rf", REPO]) # remove partial clone if any
|
| 51 |
+
subprocess.run(["git", "clone", GIT_URL], cwd="/content", check=True)
|
| 52 |
print("Repo cloned")
|
| 53 |
else:
|
| 54 |
+
# Already cloned — pull latest
|
| 55 |
subprocess.run(["git", "pull"], cwd=REPO, check=True)
|
| 56 |
print("Repo updated")
|
| 57 |
|
| 58 |
+
# ── Set working directory ────────────────────────────────────
|
| 59 |
os.chdir(REPO)
|
| 60 |
+
if "." not in sys.path:
|
| 61 |
+
sys.path.insert(0, ".")
|
| 62 |
+
|
| 63 |
os.makedirs("/content/demo/assets", exist_ok=True)
|
| 64 |
os.makedirs("/content/data", exist_ok=True)
|
| 65 |
os.makedirs("/content/checkpoints", exist_ok=True)
|
|
|
|
| 68 |
import importlib.metadata
|
| 69 |
print(f"OpenEnv : {importlib.metadata.version('openenv')}")
|
| 70 |
print(f"CWD : {os.getcwd()}")
|
| 71 |
+
print("\nCELL 1 done ✓")
|
| 72 |
|
| 73 |
|
| 74 |
# ============================================================
|
|
|
|
| 83 |
if not HF_TOKEN:
|
| 84 |
raise RuntimeError(
|
| 85 |
"HF_TOKEN missing.\n"
|
| 86 |
+
"Key icon (left sidebar) → Add secret\n"
|
| 87 |
+
" Name: HF_TOKEN\n"
|
| 88 |
+
" Value: hf_xxxx (write token from hf.co/settings/tokens)\n"
|
| 89 |
+
" Toggle Notebook access ON"
|
| 90 |
)
|
| 91 |
if not OPENAI_API_KEY:
|
| 92 |
raise RuntimeError(
|
| 93 |
"OPENAI_API_KEY missing.\n"
|
| 94 |
+
"Key icon (left sidebar) → Add secret\n"
|
| 95 |
+
" Name: OPENAI_API_KEY\n"
|
| 96 |
+
" Value: sk-xxxx\n"
|
| 97 |
+
" Toggle Notebook access ON"
|
| 98 |
)
|
| 99 |
|
| 100 |
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
| 101 |
|
| 102 |
print(f"HF_TOKEN : {HF_TOKEN[:8]}...{HF_TOKEN[-4:]}")
|
| 103 |
print(f"OPENAI_API_KEY : {OPENAI_API_KEY[:8]}...{OPENAI_API_KEY[-4:]}")
|
| 104 |
+
print("\nCELL 2 done ✓")
|
| 105 |
|
| 106 |
|
| 107 |
# ============================================================
|
|
|
|
| 111 |
import numpy as np
|
| 112 |
from env.spindleflow_env import SpindleFlowEnv
|
| 113 |
|
| 114 |
+
# Adds simulate_specialists kwarg so per-step calls stay local/fast.
|
| 115 |
+
# OPENAI_API_KEY is still active for task generation + finetuner.
|
|
|
|
| 116 |
if not getattr(SpindleFlowEnv, "_simulate_patched", False):
|
| 117 |
_orig_init = SpindleFlowEnv.__init__
|
| 118 |
|
|
|
|
| 128 |
if getattr(self, "simulate_specialists", False):
|
| 129 |
_key = _os.environ.pop("OPENAI_API_KEY", None)
|
| 130 |
try:
|
| 131 |
+
return _orig_call(self, specialist_id, task,
|
| 132 |
+
elapsed_ms, context=context)
|
| 133 |
finally:
|
| 134 |
if _key:
|
| 135 |
_os.environ["OPENAI_API_KEY"] = _key
|
|
|
|
| 154 |
print(f"reward : {reward:.4f}")
|
| 155 |
print(f"action : {info2['action_name']}")
|
| 156 |
env.close()
|
| 157 |
+
print("\nCELL 3 done ✓ — environment OK")
|
| 158 |
|
| 159 |
|
| 160 |
# ============================================================
|
|
|
|
| 170 |
|
| 171 |
for _name in ("PPOConfig", "GRPOConfig", "SFTConfig"):
|
| 172 |
if getattr(trl, _name, None):
|
| 173 |
+
print(f"TRL config: {_name}")
|
| 174 |
break
|
| 175 |
else:
|
| 176 |
print("TRL imported (TrainingArguments-based version)")
|
| 177 |
|
| 178 |
+
print("\nCELL 4 done ✓ — TRL requirement satisfied")
|
| 179 |
|
| 180 |
|
| 181 |
# ============================================================
|
| 182 |
# CELL 5 — Train RecurrentPPO (LSTM PPO)
|
| 183 |
#
|
| 184 |
+
# Per-step specialist calls : local simulation (no API cost/latency)
|
| 185 |
+
# Task generation : GPT-4o-mini via OPENAI_API_KEY
|
| 186 |
+
# Finetuner : fires every 100 episodes
|
| 187 |
+
# Reward baseline : GPT-4o-mini via OPENAI_API_KEY
|
| 188 |
#
|
| 189 |
+
# Expected runtime: ~20–25 min on T4 for 100k steps (~10k episodes)
|
| 190 |
# ============================================================
|
| 191 |
import time, yaml, torch, numpy as np
|
| 192 |
from sb3_contrib import RecurrentPPO
|
|
|
|
| 271 |
device="cuda" if torch.cuda.is_available() else "cpu",
|
| 272 |
)
|
| 273 |
|
| 274 |
+
_tlog(f"Device : {model.device}")
|
| 275 |
+
_tlog(f"Timesteps : {TOTAL_TIMESTEPS:,}")
|
| 276 |
+
_tlog(f"Curriculum : Phase {curriculum.current_phase} — {curriculum.progress_str()}")
|
| 277 |
_tlog("Training started...")
|
| 278 |
|
| 279 |
reward_logger = RewardLogger(curriculum)
|
|
|
|
| 296 |
vec_env.save("/content/vec_normalize.pkl")
|
| 297 |
|
| 298 |
_tlog(f"Done in {_elapsed/60:.1f} min")
|
| 299 |
+
_tlog(f"Episodes : {len(reward_logger.episode_rewards)}")
|
| 300 |
_tlog(f"Curriculum final: {curriculum.progress_str()}")
|
| 301 |
+
print("\nCELL 5 done ✓ — model saved")
|
| 302 |
|
| 303 |
|
| 304 |
# ============================================================
|
|
|
|
| 310 |
|
| 311 |
ep_rewards = reward_logger.episode_rewards
|
| 312 |
if not ep_rewards:
|
| 313 |
+
raise RuntimeError("No episodes recorded — check Cell 5 output for errors")
|
| 314 |
|
| 315 |
n_ep = len(ep_rewards)
|
| 316 |
episodes = list(range(n_ep))
|
| 317 |
+
window = max(30, n_ep // 20) # adaptive: ~5% of run
|
| 318 |
|
| 319 |
smoothed = [
|
| 320 |
float(np.mean(ep_rewards[max(0, i - window):i + 1]))
|
|
|
|
| 355 |
color="#f0f6fc", fontsize=13, fontweight="bold", pad=14,
|
| 356 |
)
|
| 357 |
ax.tick_params(colors="#8b949e")
|
| 358 |
+
for sp in ax.spines.values():
|
| 359 |
+
sp.set_edgecolor("#30363d")
|
| 360 |
ax.grid(color="#21262d", linewidth=0.8, alpha=0.9)
|
| 361 |
ax.legend(fontsize=10, framealpha=0.85,
|
| 362 |
facecolor="#161b22", edgecolor="#30363d", labelcolor="#c9d1d9")
|
|
|
|
| 371 |
facecolor=fig.get_facecolor())
|
| 372 |
plt.show()
|
| 373 |
|
| 374 |
+
_tlog(f"Curve: early={early_mean:+.4f} final={final_mean:+.4f} "
|
| 375 |
f"improvement={improvement:+.4f}")
|
| 376 |
+
print(f"Episodes : {n_ep:,}")
|
| 377 |
print(f"Improvement: {improvement:+.4f}")
|
| 378 |
+
print("\nCELL 6 done ✓ — reward curve saved")
|
| 379 |
|
| 380 |
|
| 381 |
# ============================================================
|
|
|
|
| 384 |
import json
|
| 385 |
from pathlib import Path
|
| 386 |
|
| 387 |
+
print("=" * 52)
|
| 388 |
print("LEARNING FEATURES AUDIT")
|
| 389 |
+
print("=" * 52)
|
| 390 |
|
| 391 |
+
print(f"\nFeature 5 — Curriculum (performance-gated)")
|
| 392 |
print(f" Phase : {curriculum.current_phase}/3")
|
| 393 |
print(f" Rolling mean : {curriculum.rolling_mean():.3f}")
|
| 394 |
print(f" {curriculum.progress_str()}")
|
|
|
|
| 399 |
if mem_path.exists():
|
| 400 |
data = json.loads(mem_path.read_text())
|
| 401 |
total = sum(len(v) for v in data.values())
|
| 402 |
+
print(f" {len(data)} specialists · {total} total entries")
|
| 403 |
for sid, entries in list(data.items())[:3]:
|
| 404 |
avg = sum(e["reward"] for e in entries) / len(entries)
|
| 405 |
print(f" {sid}: {len(entries)} entries, avg={avg:.3f}")
|
| 406 |
else:
|
| 407 |
+
print(" No file yet (finetuner fires after 100 completed episodes)")
|
| 408 |
|
| 409 |
spawn_path = Path(_cfg.get("environment", {}).get(
|
| 410 |
"spawn_memory_path", "data/spawn_memory.jsonl"))
|
|
|
|
| 412 |
if spawn_path.exists():
|
| 413 |
lines = [l for l in spawn_path.read_text().splitlines() if l.strip()]
|
| 414 |
print(f" {len(lines)} spawn records")
|
| 415 |
+
for line in lines[:2]:
|
| 416 |
+
rec = json.loads(line)
|
| 417 |
+
print(f" {rec['specialist_role']} | reward={rec['episode_reward']:.3f}")
|
| 418 |
else:
|
| 419 |
print(" No file yet")
|
| 420 |
|
|
|
|
| 427 |
else:
|
| 428 |
print(" No file yet")
|
| 429 |
|
| 430 |
+
print("\n" + "=" * 52)
|
| 431 |
+
print("CELL 7 done ✓")
|
| 432 |
|
| 433 |
|
| 434 |
# ============================================================
|
|
|
|
| 487 |
f.write(readme)
|
| 488 |
|
| 489 |
candidates = [
|
| 490 |
+
("/content/spindleflow_model.zip", "spindleflow_model.zip"),
|
| 491 |
+
("/content/vec_normalize.pkl", "vec_normalize.pkl"),
|
| 492 |
+
("/content/reward_curve.png", "reward_curve.png"),
|
| 493 |
+
("/content/demo/assets/reward_curve.json", "reward_curve.json"),
|
| 494 |
+
("/content/logs/training_log.txt", "training_log.txt"),
|
| 495 |
+
(readme_path, "README.md"),
|
| 496 |
]
|
| 497 |
|
| 498 |
ops = [
|
|
|
|
| 510 |
for src, dst in candidates:
|
| 511 |
if os.path.exists(src):
|
| 512 |
_tlog(f" {dst}")
|
|
|
|
| 513 |
_tlog(f"Model live : https://huggingface.co/{HF_REPO}")
|
| 514 |
_tlog(f"Log : https://huggingface.co/{HF_REPO}/blob/main/training_log.txt")
|
| 515 |
+
print("\nCELL 8 done ✓ — all done!")
|