garvitsachdeva Claude Sonnet 4.6 committed on
Commit
3bce6e3
·
1 Parent(s): 21923ce

Bulletproof Cell 1: check .git dir, rm partial clone, correct REPO path

Browse files
Files changed (1) hide show
  1. colab/train_colab.py +82 -63
colab/train_colab.py CHANGED
@@ -1,27 +1,27 @@
1
  # ============================================================
2
  # SpindleFlow RL — Colab Training Script
3
  #
4
- # STEP 0 — Before running anything:
5
- # Runtime → Change runtime type → T4 GPU
6
- #
7
- # STEP 1 — Add secrets (key icon in left sidebar):
8
- # HF_TOKEN = hf_xxxx (write token from hf.co/settings/tokens)
9
- # OPENAI_API_KEY = sk-xxxx (needed for task generation + finetuner)
10
- # Toggle "Notebook access" ON for both.
11
- #
12
- # STEP 2 — Create a new notebook, paste each CELL block below
13
- # into a separate code cell, run top to bottom.
14
  # ============================================================
15
 
16
 
17
  # ============================================================
18
- # CELL 1 — Install packages + clone repo
19
  # ============================================================
20
  import subprocess, os, sys
21
 
22
- print(f"Python {sys.version}")
23
 
24
- # audioop-lts is for Python 3.13+ only — Colab runs 3.12
 
25
  packages = [
26
  "openenv", "stable-baselines3", "sb3-contrib", "gymnasium",
27
  "sentence-transformers", "openai", "pyyaml", "trl",
@@ -30,25 +30,36 @@ packages = [
30
  if sys.version_info >= (3, 13):
31
  packages.append("audioop-lts")
32
 
33
- result = subprocess.run(["pip", "install"] + packages, capture_output=True, text=True)
 
 
34
  if result.returncode != 0:
35
  print(result.stdout[-3000:])
36
  print(result.stderr[-3000:])
37
  raise RuntimeError("pip install failed — see output above")
38
- print("Packages OK")
39
 
 
 
 
40
  REPO = "/content/kuchbhi"
41
- if not os.path.isdir(REPO):
42
- subprocess.run(["git", "clone",
43
- "https://github.com/garvitsachdevaa/kuchbhi.git"],
44
- cwd="/content", check=True)
 
 
45
  print("Repo cloned")
46
  else:
 
47
  subprocess.run(["git", "pull"], cwd=REPO, check=True)
48
  print("Repo updated")
49
 
 
50
  os.chdir(REPO)
51
- sys.path.insert(0, ".")
 
 
52
  os.makedirs("/content/demo/assets", exist_ok=True)
53
  os.makedirs("/content/data", exist_ok=True)
54
  os.makedirs("/content/checkpoints", exist_ok=True)
@@ -57,7 +68,7 @@ os.makedirs("/content/logs", exist_ok=True)
57
  import importlib.metadata
58
  print(f"OpenEnv : {importlib.metadata.version('openenv')}")
59
  print(f"CWD : {os.getcwd()}")
60
- print("CELL 1 done")
61
 
62
 
63
  # ============================================================
@@ -72,19 +83,25 @@ OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")
72
  if not HF_TOKEN:
73
  raise RuntimeError(
74
  "HF_TOKEN missing.\n"
75
- "Key icon → Add secret → Name: HF_TOKEN, Value: hf_xxxx, enable notebook access."
 
 
 
76
  )
77
  if not OPENAI_API_KEY:
78
  raise RuntimeError(
79
  "OPENAI_API_KEY missing.\n"
80
- "Key icon → Add secret → Name: OPENAI_API_KEY, Value: sk-xxxx, enable notebook access."
 
 
 
81
  )
82
 
83
  os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
84
 
85
  print(f"HF_TOKEN : {HF_TOKEN[:8]}...{HF_TOKEN[-4:]}")
86
  print(f"OPENAI_API_KEY : {OPENAI_API_KEY[:8]}...{OPENAI_API_KEY[-4:]}")
87
- print("CELL 2 done")
88
 
89
 
90
  # ============================================================
@@ -94,9 +111,8 @@ import os as _os
94
  import numpy as np
95
  from env.spindleflow_env import SpindleFlowEnv
96
 
97
- # simulate_specialists=True → per-step specialist calls use local simulation
98
- # (fast, no API cost per step). OPENAI_API_KEY still used for task generation
99
- # and the finetuner that fires every 100 episodes.
100
  if not getattr(SpindleFlowEnv, "_simulate_patched", False):
101
  _orig_init = SpindleFlowEnv.__init__
102
 
@@ -112,7 +128,8 @@ if not getattr(SpindleFlowEnv, "_simulate_patched", False):
112
  if getattr(self, "simulate_specialists", False):
113
  _key = _os.environ.pop("OPENAI_API_KEY", None)
114
  try:
115
- return _orig_call(self, specialist_id, task, elapsed_ms, context=context)
 
116
  finally:
117
  if _key:
118
  _os.environ["OPENAI_API_KEY"] = _key
@@ -137,7 +154,7 @@ _, reward, _, _, info2 = env.step(env.action_space.sample())
137
  print(f"reward : {reward:.4f}")
138
  print(f"action : {info2['action_name']}")
139
  env.close()
140
- print("CELL 3 done — environment OK")
141
 
142
 
143
  # ============================================================
@@ -153,23 +170,23 @@ if torch.cuda.is_available():
153
 
154
  for _name in ("PPOConfig", "GRPOConfig", "SFTConfig"):
155
  if getattr(trl, _name, None):
156
- print(f"TRL config class: {_name}")
157
  break
158
  else:
159
  print("TRL imported (TrainingArguments-based version)")
160
 
161
- print("CELL 4 done — TRL requirement satisfied")
162
 
163
 
164
  # ============================================================
165
  # CELL 5 — Train RecurrentPPO (LSTM PPO)
166
  #
167
- # Per-step calls : local simulation (~0.001 s/step, no API cost)
168
- # Task generation : GPT-4o-mini via OPENAI_API_KEY (diverse tasks)
169
- # Finetuner : fires every 100 episodes via OPENAI_API_KEY
170
- # Reward baseline : GPT-4o-mini via OPENAI_API_KEY (quality signal)
171
  #
172
- # Expected: ~20-25 min on T4 GPU for 100k steps / ~10k episodes
173
  # ============================================================
174
  import time, yaml, torch, numpy as np
175
  from sb3_contrib import RecurrentPPO
@@ -254,9 +271,9 @@ model = RecurrentPPO(
254
  device="cuda" if torch.cuda.is_available() else "cpu",
255
  )
256
 
257
- _tlog(f"Device : {model.device}")
258
- _tlog(f"Timesteps : {TOTAL_TIMESTEPS:,}")
259
- _tlog(f"Curriculum : Phase {curriculum.current_phase} — {curriculum.progress_str()}")
260
  _tlog("Training started...")
261
 
262
  reward_logger = RewardLogger(curriculum)
@@ -279,9 +296,9 @@ model.save("/content/spindleflow_model")
279
  vec_env.save("/content/vec_normalize.pkl")
280
 
281
  _tlog(f"Done in {_elapsed/60:.1f} min")
282
- _tlog(f"Episodes : {len(reward_logger.episode_rewards)}")
283
  _tlog(f"Curriculum final: {curriculum.progress_str()}")
284
- print("CELL 5 done — model saved")
285
 
286
 
287
  # ============================================================
@@ -293,11 +310,11 @@ import matplotlib.pyplot as plt
293
 
294
  ep_rewards = reward_logger.episode_rewards
295
  if not ep_rewards:
296
- raise RuntimeError("No episodes completed — recheck Cell 5")
297
 
298
  n_ep = len(ep_rewards)
299
  episodes = list(range(n_ep))
300
- window = max(30, n_ep // 20)
301
 
302
  smoothed = [
303
  float(np.mean(ep_rewards[max(0, i - window):i + 1]))
@@ -338,8 +355,8 @@ ax.set_title(
338
  color="#f0f6fc", fontsize=13, fontweight="bold", pad=14,
339
  )
340
  ax.tick_params(colors="#8b949e")
341
- for s in ax.spines.values():
342
- s.set_edgecolor("#30363d")
343
  ax.grid(color="#21262d", linewidth=0.8, alpha=0.9)
344
  ax.legend(fontsize=10, framealpha=0.85,
345
  facecolor="#161b22", edgecolor="#30363d", labelcolor="#c9d1d9")
@@ -354,11 +371,11 @@ fig.savefig("/content/reward_curve.png", dpi=180, bbox_inches="tight",
354
  facecolor=fig.get_facecolor())
355
  plt.show()
356
 
357
- _tlog(f"Curve: early={early_mean:+.4f} final={final_mean:+.4f} "
358
  f"improvement={improvement:+.4f}")
359
- print(f"\nEpisodes : {n_ep:,}")
360
  print(f"Improvement: {improvement:+.4f}")
361
- print("CELL 6 done — reward curve saved")
362
 
363
 
364
  # ============================================================
@@ -367,11 +384,11 @@ print("CELL 6 done — reward curve saved")
367
  import json
368
  from pathlib import Path
369
 
370
- print("="*52)
371
  print("LEARNING FEATURES AUDIT")
372
- print("="*52)
373
 
374
- print(f"\nFeature 5 — Curriculum")
375
  print(f" Phase : {curriculum.current_phase}/3")
376
  print(f" Rolling mean : {curriculum.rolling_mean():.3f}")
377
  print(f" {curriculum.progress_str()}")
@@ -382,12 +399,12 @@ print(f"\nFeature 2 — Specialist memory ({mem_path})")
382
  if mem_path.exists():
383
  data = json.loads(mem_path.read_text())
384
  total = sum(len(v) for v in data.values())
385
- print(f" {len(data)} specialists, {total} total entries")
386
  for sid, entries in list(data.items())[:3]:
387
  avg = sum(e["reward"] for e in entries) / len(entries)
388
  print(f" {sid}: {len(entries)} entries, avg={avg:.3f}")
389
  else:
390
- print(" No file yet (finetuner fires after 100 episodes)")
391
 
392
  spawn_path = Path(_cfg.get("environment", {}).get(
393
  "spawn_memory_path", "data/spawn_memory.jsonl"))
@@ -395,6 +412,9 @@ print(f"\nFeature 3 — Spawn memory ({spawn_path})")
395
  if spawn_path.exists():
396
  lines = [l for l in spawn_path.read_text().splitlines() if l.strip()]
397
  print(f" {len(lines)} spawn records")
 
 
 
398
  else:
399
  print(" No file yet")
400
 
@@ -407,8 +427,8 @@ if res_path.exists():
407
  else:
408
  print(" No file yet")
409
 
410
- print("\n" + "="*52)
411
- print("CELL 7 done")
412
 
413
 
414
  # ============================================================
@@ -467,12 +487,12 @@ with open(readme_path, "w") as f:
467
  f.write(readme)
468
 
469
  candidates = [
470
- ("/content/spindleflow_model.zip", "spindleflow_model.zip"),
471
- ("/content/vec_normalize.pkl", "vec_normalize.pkl"),
472
- ("/content/reward_curve.png", "reward_curve.png"),
473
- ("/content/demo/assets/reward_curve.json", "reward_curve.json"),
474
- ("/content/logs/training_log.txt", "training_log.txt"),
475
- (readme_path, "README.md"),
476
  ]
477
 
478
  ops = [
@@ -490,7 +510,6 @@ _tlog(f"Uploaded {len(ops)} files:")
490
  for src, dst in candidates:
491
  if os.path.exists(src):
492
  _tlog(f" {dst}")
493
-
494
  _tlog(f"Model live : https://huggingface.co/{HF_REPO}")
495
  _tlog(f"Log : https://huggingface.co/{HF_REPO}/blob/main/training_log.txt")
496
- print("CELL 8 done — all done!")
 
1
  # ============================================================
2
  # SpindleFlow RL — Colab Training Script
3
  #
4
+ # BEFORE ANYTHING:
5
+ # 1. Runtime → Change runtime type → T4 GPU
6
+ # 2. Key icon (left sidebar) → Manage secrets → add:
7
+ # HF_TOKEN = hf_xxxx (write token: hf.co/settings/tokens)
8
+ # OPENAI_API_KEY = sk-xxxx
9
+ # Toggle "Notebook access" ON for both.
10
+ # 3. Create a new Colab notebook.
11
+ # 4. Copy each CELL block below into its own code cell.
12
+ # 5. Run cells top to bottom, one at a time.
 
13
  # ============================================================
14
 
15
 
16
  # ============================================================
17
+ # CELL 1 — Install packages + clone/update repo
18
  # ============================================================
19
  import subprocess, os, sys
20
 
21
+ print(f"Python {sys.version}\n")
22
 
23
+ # ── Install packages ─────────────────────────────────────────
24
+ # audioop-lts is only for Python 3.13+ (Colab uses 3.12)
25
  packages = [
26
  "openenv", "stable-baselines3", "sb3-contrib", "gymnasium",
27
  "sentence-transformers", "openai", "pyyaml", "trl",
 
30
  if sys.version_info >= (3, 13):
31
  packages.append("audioop-lts")
32
 
33
+ print("Installing packages...")
34
+ result = subprocess.run(["pip", "install"] + packages,
35
+ capture_output=True, text=True)
36
  if result.returncode != 0:
37
  print(result.stdout[-3000:])
38
  print(result.stderr[-3000:])
39
  raise RuntimeError("pip install failed — see output above")
40
+ print("Packages OK\n")
41
 
42
+ # ── Clone or update repo ─────────────────────────────────────
43
+ # The GitHub repo IS the spindleflow-rl project root.
44
+ # It clones to /content/kuchbhi/ — that IS the working directory.
45
  REPO = "/content/kuchbhi"
46
+ GIT_URL = "https://github.com/garvitsachdevaa/kuchbhi.git"
47
+
48
+ if not os.path.isdir(os.path.join(REPO, ".git")):
49
+ # Not cloned yet — do a fresh clone
50
+ subprocess.run(["rm", "-rf", REPO]) # remove partial clone if any
51
+ subprocess.run(["git", "clone", GIT_URL], cwd="/content", check=True)
52
  print("Repo cloned")
53
  else:
54
+ # Already cloned — pull latest
55
  subprocess.run(["git", "pull"], cwd=REPO, check=True)
56
  print("Repo updated")
57
 
58
+ # ── Set working directory ────────────────────────────────────
59
  os.chdir(REPO)
60
+ if "." not in sys.path:
61
+ sys.path.insert(0, ".")
62
+
63
  os.makedirs("/content/demo/assets", exist_ok=True)
64
  os.makedirs("/content/data", exist_ok=True)
65
  os.makedirs("/content/checkpoints", exist_ok=True)
 
68
  import importlib.metadata
69
  print(f"OpenEnv : {importlib.metadata.version('openenv')}")
70
  print(f"CWD : {os.getcwd()}")
71
+ print("\nCELL 1 done ✓")
72
 
73
 
74
  # ============================================================
 
83
  if not HF_TOKEN:
84
  raise RuntimeError(
85
  "HF_TOKEN missing.\n"
86
+ "Key icon (left sidebar) → Add secret\n"
87
+ " Name: HF_TOKEN\n"
88
+ " Value: hf_xxxx (write token from hf.co/settings/tokens)\n"
89
+ " Toggle Notebook access ON"
90
  )
91
  if not OPENAI_API_KEY:
92
  raise RuntimeError(
93
  "OPENAI_API_KEY missing.\n"
94
+ "Key icon (left sidebar) → Add secret\n"
95
+ " Name: OPENAI_API_KEY\n"
96
+ " Value: sk-xxxx\n"
97
+ " Toggle Notebook access ON"
98
  )
99
 
100
  os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
101
 
102
  print(f"HF_TOKEN : {HF_TOKEN[:8]}...{HF_TOKEN[-4:]}")
103
  print(f"OPENAI_API_KEY : {OPENAI_API_KEY[:8]}...{OPENAI_API_KEY[-4:]}")
104
+ print("\nCELL 2 done ✓")
105
 
106
 
107
  # ============================================================
 
111
  import numpy as np
112
  from env.spindleflow_env import SpindleFlowEnv
113
 
114
+ # Adds simulate_specialists kwarg so per-step calls stay local/fast.
115
+ # OPENAI_API_KEY is still active for task generation + finetuner.
 
116
  if not getattr(SpindleFlowEnv, "_simulate_patched", False):
117
  _orig_init = SpindleFlowEnv.__init__
118
 
 
128
  if getattr(self, "simulate_specialists", False):
129
  _key = _os.environ.pop("OPENAI_API_KEY", None)
130
  try:
131
+ return _orig_call(self, specialist_id, task,
132
+ elapsed_ms, context=context)
133
  finally:
134
  if _key:
135
  _os.environ["OPENAI_API_KEY"] = _key
 
154
  print(f"reward : {reward:.4f}")
155
  print(f"action : {info2['action_name']}")
156
  env.close()
157
+ print("\nCELL 3 done ✓ — environment OK")
158
 
159
 
160
  # ============================================================
 
170
 
171
  for _name in ("PPOConfig", "GRPOConfig", "SFTConfig"):
172
  if getattr(trl, _name, None):
173
+ print(f"TRL config: {_name}")
174
  break
175
  else:
176
  print("TRL imported (TrainingArguments-based version)")
177
 
178
+ print("\nCELL 4 done ✓ — TRL requirement satisfied")
179
 
180
 
181
  # ============================================================
182
  # CELL 5 — Train RecurrentPPO (LSTM PPO)
183
  #
184
+ # Per-step specialist calls : local simulation (no API cost/latency)
185
+ # Task generation : GPT-4o-mini via OPENAI_API_KEY
186
+ # Finetuner : fires every 100 episodes
187
+ # Reward baseline : GPT-4o-mini via OPENAI_API_KEY
188
  #
189
+ # Expected runtime: ~20–25 min on T4 for 100k steps (~10k episodes)
190
  # ============================================================
191
  import time, yaml, torch, numpy as np
192
  from sb3_contrib import RecurrentPPO
 
271
  device="cuda" if torch.cuda.is_available() else "cpu",
272
  )
273
 
274
+ _tlog(f"Device : {model.device}")
275
+ _tlog(f"Timesteps : {TOTAL_TIMESTEPS:,}")
276
+ _tlog(f"Curriculum : Phase {curriculum.current_phase} — {curriculum.progress_str()}")
277
  _tlog("Training started...")
278
 
279
  reward_logger = RewardLogger(curriculum)
 
296
  vec_env.save("/content/vec_normalize.pkl")
297
 
298
  _tlog(f"Done in {_elapsed/60:.1f} min")
299
+ _tlog(f"Episodes : {len(reward_logger.episode_rewards)}")
300
  _tlog(f"Curriculum final: {curriculum.progress_str()}")
301
+ print("\nCELL 5 done ✓ — model saved")
302
 
303
 
304
  # ============================================================
 
310
 
311
  ep_rewards = reward_logger.episode_rewards
312
  if not ep_rewards:
313
+ raise RuntimeError("No episodes recorded — check Cell 5 output for errors")
314
 
315
  n_ep = len(ep_rewards)
316
  episodes = list(range(n_ep))
317
+ window = max(30, n_ep // 20) # adaptive: ~5% of run
318
 
319
  smoothed = [
320
  float(np.mean(ep_rewards[max(0, i - window):i + 1]))
 
355
  color="#f0f6fc", fontsize=13, fontweight="bold", pad=14,
356
  )
357
  ax.tick_params(colors="#8b949e")
358
+ for sp in ax.spines.values():
359
+ sp.set_edgecolor("#30363d")
360
  ax.grid(color="#21262d", linewidth=0.8, alpha=0.9)
361
  ax.legend(fontsize=10, framealpha=0.85,
362
  facecolor="#161b22", edgecolor="#30363d", labelcolor="#c9d1d9")
 
371
  facecolor=fig.get_facecolor())
372
  plt.show()
373
 
374
+ _tlog(f"Curve: early={early_mean:+.4f} final={final_mean:+.4f} "
375
  f"improvement={improvement:+.4f}")
376
+ print(f"Episodes : {n_ep:,}")
377
  print(f"Improvement: {improvement:+.4f}")
378
+ print("\nCELL 6 done ✓ — reward curve saved")
379
 
380
 
381
  # ============================================================
 
384
  import json
385
  from pathlib import Path
386
 
387
+ print("=" * 52)
388
  print("LEARNING FEATURES AUDIT")
389
+ print("=" * 52)
390
 
391
+ print(f"\nFeature 5 — Curriculum (performance-gated)")
392
  print(f" Phase : {curriculum.current_phase}/3")
393
  print(f" Rolling mean : {curriculum.rolling_mean():.3f}")
394
  print(f" {curriculum.progress_str()}")
 
399
  if mem_path.exists():
400
  data = json.loads(mem_path.read_text())
401
  total = sum(len(v) for v in data.values())
402
+ print(f" {len(data)} specialists · {total} total entries")
403
  for sid, entries in list(data.items())[:3]:
404
  avg = sum(e["reward"] for e in entries) / len(entries)
405
  print(f" {sid}: {len(entries)} entries, avg={avg:.3f}")
406
  else:
407
+ print(" No file yet (finetuner fires after 100 completed episodes)")
408
 
409
  spawn_path = Path(_cfg.get("environment", {}).get(
410
  "spawn_memory_path", "data/spawn_memory.jsonl"))
 
412
  if spawn_path.exists():
413
  lines = [l for l in spawn_path.read_text().splitlines() if l.strip()]
414
  print(f" {len(lines)} spawn records")
415
+ for line in lines[:2]:
416
+ rec = json.loads(line)
417
+ print(f" {rec['specialist_role']} | reward={rec['episode_reward']:.3f}")
418
  else:
419
  print(" No file yet")
420
 
 
427
  else:
428
  print(" No file yet")
429
 
430
+ print("\n" + "=" * 52)
431
+ print("CELL 7 done ✓")
432
 
433
 
434
  # ============================================================
 
487
  f.write(readme)
488
 
489
  candidates = [
490
+ ("/content/spindleflow_model.zip", "spindleflow_model.zip"),
491
+ ("/content/vec_normalize.pkl", "vec_normalize.pkl"),
492
+ ("/content/reward_curve.png", "reward_curve.png"),
493
+ ("/content/demo/assets/reward_curve.json", "reward_curve.json"),
494
+ ("/content/logs/training_log.txt", "training_log.txt"),
495
+ (readme_path, "README.md"),
496
  ]
497
 
498
  ops = [
 
510
  for src, dst in candidates:
511
  if os.path.exists(src):
512
  _tlog(f" {dst}")
 
513
  _tlog(f"Model live : https://huggingface.co/{HF_REPO}")
514
  _tlog(f"Log : https://huggingface.co/{HF_REPO}/blob/main/training_log.txt")
515
+ print("\nCELL 8 done ✓ — all done!")