Upload 56 files
Browse files- .gitattributes +1 -0
- hugging/td_lang/__init__.py +5 -0
- hugging/td_lang/__pycache__/__init__.cpython-310.pyc +0 -0
- hugging/td_lang/__pycache__/ast_nodes.cpython-310.pyc +0 -0
- hugging/td_lang/__pycache__/cli.cpython-310.pyc +0 -0
- hugging/td_lang/__pycache__/compiler.cpython-310.pyc +0 -0
- hugging/td_lang/__pycache__/errors.cpython-310.pyc +0 -0
- hugging/td_lang/__pycache__/grammar.cpython-310.pyc +0 -0
- hugging/td_lang/ast_nodes.py +23 -0
- hugging/td_lang/cli.py +2 -1
- hugging/td_lang/compiler.py +492 -112
- hugging/td_lang/errors.py +1 -0
- hugging/td_lang/examples/demo_schedule.td +33 -0
- hugging/td_lang/grammar.py +15 -1
.gitattributes
CHANGED
|
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
hugging/td_lang/__pycache__/compiler.cpython-314.pyc filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
hugging/td_lang/__pycache__/compiler.cpython-314.pyc filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
hugging/td_lang/__pycache__/compiler.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
|
hugging/td_lang/__init__.py
CHANGED
|
@@ -27,6 +27,11 @@ Phase 2: diagnose, synth, train, debate
|
|
| 27 |
Phase 3: fork, reset, prune, edit
|
| 28 |
Phase 4: snapshot, report, data_contract, reward_contract
|
| 29 |
Phase 5: CLI polish, --version, info command, --verbose
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
Designed from interviews test_14 (10 commands) and test_17 (ForgeSpec 2.0).
|
| 32 |
"""
|
|
|
|
| 27 |
Phase 3: fork, reset, prune, edit
|
| 28 |
Phase 4: snapshot, report, data_contract, reward_contract
|
| 29 |
Phase 5: CLI polish, --version, info command, --verbose
|
| 30 |
+
Phase 6: fuse, absorb (easy merge)
|
| 31 |
+
Phase 7: repeat, if/else (loop control)
|
| 32 |
+
Phase 8: setup, on_error, notify, save (autopilot)
|
| 33 |
+
Phase 9: schedule (time-based execution)
|
| 34 |
+
Engine upgrades: QLoRA training, self-contained eval, model-generated synth problems
|
| 35 |
|
| 36 |
Designed from interviews test_14 (10 commands) and test_17 (ForgeSpec 2.0).
|
| 37 |
"""
|
hugging/td_lang/__pycache__/__init__.cpython-310.pyc
CHANGED
|
Binary files a/hugging/td_lang/__pycache__/__init__.cpython-310.pyc and b/hugging/td_lang/__pycache__/__init__.cpython-310.pyc differ
|
|
|
hugging/td_lang/__pycache__/ast_nodes.cpython-310.pyc
CHANGED
|
Binary files a/hugging/td_lang/__pycache__/ast_nodes.cpython-310.pyc and b/hugging/td_lang/__pycache__/ast_nodes.cpython-310.pyc differ
|
|
|
hugging/td_lang/__pycache__/cli.cpython-310.pyc
CHANGED
|
Binary files a/hugging/td_lang/__pycache__/cli.cpython-310.pyc and b/hugging/td_lang/__pycache__/cli.cpython-310.pyc differ
|
|
|
hugging/td_lang/__pycache__/compiler.cpython-310.pyc
CHANGED
|
Binary files a/hugging/td_lang/__pycache__/compiler.cpython-310.pyc and b/hugging/td_lang/__pycache__/compiler.cpython-310.pyc differ
|
|
|
hugging/td_lang/__pycache__/errors.cpython-310.pyc
CHANGED
|
Binary files a/hugging/td_lang/__pycache__/errors.cpython-310.pyc and b/hugging/td_lang/__pycache__/errors.cpython-310.pyc differ
|
|
|
hugging/td_lang/__pycache__/grammar.cpython-310.pyc
CHANGED
|
Binary files a/hugging/td_lang/__pycache__/grammar.cpython-310.pyc and b/hugging/td_lang/__pycache__/grammar.cpython-310.pyc differ
|
|
|
hugging/td_lang/ast_nodes.py
CHANGED
|
@@ -304,6 +304,28 @@ class OnErrorBlock:
|
|
| 304 |
notify: bool = True # Send ntfy notification on error
|
| 305 |
|
| 306 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
# ============================================================================
|
| 308 |
# BLOCKS (gates, budget, contracts, etc.)
|
| 309 |
# ============================================================================
|
|
@@ -417,5 +439,6 @@ __all__ = [
|
|
| 417 |
"BudgetBlock",
|
| 418 |
"DataContractBlock",
|
| 419 |
"RewardContractBlock",
|
|
|
|
| 420 |
"TDProgram",
|
| 421 |
]
|
|
|
|
| 304 |
notify: bool = True # Send ntfy notification on error
|
| 305 |
|
| 306 |
|
| 307 |
+
# ============================================================================
|
| 308 |
+
# PHASE 9 — SCHEDULE (time-based execution)
|
| 309 |
+
# ============================================================================
|
| 310 |
+
|
| 311 |
+
@dataclass
|
| 312 |
+
class ScheduleCmd:
|
| 313 |
+
"""Schedule a block of commands to run at a specific time or interval. (Phase 9)
|
| 314 |
+
|
| 315 |
+
Examples:
|
| 316 |
+
schedule "every 6h" { diagnose base; train base ... }
|
| 317 |
+
schedule "at 02:00" { train base on "data.jsonl" using grpo }
|
| 318 |
+
schedule "after 30m" { eval base -> results.json }
|
| 319 |
+
|
| 320 |
+
Patterns:
|
| 321 |
+
"every Nh/Nm" — repeat every N hours/minutes
|
| 322 |
+
"at HH:MM" — run once at that time
|
| 323 |
+
"after Nh/Nm" — delay then run once
|
| 324 |
+
"""
|
| 325 |
+
timing: str # "every 6h", "at 02:00", "after 30m"
|
| 326 |
+
body: List[Any] = field(default_factory=list) # Commands inside the block
|
| 327 |
+
|
| 328 |
+
|
| 329 |
# ============================================================================
|
| 330 |
# BLOCKS (gates, budget, contracts, etc.)
|
| 331 |
# ============================================================================
|
|
|
|
| 439 |
"BudgetBlock",
|
| 440 |
"DataContractBlock",
|
| 441 |
"RewardContractBlock",
|
| 442 |
+
"ScheduleCmd",
|
| 443 |
"TDProgram",
|
| 444 |
]
|
hugging/td_lang/cli.py
CHANGED
|
@@ -21,7 +21,7 @@ from .ast_nodes import (
|
|
| 21 |
SynthCmd, TrainCmd, DebateCmd, DiagnoseCmd,
|
| 22 |
ForkCmd, ResetCmd, PruneCmd, EditCmd,
|
| 23 |
FuseCmd, AbsorbCmd, RepeatBlock, IfBlock,
|
| 24 |
-
NotifyCmd, SaveCmd,
|
| 25 |
SnapshotCmd, ReportCmd,
|
| 26 |
)
|
| 27 |
|
|
@@ -49,6 +49,7 @@ _PHASE_MAP = {
|
|
| 49 |
SaveCmd: ("8", "save"),
|
| 50 |
SnapshotCmd: ("4", "snapshot"),
|
| 51 |
ReportCmd: ("4", "report"),
|
|
|
|
| 52 |
}
|
| 53 |
|
| 54 |
|
|
|
|
| 21 |
SynthCmd, TrainCmd, DebateCmd, DiagnoseCmd,
|
| 22 |
ForkCmd, ResetCmd, PruneCmd, EditCmd,
|
| 23 |
FuseCmd, AbsorbCmd, RepeatBlock, IfBlock,
|
| 24 |
+
NotifyCmd, SaveCmd, ScheduleCmd,
|
| 25 |
SnapshotCmd, ReportCmd,
|
| 26 |
)
|
| 27 |
|
|
|
|
| 49 |
SaveCmd: ("8", "save"),
|
| 50 |
SnapshotCmd: ("4", "snapshot"),
|
| 51 |
ReportCmd: ("4", "report"),
|
| 52 |
+
ScheduleCmd: ("9", "schedule"),
|
| 53 |
}
|
| 54 |
|
| 55 |
|
hugging/td_lang/compiler.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
TD Lang Compiler
|
| 3 |
|
| 4 |
Phase 1 commands: load, merge, heal, eval, commit.
|
| 5 |
Phase 2 commands: synth, train, debate, diagnose.
|
|
@@ -38,6 +38,7 @@ from .ast_nodes import (
|
|
| 38 |
ResetCmd,
|
| 39 |
RewardContractBlock,
|
| 40 |
SaveCmd,
|
|
|
|
| 41 |
SetupBlock,
|
| 42 |
SnapshotCmd,
|
| 43 |
SynthCmd,
|
|
@@ -46,7 +47,7 @@ from .ast_nodes import (
|
|
| 46 |
)
|
| 47 |
from .errors import TDCompileError
|
| 48 |
|
| 49 |
-
# All command types are now implemented (Phase 1 + 2 + 3)
|
| 50 |
|
| 51 |
|
| 52 |
class TDCompiler:
|
|
@@ -86,25 +87,25 @@ class TDCompiler:
|
|
| 86 |
elif isinstance(cmd, MergeCmd):
|
| 87 |
if cmd.target not in seen:
|
| 88 |
raise TDCompileError(
|
| 89 |
-
f"Can't merge into '{cmd.target}'
|
| 90 |
hint=f'Add: load "{cmd.source}" as {cmd.target}',
|
| 91 |
)
|
| 92 |
elif isinstance(cmd, (HealCmd, EvalCmd, CommitCmd)):
|
| 93 |
if cmd.target not in seen:
|
| 94 |
raise TDCompileError(
|
| 95 |
-
f"Can't use '{cmd.target}'
|
| 96 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 97 |
)
|
| 98 |
elif isinstance(cmd, (SynthCmd, TrainCmd, DebateCmd, DiagnoseCmd)):
|
| 99 |
if cmd.target not in seen:
|
| 100 |
raise TDCompileError(
|
| 101 |
-
f"Can't use '{cmd.target}'
|
| 102 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 103 |
)
|
| 104 |
elif isinstance(cmd, ForkCmd):
|
| 105 |
if cmd.source not in seen:
|
| 106 |
raise TDCompileError(
|
| 107 |
-
f"Can't fork '{cmd.source}'
|
| 108 |
hint=f'Add: load "model/path" as {cmd.source}',
|
| 109 |
)
|
| 110 |
if cmd.alias in seen:
|
|
@@ -115,21 +116,21 @@ class TDCompiler:
|
|
| 115 |
elif isinstance(cmd, (ResetCmd, PruneCmd, EditCmd)):
|
| 116 |
if cmd.target not in seen:
|
| 117 |
raise TDCompileError(
|
| 118 |
-
f"Can't use '{cmd.target}'
|
| 119 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 120 |
)
|
| 121 |
elif isinstance(cmd, SnapshotCmd):
|
| 122 |
if cmd.target not in seen:
|
| 123 |
raise TDCompileError(
|
| 124 |
-
f"Can't snapshot '{cmd.target}'
|
| 125 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 126 |
)
|
| 127 |
elif isinstance(cmd, ReportCmd):
|
| 128 |
-
pass # report has no target
|
| 129 |
elif isinstance(cmd, FuseCmd):
|
| 130 |
if cmd.target not in seen:
|
| 131 |
raise TDCompileError(
|
| 132 |
-
f"Can't fuse into '{cmd.target}'
|
| 133 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 134 |
)
|
| 135 |
if len(cmd.sources) < 1:
|
|
@@ -140,9 +141,13 @@ class TDCompiler:
|
|
| 140 |
elif isinstance(cmd, AbsorbCmd):
|
| 141 |
if cmd.target not in seen:
|
| 142 |
raise TDCompileError(
|
| 143 |
-
f"Can't absorb into '{cmd.target}'
|
| 144 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 145 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
# ---------------------------------------------------------------- Build script
|
| 148 |
def _build_script(self, program: TDProgram) -> None:
|
|
@@ -158,7 +163,7 @@ Source: {source_name}
|
|
| 158 |
Compiled: {timestamp}
|
| 159 |
Hash: {source_hash}
|
| 160 |
|
| 161 |
-
DO NOT EDIT
|
| 162 |
"""'''
|
| 163 |
)
|
| 164 |
self._emit(doc)
|
|
@@ -282,6 +287,8 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 282 |
self._emit_notify(cmd, program)
|
| 283 |
elif isinstance(cmd, SaveCmd):
|
| 284 |
self._emit_save(cmd, program)
|
|
|
|
|
|
|
| 285 |
self._emit("")
|
| 286 |
|
| 287 |
self._emit_summary()
|
|
@@ -311,7 +318,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 311 |
self._indent -= 1
|
| 312 |
self._emit("except ImportError:")
|
| 313 |
self._indent += 1
|
| 314 |
-
self._emit('print("[td_lang] huggingface_hub not installed. Storing ref only
|
| 315 |
self._emit("_local_path = _model_ref")
|
| 316 |
self._indent -= 1
|
| 317 |
self._emit("except Exception as e:")
|
|
@@ -381,7 +388,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 381 |
self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
|
| 382 |
self._emit("if not checkpoint:")
|
| 383 |
self._indent += 1
|
| 384 |
-
self._emit('print("[td_lang] WARNING: No checkpoint to heal
|
| 385 |
self._indent -= 1
|
| 386 |
self._emit("else:")
|
| 387 |
self._indent += 1
|
|
@@ -400,42 +407,144 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 400 |
self._indent -= 1
|
| 401 |
|
| 402 |
def _emit_eval(self, cmd: EvalCmd) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
self._emit(f'print("[td_lang] Evaluating {cmd.target}...")')
|
| 404 |
self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
|
| 405 |
self._emit("if not checkpoint:")
|
| 406 |
self._indent += 1
|
| 407 |
-
self._emit('
|
| 408 |
self._indent -= 1
|
| 409 |
-
self._emit("else:")
|
| 410 |
-
self._indent += 1
|
| 411 |
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
|
| 412 |
-
self._emit("import torch")
|
| 413 |
self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
|
| 414 |
self._emit("model = AutoModelForCausalLM.from_pretrained(")
|
| 415 |
self._indent += 1
|
| 416 |
self._emit('checkpoint, torch_dtype=torch.bfloat16, device_map="auto"')
|
| 417 |
self._indent -= 1
|
| 418 |
self._emit(")")
|
| 419 |
-
self._emit("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
self._indent += 1
|
| 421 |
-
self._emit("
|
| 422 |
-
self._emit("
|
| 423 |
-
self._emit("
|
|
|
|
|
|
|
| 424 |
self._indent -= 1
|
| 425 |
-
self._emit("
|
| 426 |
-
self._emit(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
self._emit(f'hist_key = "{cmd.target}_eval_history"')
|
| 428 |
self._emit("if hist_key not in results:")
|
| 429 |
self._indent += 1
|
| 430 |
self._emit("results[hist_key] = []")
|
| 431 |
self._indent -= 1
|
| 432 |
-
self._emit("
|
| 433 |
-
self._emit("results[hist_key]
|
|
|
|
| 434 |
self._emit(f'lineage["{cmd.target}"]["operations"].append({{')
|
| 435 |
self._indent += 1
|
| 436 |
self._emit('"op": "eval",')
|
| 437 |
self._emit('"timestamp": datetime.now().isoformat(),')
|
| 438 |
-
self._emit('"
|
|
|
|
| 439 |
self._indent -= 1
|
| 440 |
self._emit("})")
|
| 441 |
if cmd.output:
|
|
@@ -450,7 +559,6 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 450 |
self._emit('print("[td_lang] Eval results:", json.dumps(eval_result, indent=2, default=str))')
|
| 451 |
self._emit("del model, tok")
|
| 452 |
self._emit("import gc; gc.collect()")
|
| 453 |
-
self._indent -= 1
|
| 454 |
|
| 455 |
def _emit_commit(self, cmd: CommitCmd, global_gates: Optional[GateBlock]) -> None:
|
| 456 |
gates = cmd.gates or (global_gates.must_pass if global_gates else None)
|
|
@@ -484,7 +592,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 484 |
self._indent -= 1
|
| 485 |
self._emit("if failed:")
|
| 486 |
self._indent += 1
|
| 487 |
-
self._emit('raise TDGateError(failed, message="Commit blocked
|
| 488 |
self._indent -= 1
|
| 489 |
self._emit("else:")
|
| 490 |
self._indent += 1
|
|
@@ -523,7 +631,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 523 |
self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
|
| 524 |
self._emit("if not checkpoint:")
|
| 525 |
self._indent += 1
|
| 526 |
-
self._emit('print("[td_lang] WARNING: No checkpoint
|
| 527 |
self._emit(f'checkpoint = models["{cmd.target}"]["model_ref"]')
|
| 528 |
self._indent -= 1
|
| 529 |
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
|
|
@@ -559,11 +667,66 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 559 |
self._emit('print(f" Response: {response[:200]}...")')
|
| 560 |
self._emit("print()")
|
| 561 |
self._indent -= 1
|
| 562 |
-
self._emit(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
self._emit(f'lineage["{cmd.target}"]["operations"].append({{')
|
| 564 |
self._indent += 1
|
| 565 |
self._emit('"op": "diagnose",')
|
| 566 |
self._emit('"n_prompts": len(diag_prompts),')
|
|
|
|
| 567 |
self._emit('"timestamp": datetime.now().isoformat(),')
|
| 568 |
self._indent -= 1
|
| 569 |
self._emit("})")
|
|
@@ -572,7 +735,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 572 |
self._emit("diag_path.parent.mkdir(parents=True, exist_ok=True)")
|
| 573 |
self._emit('with open(diag_path, "w") as f:')
|
| 574 |
self._indent += 1
|
| 575 |
-
self._emit("json.dump(
|
| 576 |
self._indent -= 1
|
| 577 |
self._emit('print(f"[td_lang] Diagnosis saved to {diag_path}")')
|
| 578 |
self._emit("del model, tok")
|
|
@@ -604,20 +767,30 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 604 |
self._emit(")")
|
| 605 |
self._emit("model.eval()")
|
| 606 |
self._emit("")
|
| 607 |
-
self._emit("#
|
| 608 |
-
self._emit(f'diag = results.get("{cmd.target}_diagnose",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
self._emit("weak_topics = []")
|
| 610 |
-
self._emit("
|
|
|
|
| 611 |
self._indent += 1
|
| 612 |
self._emit("resp = d.get('response', '')")
|
| 613 |
-
self._emit("for topic in ['math', 'code', 'logic', 'factual'
|
| 614 |
self._indent += 1
|
| 615 |
-
self._emit("if topic in resp.lower():")
|
| 616 |
self._indent += 1
|
| 617 |
self._emit("weak_topics.append(topic)")
|
| 618 |
self._indent -= 1
|
| 619 |
self._indent -= 1
|
| 620 |
self._indent -= 1
|
|
|
|
| 621 |
self._emit("if not weak_topics:")
|
| 622 |
self._indent += 1
|
| 623 |
self._emit("weak_topics = ['math', 'code', 'logic', 'factual']")
|
|
@@ -637,21 +810,81 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 637 |
self._indent -= 1
|
| 638 |
self._emit("}")
|
| 639 |
self._emit("")
|
| 640 |
-
self._emit("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 641 |
self._indent += 1
|
| 642 |
-
self._emit("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 643 |
self._indent += 1
|
| 644 |
-
self._emit("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
self._indent -= 1
|
| 646 |
-
self._emit("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 647 |
self._indent += 1
|
| 648 |
-
self._emit("
|
| 649 |
self._indent -= 1
|
| 650 |
-
self._emit("
|
|
|
|
|
|
|
| 651 |
self._indent += 1
|
| 652 |
-
self._emit("
|
|
|
|
|
|
|
|
|
|
|
|
|
| 653 |
self._indent -= 1
|
| 654 |
-
self._emit("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 655 |
self._indent -= 1
|
| 656 |
self._emit("")
|
| 657 |
self._emit("synth_data = []")
|
|
@@ -738,18 +971,51 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 738 |
self._emit("")
|
| 739 |
|
| 740 |
if cmd.method == "grpo":
|
| 741 |
-
self._emit("# GRPO training (test_15: 64 steps sweet spot
|
|
|
|
| 742 |
self._emit("from trl import GRPOConfig, GRPOTrainer")
|
| 743 |
-
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
|
|
|
|
| 744 |
self._emit("from datasets import load_dataset")
|
| 745 |
self._emit("import torch")
|
| 746 |
self._emit("")
|
| 747 |
self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 748 |
self._emit("model = AutoModelForCausalLM.from_pretrained(")
|
| 749 |
self._indent += 1
|
| 750 |
-
self._emit(
|
|
|
|
|
|
|
| 751 |
self._indent -= 1
|
| 752 |
self._emit(")")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 753 |
self._emit("")
|
| 754 |
self._emit(f'# Load training data')
|
| 755 |
self._emit(f'dataset_path = "{cmd.dataset}"')
|
|
@@ -772,6 +1038,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 772 |
self._emit('output_dir="td_lang_outputs/grpo_training",')
|
| 773 |
self._emit("save_steps=16,")
|
| 774 |
self._emit('bf16=True,')
|
|
|
|
| 775 |
self._indent -= 1
|
| 776 |
self._emit(")")
|
| 777 |
self._emit("")
|
|
@@ -875,7 +1142,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 875 |
self._emit("if logs['kl'] > 3.1 * ma:")
|
| 876 |
self._indent += 1
|
| 877 |
self._emit("control.should_training_stop = True")
|
| 878 |
-
self._emit("print('[td_lang][early_stop] KL spike detected
|
| 879 |
self._indent -= 2
|
| 880 |
self._indent -= 1
|
| 881 |
self._emit("if 'eval/reward' in logs:")
|
|
@@ -884,7 +1151,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 884 |
self._emit("if len(self.eval_rewards) >= 2 and self.eval_rewards[-1] < self.eval_rewards[-2]:")
|
| 885 |
self._indent += 1
|
| 886 |
self._emit("control.should_training_stop = True")
|
| 887 |
-
self._emit("print('[td_lang][early_stop] Validation reward drop
|
| 888 |
self._indent -= 1
|
| 889 |
self._indent -= 1
|
| 890 |
self._emit("if 'policy_entropy' in logs:")
|
|
@@ -896,7 +1163,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 896 |
self._emit("if self.entropy_history[-1] < 0.93 * baseline:")
|
| 897 |
self._indent += 1
|
| 898 |
self._emit("control.should_training_stop = True")
|
| 899 |
-
self._emit("print('[td_lang][early_stop] Diversity collapsed
|
| 900 |
self._indent -= 2
|
| 901 |
self._indent -= 2
|
| 902 |
self._indent -= 1
|
|
@@ -915,8 +1182,9 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 915 |
self._emit(f'models["{cmd.target}"]["checkpoint"] = "td_lang_outputs/grpo_trained"')
|
| 916 |
|
| 917 |
elif cmd.method in ("sft", "dpo"):
|
| 918 |
-
self._emit(f"# {cmd.method.upper()} training")
|
| 919 |
-
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments")
|
|
|
|
| 920 |
if cmd.method == "sft":
|
| 921 |
self._emit("from trl import SFTTrainer")
|
| 922 |
else:
|
|
@@ -925,11 +1193,29 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 925 |
self._emit("import torch")
|
| 926 |
self._emit("")
|
| 927 |
self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 928 |
self._emit("model = AutoModelForCausalLM.from_pretrained(")
|
| 929 |
self._indent += 1
|
| 930 |
-
self._emit(
|
| 931 |
self._indent -= 1
|
| 932 |
self._emit(")")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 933 |
self._emit(f'dataset_path = "{cmd.dataset}"')
|
| 934 |
self._emit("if dataset_path.endswith('.jsonl'):")
|
| 935 |
self._indent += 1
|
|
@@ -1140,7 +1426,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1140 |
# ---------------------------------------------------------------- Phase 3 emitters
|
| 1141 |
|
| 1142 |
def _emit_edit(self, cmd: EditCmd) -> None:
|
| 1143 |
-
"""EDIT
|
| 1144 |
|
| 1145 |
From test_18: all 3 AIs agree LoRA is safe default, DoRA beats by 1-4%.
|
| 1146 |
layers_to_transform supports targeting specific layers (e.g., 16-28).
|
|
@@ -1151,7 +1437,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1151 |
layers = cmd.layers # "all", "16-28", or single number
|
| 1152 |
lr = cmd.learning_rate or 1e-4
|
| 1153 |
|
| 1154 |
-
self._emit(f'print("[td_lang] EDIT
|
| 1155 |
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
|
| 1156 |
self._emit("import torch")
|
| 1157 |
self._emit("from peft import LoraConfig, get_peft_model, PeftModel")
|
|
@@ -1207,7 +1493,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1207 |
self._emit("")
|
| 1208 |
|
| 1209 |
# Apply adapter
|
| 1210 |
-
self._emit("# Inject adapter
|
| 1211 |
self._emit("model = get_peft_model(model, edit_config)")
|
| 1212 |
self._emit("model.print_trainable_parameters()")
|
| 1213 |
self._emit("")
|
|
@@ -1226,7 +1512,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1226 |
self._indent -= 1
|
| 1227 |
self._emit("")
|
| 1228 |
|
| 1229 |
-
# "Try before buy"
|
| 1230 |
self._emit('sample_prompts = ["What is 7+8?", "Explain photosynthesis in one paragraph.", "Write a Python function fib(n)."]')
|
| 1231 |
self._emit("def run_quick_eval(enable_adapters: bool):")
|
| 1232 |
self._indent += 1
|
|
@@ -1266,19 +1552,19 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1266 |
self._indent -= 1
|
| 1267 |
self._emit("")
|
| 1268 |
|
| 1269 |
-
# Save adapter (don't merge yet
|
| 1270 |
self._emit(f'edit_save_dir = os.path.join(output_dir, "{alias}_edit_{method}")')
|
| 1271 |
self._emit("os.makedirs(edit_save_dir, exist_ok=True)")
|
| 1272 |
self._emit("model.save_pretrained(edit_save_dir)")
|
| 1273 |
self._emit(f'print(f"[td_lang] EDIT adapter saved to {{edit_save_dir}}")')
|
| 1274 |
-
self._emit(f'print("[td_lang] Adapter NOT merged
|
| 1275 |
self._emit("")
|
| 1276 |
|
| 1277 |
# Update models dict
|
| 1278 |
self._emit(f'models["{alias}"] = model')
|
| 1279 |
|
| 1280 |
def _emit_fork(self, cmd: ForkCmd) -> None:
|
| 1281 |
-
"""FORK
|
| 1282 |
|
| 1283 |
From test_18: all 3 AIs say disk-based only on 4090.
|
| 1284 |
Cheap fork = copy manifest + adapter files, share base weights.
|
|
@@ -1287,7 +1573,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1287 |
source = cmd.source
|
| 1288 |
alias = cmd.alias
|
| 1289 |
|
| 1290 |
-
self._emit(f'print("[td_lang] FORK
|
| 1291 |
self._emit(f'source_model = models["{source}"]')
|
| 1292 |
self._emit("import torch")
|
| 1293 |
self._emit("")
|
|
@@ -1300,7 +1586,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1300 |
self._emit("")
|
| 1301 |
|
| 1302 |
# Write manifest
|
| 1303 |
-
self._emit("# Write fork manifest
|
| 1304 |
self._emit("import json")
|
| 1305 |
self._emit("fork_manifest = {")
|
| 1306 |
self._emit(f' "fork_name": "{alias}",')
|
|
@@ -1315,7 +1601,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1315 |
self._emit("is_peft = hasattr(source_model, 'peft_config')")
|
| 1316 |
self._emit("if is_peft:")
|
| 1317 |
self._indent += 1
|
| 1318 |
-
self._emit("# PEFT model
|
| 1319 |
self._emit('adapter_dir = os.path.join(fork_dir, "adapters")')
|
| 1320 |
self._emit("source_model.save_pretrained(adapter_dir)")
|
| 1321 |
self._emit('fork_manifest["fork_type"] = "adapter"')
|
|
@@ -1324,7 +1610,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1324 |
self._indent -= 1
|
| 1325 |
self._emit("else:")
|
| 1326 |
self._indent += 1
|
| 1327 |
-
self._emit("# Full model
|
| 1328 |
self._emit("from safetensors.torch import save_file")
|
| 1329 |
self._emit("state = {k: v.detach().cpu().clone() for k, v in source_model.state_dict().items()}")
|
| 1330 |
self._emit('ckpt_path = os.path.join(fork_dir, "model.safetensors")')
|
|
@@ -1364,7 +1650,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1364 |
self._emit(f'lineage["{alias}"] = {{"forked_from": "{source}", "operations": []}}')
|
| 1365 |
|
| 1366 |
def _emit_reset(self, cmd: ResetCmd) -> None:
|
| 1367 |
-
"""RESET
|
| 1368 |
|
| 1369 |
From test_18: del model, clear CUDA cache, reload.
|
| 1370 |
Must also reset optimizer state. Use assign=True to avoid doubling VRAM.
|
|
@@ -1372,7 +1658,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1372 |
alias = cmd.target
|
| 1373 |
checkpoint = cmd.checkpoint
|
| 1374 |
|
| 1375 |
-
self._emit(f'print("[td_lang] RESET
|
| 1376 |
self._emit("")
|
| 1377 |
|
| 1378 |
# Delete current model and clear CUDA
|
|
@@ -1397,7 +1683,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1397 |
self._emit("")
|
| 1398 |
self._emit("if fork_manifest_path and os.path.exists(fork_manifest_path):")
|
| 1399 |
self._indent += 1
|
| 1400 |
-
self._emit("# Loading from a fork
|
| 1401 |
self._emit("import json")
|
| 1402 |
self._emit("with open(fork_manifest_path) as f:")
|
| 1403 |
self._indent += 1
|
|
@@ -1421,7 +1707,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1421 |
self._emit("# Loading from a safetensors file")
|
| 1422 |
self._emit("from safetensors.torch import load_file")
|
| 1423 |
self._emit("state = load_file(ckpt_path, device='cpu')")
|
| 1424 |
-
self._emit("# Need base model architecture
|
| 1425 |
self._emit(f'base_ref = models.get("__base_ref_{alias}", ckpt_path)')
|
| 1426 |
self._emit("model = AutoModelForCausalLM.from_pretrained(base_ref, torch_dtype=torch.float16, device_map='cuda')")
|
| 1427 |
self._emit("try:")
|
|
@@ -1438,7 +1724,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1438 |
|
| 1439 |
# Re-register in models dict
|
| 1440 |
self._emit(f'models["{alias}"] = model')
|
| 1441 |
-
self._emit(f'print(f"[td_lang] RESET complete
|
| 1442 |
self._emit("")
|
| 1443 |
|
| 1444 |
# Optimizer/cache handling and quick smoke eval
|
|
@@ -1460,7 +1746,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1460 |
self._indent -= 1
|
| 1461 |
|
| 1462 |
def _emit_prune(self, cmd: PruneCmd) -> None:
|
| 1463 |
-
"""PRUNE
|
| 1464 |
|
| 1465 |
From test_18: 20% structured max (LLM-Pruner). Wanda metric (Grok).
|
| 1466 |
Language backbone only, never vision encoder. Recovery: 200-800 steps LoRA.
|
|
@@ -1470,7 +1756,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1470 |
aggressiveness = cmd.aggressiveness
|
| 1471 |
|
| 1472 |
self._emit("import torch")
|
| 1473 |
-
self._emit(f'print("[td_lang] PRUNE
|
| 1474 |
self._emit(f'model = models["{alias}"]')
|
| 1475 |
self._emit("")
|
| 1476 |
|
|
@@ -1484,7 +1770,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1484 |
self._emit("")
|
| 1485 |
|
| 1486 |
# Identify language-only layers (skip vision)
|
| 1487 |
-
self._emit("# Target language backbone ONLY
|
| 1488 |
self._emit("# Filter for language model linear layers")
|
| 1489 |
self._emit("target_modules = []")
|
| 1490 |
self._emit("for name, module in model.named_modules():")
|
|
@@ -1598,8 +1884,8 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1598 |
self._indent -= 1
|
| 1599 |
self._indent -= 1
|
| 1600 |
else: # taylor
|
| 1601 |
-
self._emit("# Taylor: gradient-based importance (needs backprop
|
| 1602 |
-
self._emit("# Falling back to magnitude as MVP
|
| 1603 |
self._emit(f'print("[td_lang] WARNING: Taylor pruning falls back to magnitude on single GPU")')
|
| 1604 |
self._emit("import torch.nn.utils.prune as prune")
|
| 1605 |
self._emit("")
|
|
@@ -1651,7 +1937,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1651 |
# ---------------------------------------------------------------- Phase 7: Loop Control emitters
|
| 1652 |
|
| 1653 |
def _emit_cmd(self, cmd, program: TDProgram) -> None:
|
| 1654 |
-
"""Emit a single command
|
| 1655 |
if isinstance(cmd, LoadCmd):
|
| 1656 |
self._emit_load(cmd)
|
| 1657 |
elif isinstance(cmd, MergeCmd):
|
|
@@ -1694,15 +1980,17 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1694 |
self._emit_repeat(cmd, program)
|
| 1695 |
elif isinstance(cmd, IfBlock):
|
| 1696 |
self._emit_if(cmd, program)
|
|
|
|
|
|
|
| 1697 |
|
| 1698 |
def _emit_repeat(self, cmd: RepeatBlock, program: TDProgram) -> None:
|
| 1699 |
-
"""REPEAT
|
| 1700 |
|
| 1701 |
This is the core of td_loop: the self-improvement cycle.
|
| 1702 |
Each iteration runs the body commands in order.
|
| 1703 |
"""
|
| 1704 |
n = cmd.count
|
| 1705 |
-
self._emit(f'print("[td_lang] REPEAT
|
| 1706 |
self._emit(f"for _loop_iter in range({n}):")
|
| 1707 |
self._indent += 1
|
| 1708 |
self._emit(f'print(f"[td_lang] === Iteration {{_loop_iter + 1}}/{n} ===")')
|
|
@@ -1712,7 +2000,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1712 |
self._emit("elapsed_hours = (time.time() - start_time) / 3600")
|
| 1713 |
self._emit(f"if elapsed_hours >= {program.budget.max_gpu_hours}:")
|
| 1714 |
self._indent += 1
|
| 1715 |
-
self._emit('print("[td_lang] Budget exceeded inside repeat
|
| 1716 |
self._emit("break")
|
| 1717 |
self._indent -= 1
|
| 1718 |
self._emit("")
|
|
@@ -1721,10 +2009,10 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1721 |
self._emit("")
|
| 1722 |
self._emit(f'print(f"[td_lang] Iteration {{_loop_iter + 1}}/{n} complete.")')
|
| 1723 |
self._indent -= 1
|
| 1724 |
-
self._emit(f'print("[td_lang] REPEAT complete
|
| 1725 |
|
| 1726 |
def _emit_if(self, cmd: IfBlock, program: TDProgram) -> None:
|
| 1727 |
-
"""IF/ELSE
|
| 1728 |
|
| 1729 |
Conditions:
|
| 1730 |
- eval_passed: last eval for target had no failures
|
|
@@ -1734,7 +2022,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1734 |
condition = cmd.condition
|
| 1735 |
target = cmd.target
|
| 1736 |
|
| 1737 |
-
self._emit(f'print("[td_lang] IF
|
| 1738 |
self._emit("")
|
| 1739 |
|
| 1740 |
# Emit condition check
|
|
@@ -1777,7 +2065,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1777 |
self._indent -= 1
|
| 1778 |
|
| 1779 |
def _emit_break_if(self, cmd: BreakIfCmd) -> None:
|
| 1780 |
-
"""BREAK_IF
|
| 1781 |
condition = cmd.condition
|
| 1782 |
target = cmd.target or ""
|
| 1783 |
self._emit(f'_brk_eval = results.get("{target}_eval", {{}})')
|
|
@@ -1790,17 +2078,17 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1790 |
self._emit(f"_brk_met = bool(results.get('{target}_{condition}', False))")
|
| 1791 |
self._emit("if _brk_met:")
|
| 1792 |
self._indent += 1
|
| 1793 |
-
self._emit('print("[td_lang] break_if triggered
|
| 1794 |
self._emit("break")
|
| 1795 |
self._indent -= 1
|
| 1796 |
|
| 1797 |
# ---------------------------------------------------------------- Phase 6: Easy Merge emitters
|
| 1798 |
|
| 1799 |
def _emit_fuse(self, cmd: FuseCmd) -> None:
|
| 1800 |
-
"""FUSE
|
| 1801 |
|
| 1802 |
From TD merge strategy: Transport and Merge (optimal transport cross-arch merging).
|
| 1803 |
-
All 5 source models have different architectures
|
| 1804 |
Merge into language backbone only, vision encoder stays untouched.
|
| 1805 |
"""
|
| 1806 |
target = cmd.target
|
|
@@ -1809,7 +2097,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1809 |
strategy = cmd.strategy
|
| 1810 |
n = len(sources)
|
| 1811 |
|
| 1812 |
-
self._emit(f'print("[td_lang] FUSE
|
| 1813 |
self._emit(f'print("[td_lang] Strategy: {strategy}")')
|
| 1814 |
self._emit(f"fuse_sources = {sources}")
|
| 1815 |
self._emit(f'prev_ckpt = models.get("{target}", {{}}).get("checkpoint")')
|
|
@@ -1825,7 +2113,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1825 |
self._emit(f"strengths = [round(0.5 * (0.8 ** i), 3) for i in range({n})]")
|
| 1826 |
self._emit('print(f"[td_lang] Sequential strategy: strengths = {strengths}")')
|
| 1827 |
else:
|
| 1828 |
-
# weighted
|
| 1829 |
self._emit(f"per_model_strength = round(1.0 / ({n} + 1), 3)")
|
| 1830 |
self._emit("")
|
| 1831 |
|
|
@@ -1914,10 +2202,10 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1914 |
self._emit('"timestamp": datetime.now().isoformat(),')
|
| 1915 |
self._indent -= 1
|
| 1916 |
self._emit("})")
|
| 1917 |
-
self._emit(f'print("[td_lang] FUSE complete
|
| 1918 |
|
| 1919 |
def _emit_absorb(self, cmd: AbsorbCmd) -> None:
|
| 1920 |
-
"""ABSORB
|
| 1921 |
|
| 1922 |
One-liner shortcut: absorb "model" into target [strength 0.5]
|
| 1923 |
Wraps the merge logic with sensible defaults.
|
|
@@ -1926,7 +2214,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 1926 |
target = cmd.target
|
| 1927 |
strength = cmd.strength
|
| 1928 |
|
| 1929 |
-
self._emit(f'print("[td_lang] ABSORB
|
| 1930 |
self._emit(f'prev_ckpt = models.get("{target}", {{}}).get("checkpoint")')
|
| 1931 |
self._emit("")
|
| 1932 |
|
|
@@ -2020,12 +2308,12 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2020 |
self._emit('"timestamp": datetime.now().isoformat(),')
|
| 2021 |
self._indent -= 1
|
| 2022 |
self._emit("})")
|
| 2023 |
-
self._emit(f'print("[td_lang] ABSORB complete
|
| 2024 |
|
| 2025 |
# ---------------------------------------------------------------- Phase 4 emitters
|
| 2026 |
|
| 2027 |
def _emit_data_contract(self, dc: DataContractBlock) -> None:
|
| 2028 |
-
"""Emit data contract validation
|
| 2029 |
|
| 2030 |
From ForgeSpec 2.0 (test_17): data contracts enforce schema on training data.
|
| 2031 |
Required fields, minimum samples, max perplexity.
|
|
@@ -2093,7 +2381,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2093 |
self._emit("")
|
| 2094 |
|
| 2095 |
def _emit_reward_contract(self, rc: RewardContractBlock) -> None:
|
| 2096 |
-
"""Emit reward contract
|
| 2097 |
|
| 2098 |
From test_16: verified rewards only, no learned reward model.
|
| 2099 |
"""
|
|
@@ -2109,7 +2397,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2109 |
self._emit("")
|
| 2110 |
|
| 2111 |
def _emit_snapshot(self, cmd: SnapshotCmd, program: TDProgram) -> None:
|
| 2112 |
-
"""SNAPSHOT
|
| 2113 |
|
| 2114 |
From ForgeSpec 2.0 (test_17): every model state gets a content-addressed hash.
|
| 2115 |
Directory contains: model weights/adapters, eval report, prune spec, manifest.
|
|
@@ -2117,7 +2405,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2117 |
alias = cmd.target
|
| 2118 |
output_dir = cmd.output or "td_lang_outputs/snapshots"
|
| 2119 |
|
| 2120 |
-
self._emit(f'print("[td_lang] SNAPSHOT
|
| 2121 |
self._emit("import hashlib, json, time")
|
| 2122 |
self._emit(f'snap_model = models["{alias}"]')
|
| 2123 |
self._emit("")
|
|
@@ -2147,7 +2435,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2147 |
self._emit("")
|
| 2148 |
|
| 2149 |
# Write manifest
|
| 2150 |
-
self._emit("# Snapshot manifest
|
| 2151 |
self._emit("snap_manifest = {")
|
| 2152 |
self._indent += 1
|
| 2153 |
self._emit(f'"alias": "{alias}",')
|
|
@@ -2198,14 +2486,14 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2198 |
self._emit("})")
|
| 2199 |
|
| 2200 |
def _emit_report(self, cmd: ReportCmd, program: TDProgram) -> None:
|
| 2201 |
-
"""REPORT
|
| 2202 |
|
| 2203 |
Tracks GPU hours, cost, tokens, time per command.
|
| 2204 |
From test_17 ForgeSpec 2.0: economics reports for cost tracking.
|
| 2205 |
"""
|
| 2206 |
output = cmd.output or "economics_report.json"
|
| 2207 |
|
| 2208 |
-
self._emit('print("[td_lang] REPORT
|
| 2209 |
self._emit("elapsed = time.time() - start_time")
|
| 2210 |
self._emit("")
|
| 2211 |
self._emit("report = {")
|
|
@@ -2281,12 +2569,12 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2281 |
# ---------------------------------------------------------------- Phase 8: Autopilot emitters
|
| 2282 |
|
| 2283 |
def _emit_setup(self, setup: SetupBlock) -> None:
|
| 2284 |
-
"""SETUP
|
| 2285 |
|
| 2286 |
Runs at script start: pip install, HF token, ntfy config.
|
| 2287 |
"""
|
| 2288 |
-
self._emit("# ========== SETUP (Phase 8
|
| 2289 |
-
self._emit('print("[td_lang] SETUP
|
| 2290 |
self._emit("")
|
| 2291 |
|
| 2292 |
# pip install
|
|
@@ -2305,7 +2593,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2305 |
self._emit("except Exception as e:")
|
| 2306 |
self._indent += 1
|
| 2307 |
self._emit('print(f"[td_lang] WARNING: pip install failed: {e}")')
|
| 2308 |
-
self._emit('print("[td_lang] Continuing anyway
|
| 2309 |
self._indent -= 1
|
| 2310 |
self._emit("")
|
| 2311 |
|
|
@@ -2374,14 +2662,14 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2374 |
self._emit("")
|
| 2375 |
|
| 2376 |
def _emit_on_error(self, on_error: OnErrorBlock, program: TDProgram) -> None:
|
| 2377 |
-
"""ON_ERROR
|
| 2378 |
|
| 2379 |
Emits a td_safe_run() helper that wraps any function call with:
|
| 2380 |
- Retry N times on failure
|
| 2381 |
- Fallback strategies (reduce batch, skip, snapshot+stop)
|
| 2382 |
- Optional ntfy notification on error
|
| 2383 |
"""
|
| 2384 |
-
self._emit("# ========== ON_ERROR (Phase 8
|
| 2385 |
self._emit(f"TD_MAX_RETRIES = {on_error.retry}")
|
| 2386 |
self._emit(f'TD_FALLBACK = "{on_error.fallback}"')
|
| 2387 |
self._emit(f"TD_NOTIFY_ON_ERROR = {on_error.notify}")
|
|
@@ -2413,10 +2701,10 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2413 |
self._indent -= 1
|
| 2414 |
self._emit('elif TD_FALLBACK == "snapshot_and_stop":')
|
| 2415 |
self._indent += 1
|
| 2416 |
-
self._emit('print(f"[td_lang] OOM
|
| 2417 |
self._emit("if TD_NOTIFY_ON_ERROR:")
|
| 2418 |
self._indent += 1
|
| 2419 |
-
self._emit('td_notify(f"OOM on {step_name}
|
| 2420 |
self._indent -= 1
|
| 2421 |
self._emit("raise")
|
| 2422 |
self._indent -= 2
|
|
@@ -2428,7 +2716,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2428 |
self._indent += 1
|
| 2429 |
self._emit("if TD_NOTIFY_ON_ERROR:")
|
| 2430 |
self._indent += 1
|
| 2431 |
-
self._emit('td_notify(f"FAILED: {step_name} after {TD_MAX_RETRIES} retries
|
| 2432 |
self._indent -= 1
|
| 2433 |
self._emit('if TD_FALLBACK == "skip":')
|
| 2434 |
self._indent += 1
|
|
@@ -2441,19 +2729,19 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2441 |
self._emit("")
|
| 2442 |
|
| 2443 |
def _emit_notify(self, cmd: NotifyCmd, program: TDProgram) -> None:
|
| 2444 |
-
"""NOTIFY
|
| 2445 |
msg = cmd.message.replace('"', '\\"')
|
| 2446 |
self._emit(f'td_notify("{msg}")')
|
| 2447 |
|
| 2448 |
def _emit_save(self, cmd: SaveCmd, program: TDProgram) -> None:
|
| 2449 |
-
"""SAVE
|
| 2450 |
|
| 2451 |
Uses rclone to copy model checkpoint/adapters to Google Drive or any remote.
|
| 2452 |
"""
|
| 2453 |
alias = cmd.target
|
| 2454 |
dest = cmd.destination
|
| 2455 |
|
| 2456 |
-
self._emit(f'print("[td_lang] SAVE
|
| 2457 |
self._emit("")
|
| 2458 |
|
| 2459 |
# Find the model's checkpoint directory
|
|
@@ -2484,7 +2772,7 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2484 |
self._indent += 1
|
| 2485 |
self._emit("import subprocess as _sp")
|
| 2486 |
self._emit("_sp.check_call(_rclone_cmd)")
|
| 2487 |
-
self._emit(f'print("[td_lang] SAVE complete
|
| 2488 |
self._emit(f'td_notify("Model {alias} saved to {dest}")')
|
| 2489 |
self._indent -= 1
|
| 2490 |
self._emit("except FileNotFoundError:")
|
|
@@ -2515,6 +2803,93 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2515 |
self._indent -= 1
|
| 2516 |
self._emit("})")
|
| 2517 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2518 |
# ---------------------------------------------------------------- Budget + summary
|
| 2519 |
def _emit_budget_check(self, program: TDProgram) -> None:
|
| 2520 |
budget = program.budget or BudgetBlock()
|
|
@@ -2585,6 +2960,11 @@ DO NOT EDIT — regenerate from the .td file instead.
|
|
| 2585 |
est_gpu += 0.05 # mostly disk I/O + hashing
|
| 2586 |
elif isinstance(cmd, ReportCmd):
|
| 2587 |
est_gpu += 0.01 # just JSON output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2588 |
|
| 2589 |
est_cost = est_gpu * self.GPU_HOURLY
|
| 2590 |
|
|
|
|
| 1 |
"""
|
| 2 |
+
TD Lang Compiler - turns a TDProgram AST into readable Python code that calls td_fuse.
|
| 3 |
|
| 4 |
Phase 1 commands: load, merge, heal, eval, commit.
|
| 5 |
Phase 2 commands: synth, train, debate, diagnose.
|
|
|
|
| 38 |
ResetCmd,
|
| 39 |
RewardContractBlock,
|
| 40 |
SaveCmd,
|
| 41 |
+
ScheduleCmd,
|
| 42 |
SetupBlock,
|
| 43 |
SnapshotCmd,
|
| 44 |
SynthCmd,
|
|
|
|
| 47 |
)
|
| 48 |
from .errors import TDCompileError
|
| 49 |
|
| 50 |
+
# All command types are now implemented (Phase 1 + 2 + 3 + ... + 9)
|
| 51 |
|
| 52 |
|
| 53 |
class TDCompiler:
|
|
|
|
| 87 |
elif isinstance(cmd, MergeCmd):
|
| 88 |
if cmd.target not in seen:
|
| 89 |
raise TDCompileError(
|
| 90 |
+
f"Can't merge into '{cmd.target}' - it hasn't been loaded yet.",
|
| 91 |
hint=f'Add: load "{cmd.source}" as {cmd.target}',
|
| 92 |
)
|
| 93 |
elif isinstance(cmd, (HealCmd, EvalCmd, CommitCmd)):
|
| 94 |
if cmd.target not in seen:
|
| 95 |
raise TDCompileError(
|
| 96 |
+
f"Can't use '{cmd.target}' - it hasn't been loaded yet.",
|
| 97 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 98 |
)
|
| 99 |
elif isinstance(cmd, (SynthCmd, TrainCmd, DebateCmd, DiagnoseCmd)):
|
| 100 |
if cmd.target not in seen:
|
| 101 |
raise TDCompileError(
|
| 102 |
+
f"Can't use '{cmd.target}' - it hasn't been loaded yet.",
|
| 103 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 104 |
)
|
| 105 |
elif isinstance(cmd, ForkCmd):
|
| 106 |
if cmd.source not in seen:
|
| 107 |
raise TDCompileError(
|
| 108 |
+
f"Can't fork '{cmd.source}' - it hasn't been loaded yet.",
|
| 109 |
hint=f'Add: load "model/path" as {cmd.source}',
|
| 110 |
)
|
| 111 |
if cmd.alias in seen:
|
|
|
|
| 116 |
elif isinstance(cmd, (ResetCmd, PruneCmd, EditCmd)):
|
| 117 |
if cmd.target not in seen:
|
| 118 |
raise TDCompileError(
|
| 119 |
+
f"Can't use '{cmd.target}' - it hasn't been loaded yet.",
|
| 120 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 121 |
)
|
| 122 |
elif isinstance(cmd, SnapshotCmd):
|
| 123 |
if cmd.target not in seen:
|
| 124 |
raise TDCompileError(
|
| 125 |
+
f"Can't snapshot '{cmd.target}' - it hasn't been loaded yet.",
|
| 126 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 127 |
)
|
| 128 |
elif isinstance(cmd, ReportCmd):
|
| 129 |
+
pass # report has no target - always valid
|
| 130 |
elif isinstance(cmd, FuseCmd):
|
| 131 |
if cmd.target not in seen:
|
| 132 |
raise TDCompileError(
|
| 133 |
+
f"Can't fuse into '{cmd.target}' - it hasn't been loaded yet.",
|
| 134 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 135 |
)
|
| 136 |
if len(cmd.sources) < 1:
|
|
|
|
| 141 |
elif isinstance(cmd, AbsorbCmd):
|
| 142 |
if cmd.target not in seen:
|
| 143 |
raise TDCompileError(
|
| 144 |
+
f"Can't absorb into '{cmd.target}' - it hasn't been loaded yet.",
|
| 145 |
hint=f'Add: load "model/path" as {cmd.target}',
|
| 146 |
)
|
| 147 |
+
elif isinstance(cmd, (RepeatBlock, IfBlock, ScheduleCmd)):
|
| 148 |
+
pass # block commands - body validation happens at emit time
|
| 149 |
+
elif isinstance(cmd, (NotifyCmd, SaveCmd)):
|
| 150 |
+
pass # utility commands - always valid
|
| 151 |
|
| 152 |
# ---------------------------------------------------------------- Build script
|
| 153 |
def _build_script(self, program: TDProgram) -> None:
|
|
|
|
| 163 |
Compiled: {timestamp}
|
| 164 |
Hash: {source_hash}
|
| 165 |
|
| 166 |
+
DO NOT EDIT - regenerate from the .td file instead.
|
| 167 |
"""'''
|
| 168 |
)
|
| 169 |
self._emit(doc)
|
|
|
|
| 287 |
self._emit_notify(cmd, program)
|
| 288 |
elif isinstance(cmd, SaveCmd):
|
| 289 |
self._emit_save(cmd, program)
|
| 290 |
+
elif isinstance(cmd, ScheduleCmd):
|
| 291 |
+
self._emit_schedule(cmd, program)
|
| 292 |
self._emit("")
|
| 293 |
|
| 294 |
self._emit_summary()
|
|
|
|
| 318 |
self._indent -= 1
|
| 319 |
self._emit("except ImportError:")
|
| 320 |
self._indent += 1
|
| 321 |
+
self._emit('print("[td_lang] huggingface_hub not installed. Storing ref only - download will happen at merge time.")')
|
| 322 |
self._emit("_local_path = _model_ref")
|
| 323 |
self._indent -= 1
|
| 324 |
self._emit("except Exception as e:")
|
|
|
|
| 388 |
self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
|
| 389 |
self._emit("if not checkpoint:")
|
| 390 |
self._indent += 1
|
| 391 |
+
self._emit('print("[td_lang] WARNING: No checkpoint to heal - run a merge first.")')
|
| 392 |
self._indent -= 1
|
| 393 |
self._emit("else:")
|
| 394 |
self._indent += 1
|
|
|
|
| 407 |
self._indent -= 1
|
| 408 |
|
| 409 |
def _emit_eval(self, cmd: EvalCmd) -> None:
|
| 410 |
+
"""Generate self-contained evaluation - math, code, reasoning, perplexity.
|
| 411 |
+
|
| 412 |
+
No dependency on td_fuse. Tests the model on real tasks and returns
|
| 413 |
+
pass/fail plus scores per category. Uses 'improved' flag to track
|
| 414 |
+
whether the model got better vs previous eval.
|
| 415 |
+
"""
|
| 416 |
self._emit(f'print("[td_lang] Evaluating {cmd.target}...")')
|
| 417 |
self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
|
| 418 |
self._emit("if not checkpoint:")
|
| 419 |
self._indent += 1
|
| 420 |
+
self._emit(f'checkpoint = models["{cmd.target}"]["model_ref"]')
|
| 421 |
self._indent -= 1
|
|
|
|
|
|
|
| 422 |
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
|
| 423 |
+
self._emit("import torch, re, ast")
|
| 424 |
self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
|
| 425 |
self._emit("model = AutoModelForCausalLM.from_pretrained(")
|
| 426 |
self._indent += 1
|
| 427 |
self._emit('checkpoint, torch_dtype=torch.bfloat16, device_map="auto"')
|
| 428 |
self._indent -= 1
|
| 429 |
self._emit(")")
|
| 430 |
+
self._emit("model.eval()")
|
| 431 |
+
self._emit("")
|
| 432 |
+
self._emit("# Mini-benchmark: math, code, reasoning, perplexity")
|
| 433 |
+
self._emit("eval_tests = {")
|
| 434 |
+
self._indent += 1
|
| 435 |
+
self._emit('"math": [')
|
| 436 |
self._indent += 1
|
| 437 |
+
self._emit('{"prompt": "What is 17 * 23? Answer with just the number.", "answer": "391"},')
|
| 438 |
+
self._emit('{"prompt": "What is 144 / 12? Answer with just the number.", "answer": "12"},')
|
| 439 |
+
self._emit('{"prompt": "What is 256 + 789? Answer with just the number.", "answer": "1045"},')
|
| 440 |
+
self._emit('{"prompt": "What is 15 squared? Answer with just the number.", "answer": "225"},')
|
| 441 |
+
self._emit('{"prompt": "What is the square root of 81? Answer with just the number.", "answer": "9"},')
|
| 442 |
self._indent -= 1
|
| 443 |
+
self._emit("],")
|
| 444 |
+
self._emit('"code": [')
|
| 445 |
+
self._indent += 1
|
| 446 |
+
self._emit('{"prompt": "Write a Python function that returns the sum of a list. Just the function, nothing else.", "check": "def"},')
|
| 447 |
+
self._emit('{"prompt": "Write a Python function to check if a number is prime. Just the function.", "check": "def"},')
|
| 448 |
+
self._emit('{"prompt": "Write a Python one-liner list comprehension that squares numbers 1-10.", "check": "["},')
|
| 449 |
+
self._indent -= 1
|
| 450 |
+
self._emit("],")
|
| 451 |
+
self._emit('"reasoning": [')
|
| 452 |
+
self._indent += 1
|
| 453 |
+
self._emit('{"prompt": "If all dogs are animals, and all animals breathe, do all dogs breathe? Answer yes or no.", "answer": "yes"},')
|
| 454 |
+
self._emit('{"prompt": "A bat and ball cost $1.10 together. The bat costs $1 more than the ball. How much does the ball cost? Answer with just the number.", "answer": "0.05"},')
|
| 455 |
+
self._emit('{"prompt": "If it takes 5 machines 5 minutes to make 5 widgets, how long would it take 100 machines to make 100 widgets? Answer in minutes.", "answer": "5"},')
|
| 456 |
+
self._indent -= 1
|
| 457 |
+
self._emit("],")
|
| 458 |
+
self._indent -= 1
|
| 459 |
+
self._emit("}")
|
| 460 |
+
self._emit("")
|
| 461 |
+
self._emit("eval_result = {'overall': True, 'scores': {}, 'details': {}}")
|
| 462 |
+
self._emit("total_correct = 0")
|
| 463 |
+
self._emit("total_tests = 0")
|
| 464 |
+
self._emit("")
|
| 465 |
+
self._emit("for category, tests in eval_tests.items():")
|
| 466 |
+
self._indent += 1
|
| 467 |
+
self._emit("cat_correct = 0")
|
| 468 |
+
self._emit("cat_details = []")
|
| 469 |
+
self._emit("for test in tests:")
|
| 470 |
+
self._indent += 1
|
| 471 |
+
self._emit("total_tests += 1")
|
| 472 |
+
self._emit('inputs = tok(test["prompt"], return_tensors="pt").to(model.device)')
|
| 473 |
+
self._emit("with torch.no_grad():")
|
| 474 |
+
self._indent += 1
|
| 475 |
+
self._emit("output = model.generate(**inputs, max_new_tokens=256, do_sample=False, temperature=0.0)")
|
| 476 |
+
self._indent -= 1
|
| 477 |
+
self._emit("response = tok.decode(output[0], skip_special_tokens=True)")
|
| 478 |
+
self._emit('# Strip the prompt from the response if model echoes it')
|
| 479 |
+
self._emit('if response.startswith(test["prompt"]):')
|
| 480 |
+
self._indent += 1
|
| 481 |
+
self._emit('response = response[len(test["prompt"]):].strip()')
|
| 482 |
+
self._indent -= 1
|
| 483 |
+
self._emit("passed = False")
|
| 484 |
+
self._emit('if "answer" in test:')
|
| 485 |
+
self._indent += 1
|
| 486 |
+
self._emit('passed = test["answer"].lower() in response.lower()')
|
| 487 |
+
self._indent -= 1
|
| 488 |
+
self._emit('elif "check" in test:')
|
| 489 |
+
self._indent += 1
|
| 490 |
+
self._emit('passed = test["check"] in response')
|
| 491 |
+
self._emit("# Also try to parse as valid Python")
|
| 492 |
+
self._emit("try:")
|
| 493 |
+
self._indent += 1
|
| 494 |
+
self._emit("ast.parse(response)")
|
| 495 |
+
self._indent -= 1
|
| 496 |
+
self._emit("except SyntaxError:")
|
| 497 |
+
self._indent += 1
|
| 498 |
+
self._emit("passed = False # Code doesn't compile")
|
| 499 |
+
self._indent -= 2
|
| 500 |
+
self._emit("if passed:")
|
| 501 |
+
self._indent += 1
|
| 502 |
+
self._emit("cat_correct += 1")
|
| 503 |
+
self._emit("total_correct += 1")
|
| 504 |
+
self._indent -= 1
|
| 505 |
+
self._emit('cat_details.append({"prompt": test["prompt"][:60], "passed": passed})')
|
| 506 |
+
self._indent -= 1
|
| 507 |
+
self._emit("score = cat_correct / max(len(tests), 1)")
|
| 508 |
+
self._emit('eval_result["scores"][category] = round(score, 3)')
|
| 509 |
+
self._emit('eval_result["details"][category] = cat_details')
|
| 510 |
+
self._emit('print(f" {category}: {cat_correct}/{len(tests)} ({score:.0%})")')
|
| 511 |
+
self._indent -= 1
|
| 512 |
+
self._emit("")
|
| 513 |
+
self._emit("# Perplexity test (lower = model is more confident/coherent)")
|
| 514 |
+
self._emit('ppl_text = "The capital of France is Paris. Water boils at 100 degrees Celsius."')
|
| 515 |
+
self._emit('ppl_inputs = tok(ppl_text, return_tensors="pt").to(model.device)')
|
| 516 |
+
self._emit("with torch.no_grad():")
|
| 517 |
+
self._indent += 1
|
| 518 |
+
self._emit('ppl_loss = model(**ppl_inputs, labels=ppl_inputs["input_ids"]).loss')
|
| 519 |
+
self._indent -= 1
|
| 520 |
+
self._emit("perplexity = torch.exp(ppl_loss).item()")
|
| 521 |
+
self._emit('eval_result["perplexity"] = round(perplexity, 2)')
|
| 522 |
+
self._emit('eval_result["scores"]["perplexity"] = "pass" if perplexity < 20.0 else "fail"')
|
| 523 |
+
self._emit('_ppl_label = "pass" if perplexity < 20.0 else "FAIL - too high"')
|
| 524 |
+
self._emit('print(f" perplexity: {perplexity:.2f} ({_ppl_label})")')
|
| 525 |
+
self._emit("")
|
| 526 |
+
self._emit("# Overall score")
|
| 527 |
+
self._emit("overall_score = total_correct / max(total_tests, 1)")
|
| 528 |
+
self._emit('eval_result["overall_score"] = round(overall_score, 3)')
|
| 529 |
+
self._emit('eval_result["overall"] = overall_score >= 0.5 and perplexity < 20.0')
|
| 530 |
+
self._emit('_overall_label = "PASS" if eval_result["overall"] else "FAIL"')
|
| 531 |
+
self._emit('print(f" OVERALL: {total_correct}/{total_tests} ({overall_score:.0%}) - {_overall_label}")')
|
| 532 |
+
self._emit("")
|
| 533 |
+
self._emit("# Track improvement over previous eval")
|
| 534 |
self._emit(f'hist_key = "{cmd.target}_eval_history"')
|
| 535 |
self._emit("if hist_key not in results:")
|
| 536 |
self._indent += 1
|
| 537 |
self._emit("results[hist_key] = []")
|
| 538 |
self._indent -= 1
|
| 539 |
+
self._emit("results[hist_key].append(overall_score)")
|
| 540 |
+
self._emit('eval_result["improved"] = len(results[hist_key]) < 2 or results[hist_key][-1] >= results[hist_key][-2]')
|
| 541 |
+
self._emit(f'results["{cmd.target}_eval"] = eval_result')
|
| 542 |
self._emit(f'lineage["{cmd.target}"]["operations"].append({{')
|
| 543 |
self._indent += 1
|
| 544 |
self._emit('"op": "eval",')
|
| 545 |
self._emit('"timestamp": datetime.now().isoformat(),')
|
| 546 |
+
self._emit('"overall_score": overall_score,')
|
| 547 |
+
self._emit('"perplexity": perplexity,')
|
| 548 |
self._indent -= 1
|
| 549 |
self._emit("})")
|
| 550 |
if cmd.output:
|
|
|
|
| 559 |
self._emit('print("[td_lang] Eval results:", json.dumps(eval_result, indent=2, default=str))')
|
| 560 |
self._emit("del model, tok")
|
| 561 |
self._emit("import gc; gc.collect()")
|
|
|
|
| 562 |
|
| 563 |
def _emit_commit(self, cmd: CommitCmd, global_gates: Optional[GateBlock]) -> None:
|
| 564 |
gates = cmd.gates or (global_gates.must_pass if global_gates else None)
|
|
|
|
| 592 |
self._indent -= 1
|
| 593 |
self._emit("if failed:")
|
| 594 |
self._indent += 1
|
| 595 |
+
self._emit('raise TDGateError(failed, message="Commit blocked - gates failed")')
|
| 596 |
self._indent -= 1
|
| 597 |
self._emit("else:")
|
| 598 |
self._indent += 1
|
|
|
|
| 631 |
self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
|
| 632 |
self._emit("if not checkpoint:")
|
| 633 |
self._indent += 1
|
| 634 |
+
self._emit('print("[td_lang] WARNING: No checkpoint - using model_ref instead.")')
|
| 635 |
self._emit(f'checkpoint = models["{cmd.target}"]["model_ref"]')
|
| 636 |
self._indent -= 1
|
| 637 |
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
|
|
|
|
| 667 |
self._emit('print(f" Response: {response[:200]}...")')
|
| 668 |
self._emit("print()")
|
| 669 |
self._indent -= 1
|
| 670 |
+
self._emit("")
|
| 671 |
+
self._emit("# Parse responses into structured weakness categories")
|
| 672 |
+
self._emit("import re as _re")
|
| 673 |
+
self._emit("weakness_categories = {")
|
| 674 |
+
self._indent += 1
|
| 675 |
+
self._emit("'math': ['math', 'arithmetic', 'calculation', 'algebra', 'geometry', 'calculus'],")
|
| 676 |
+
self._emit("'code': ['code', 'coding', 'programming', 'debug', 'syntax', 'algorithm'],")
|
| 677 |
+
self._emit("'logic': ['logic', 'reasoning', 'inference', 'fallac', 'deduction', 'chain'],")
|
| 678 |
+
self._emit("'factual': ['factual', 'hallucin', 'accuracy', 'knowledge', 'recall', 'memory'],")
|
| 679 |
+
self._emit("'creativity': ['creative', 'creativity', 'imagination', 'novel', 'original'],")
|
| 680 |
+
self._emit("'instruction': ['instruction', 'follow', 'format', 'comply', 'understand'],")
|
| 681 |
+
self._indent -= 1
|
| 682 |
+
self._emit("}")
|
| 683 |
+
self._emit("")
|
| 684 |
+
self._emit("weakness_scores = {cat: 0 for cat in weakness_categories}")
|
| 685 |
+
self._emit("for d in diagnose_results:")
|
| 686 |
+
self._indent += 1
|
| 687 |
+
self._emit("resp_lower = d['response'].lower()")
|
| 688 |
+
self._emit("for cat, keywords in weakness_categories.items():")
|
| 689 |
+
self._indent += 1
|
| 690 |
+
self._emit("for kw in keywords:")
|
| 691 |
+
self._indent += 1
|
| 692 |
+
self._emit("if kw in resp_lower:")
|
| 693 |
+
self._indent += 1
|
| 694 |
+
self._emit("weakness_scores[cat] += 1")
|
| 695 |
+
self._emit("break")
|
| 696 |
+
self._indent -= 3
|
| 697 |
+
self._indent -= 1
|
| 698 |
+
self._emit("")
|
| 699 |
+
self._emit("# Rank weaknesses by how many prompts mentioned them")
|
| 700 |
+
self._emit("ranked = sorted(weakness_scores.items(), key=lambda x: x[1], reverse=True)")
|
| 701 |
+
self._emit("top_weaknesses = [cat for cat, score in ranked if score > 0][:4]")
|
| 702 |
+
self._emit("if not top_weaknesses:")
|
| 703 |
+
self._indent += 1
|
| 704 |
+
self._emit("top_weaknesses = ['math', 'logic', 'code'] # safe defaults")
|
| 705 |
+
self._indent -= 1
|
| 706 |
+
self._emit("")
|
| 707 |
+
self._emit("diagnosis = {")
|
| 708 |
+
self._indent += 1
|
| 709 |
+
self._emit("'raw_responses': diagnose_results,")
|
| 710 |
+
self._emit("'weakness_scores': weakness_scores,")
|
| 711 |
+
self._emit("'top_weaknesses': top_weaknesses,")
|
| 712 |
+
self._emit("'ranked': ranked,")
|
| 713 |
+
self._indent -= 1
|
| 714 |
+
self._emit("}")
|
| 715 |
+
self._emit("print('[td_lang] Weakness ranking:')")
|
| 716 |
+
self._emit("for cat, score in ranked:")
|
| 717 |
+
self._indent += 1
|
| 718 |
+
self._emit("if score > 0:")
|
| 719 |
+
self._indent += 1
|
| 720 |
+
self._emit("print(f' {cat}: mentioned in {score}/{len(diag_prompts)} prompts')")
|
| 721 |
+
self._indent -= 2
|
| 722 |
+
self._emit("print(f'[td_lang] Top weaknesses to target: {top_weaknesses}')")
|
| 723 |
+
self._emit("")
|
| 724 |
+
self._emit(f'results["{cmd.target}_diagnose"] = diagnosis')
|
| 725 |
self._emit(f'lineage["{cmd.target}"]["operations"].append({{')
|
| 726 |
self._indent += 1
|
| 727 |
self._emit('"op": "diagnose",')
|
| 728 |
self._emit('"n_prompts": len(diag_prompts),')
|
| 729 |
+
self._emit('"top_weaknesses": top_weaknesses,')
|
| 730 |
self._emit('"timestamp": datetime.now().isoformat(),')
|
| 731 |
self._indent -= 1
|
| 732 |
self._emit("})")
|
|
|
|
| 735 |
self._emit("diag_path.parent.mkdir(parents=True, exist_ok=True)")
|
| 736 |
self._emit('with open(diag_path, "w") as f:')
|
| 737 |
self._indent += 1
|
| 738 |
+
self._emit("json.dump(diagnosis, f, indent=2, default=str)")
|
| 739 |
self._indent -= 1
|
| 740 |
self._emit('print(f"[td_lang] Diagnosis saved to {diag_path}")')
|
| 741 |
self._emit("del model, tok")
|
|
|
|
| 767 |
self._emit(")")
|
| 768 |
self._emit("model.eval()")
|
| 769 |
self._emit("")
|
| 770 |
+
self._emit("# Use structured diagnosis if available (upgraded diagnose outputs top_weaknesses)")
|
| 771 |
+
self._emit(f'diag = results.get("{cmd.target}_diagnose", {{}})')
|
| 772 |
+
self._emit("if isinstance(diag, dict) and 'top_weaknesses' in diag:")
|
| 773 |
+
self._indent += 1
|
| 774 |
+
self._emit("weak_topics = diag['top_weaknesses']")
|
| 775 |
+
self._emit("print(f'[td_lang] Targeting weaknesses from diagnosis: {weak_topics}')")
|
| 776 |
+
self._indent -= 1
|
| 777 |
+
self._emit("else:")
|
| 778 |
+
self._indent += 1
|
| 779 |
+
self._emit("# Fallback: scan raw responses for weakness keywords")
|
| 780 |
self._emit("weak_topics = []")
|
| 781 |
+
self._emit("raw = diag if isinstance(diag, list) else diag.get('raw_responses', [])")
|
| 782 |
+
self._emit("for d in raw:")
|
| 783 |
self._indent += 1
|
| 784 |
self._emit("resp = d.get('response', '')")
|
| 785 |
+
self._emit("for topic in ['math', 'code', 'logic', 'factual']:")
|
| 786 |
self._indent += 1
|
| 787 |
+
self._emit("if topic in resp.lower() and topic not in weak_topics:")
|
| 788 |
self._indent += 1
|
| 789 |
self._emit("weak_topics.append(topic)")
|
| 790 |
self._indent -= 1
|
| 791 |
self._indent -= 1
|
| 792 |
self._indent -= 1
|
| 793 |
+
self._indent -= 1
|
| 794 |
self._emit("if not weak_topics:")
|
| 795 |
self._indent += 1
|
| 796 |
self._emit("weak_topics = ['math', 'code', 'logic', 'factual']")
|
|
|
|
| 810 |
self._indent -= 1
|
| 811 |
self._emit("}")
|
| 812 |
self._emit("")
|
| 813 |
+
self._emit("# Seed problems - model generates MORE from these (not just these 4)")
|
| 814 |
+
self._emit("seed_problems = {")
|
| 815 |
+
self._indent += 1
|
| 816 |
+
self._emit("'math': [")
|
| 817 |
+
self._indent += 1
|
| 818 |
+
self._emit("'Compute (17*19 - 121) / 3',")
|
| 819 |
+
self._emit("'Find the derivative of x^3 + 2x^2 - 5x + 7',")
|
| 820 |
+
self._emit("'Solve for x: 3x + 7 = 22',")
|
| 821 |
+
self._emit("'What is the sum of the first 20 positive integers?',")
|
| 822 |
+
self._emit("'A rectangle has area 48 and perimeter 28. Find its dimensions.',")
|
| 823 |
+
self._emit("'Calculate 15% of 240',")
|
| 824 |
+
self._indent -= 1
|
| 825 |
+
self._emit("],")
|
| 826 |
+
self._emit("'code': [")
|
| 827 |
+
self._indent += 1
|
| 828 |
+
self._emit("'Implement binary search in Python',")
|
| 829 |
+
self._emit("'Write a function to reverse a linked list',")
|
| 830 |
+
self._emit("'Parse a CSV file and compute column averages',")
|
| 831 |
+
self._emit("'Implement a LRU cache with O(1) get and put',")
|
| 832 |
+
self._emit("'Write a function to find all permutations of a string',")
|
| 833 |
+
self._emit("'Implement merge sort',")
|
| 834 |
+
self._indent -= 1
|
| 835 |
+
self._emit("],")
|
| 836 |
+
self._emit("'logic': [")
|
| 837 |
self._indent += 1
|
| 838 |
+
self._emit("'If all A are B and all B are C, are all A C? Explain your reasoning.',")
|
| 839 |
+
self._emit("'A says B is lying. B says C is lying. C says both A and B are lying. Who is telling the truth?',")
|
| 840 |
+
self._emit("'Three boxes: one has gold, one has silver, one is empty. Box A says gold is in B. Box B says gold is in B. Box C says gold is not in A. Only one tells truth. Where is the gold?',")
|
| 841 |
+
self._emit("'If it takes 5 machines 5 minutes to make 5 widgets, how long does it take 100 machines to make 100 widgets?',")
|
| 842 |
+
self._indent -= 1
|
| 843 |
+
self._emit("],")
|
| 844 |
+
self._emit("'factual': [")
|
| 845 |
self._indent += 1
|
| 846 |
+
self._emit("'Explain the difference between TCP and UDP in networking',")
|
| 847 |
+
self._emit("'What are the three laws of thermodynamics?',")
|
| 848 |
+
self._emit("'Describe how transformers work in machine learning',")
|
| 849 |
+
self._emit("'What causes tides on Earth?',")
|
| 850 |
+
self._indent -= 1
|
| 851 |
+
self._emit("],")
|
| 852 |
self._indent -= 1
|
| 853 |
+
self._emit("}")
|
| 854 |
+
self._emit("")
|
| 855 |
+
self._emit("# Ask the model to generate MORE problems like the seeds")
|
| 856 |
+
self._emit("print('[td_lang] Generating problem bank from seeds...')")
|
| 857 |
+
self._emit("problem_bank = dict(seed_problems) # start with seeds")
|
| 858 |
+
self._emit("for domain in weak_topics:")
|
| 859 |
+
self._indent += 1
|
| 860 |
+
self._emit("if domain not in seed_problems:")
|
| 861 |
+
self._indent += 1
|
| 862 |
+
self._emit("continue")
|
| 863 |
+
self._indent -= 1
|
| 864 |
+
self._emit("examples = '; '.join(seed_problems.get(domain, [])[:3])")
|
| 865 |
+
self._emit("gen_prompt = f'Generate 10 diverse {domain} problems similar to: {examples}. List them numbered 1-10, one per line.'")
|
| 866 |
+
self._emit('gen_inputs = tok(gen_prompt, return_tensors="pt").to(model.device)')
|
| 867 |
+
self._emit("with torch.no_grad():")
|
| 868 |
self._indent += 1
|
| 869 |
+
self._emit("gen_out = model.generate(**gen_inputs, max_new_tokens=512, do_sample=True, temperature=0.9)")
|
| 870 |
self._indent -= 1
|
| 871 |
+
self._emit("gen_text = tok.decode(gen_out[0], skip_special_tokens=True)")
|
| 872 |
+
self._emit("# Parse numbered lines as new problems")
|
| 873 |
+
self._emit("for line in gen_text.split(chr(10)):")
|
| 874 |
self._indent += 1
|
| 875 |
+
self._emit("line = re.sub(r'^\\d+[.)\\s]+', '', line.strip())")
|
| 876 |
+
self._emit("if len(line) > 15:")
|
| 877 |
+
self._indent += 1
|
| 878 |
+
self._emit("problem_bank.setdefault(domain, []).append(line)")
|
| 879 |
+
self._indent -= 2
|
| 880 |
self._indent -= 1
|
| 881 |
+
self._emit("total_problems = sum(len(v) for v in problem_bank.values())")
|
| 882 |
+
self._emit("print(f'[td_lang] Problem bank: {total_problems} problems across {len(problem_bank)} domains')")
|
| 883 |
+
self._emit("")
|
| 884 |
+
self._emit("def make_problem(domain: str) -> str:")
|
| 885 |
+
self._indent += 1
|
| 886 |
+
self._emit("pool = problem_bank.get(domain, problem_bank.get('math', ['Solve 2+2']))")
|
| 887 |
+
self._emit("return random.choice(pool)")
|
| 888 |
self._indent -= 1
|
| 889 |
self._emit("")
|
| 890 |
self._emit("synth_data = []")
|
|
|
|
| 971 |
self._emit("")
|
| 972 |
|
| 973 |
if cmd.method == "grpo":
|
| 974 |
+
self._emit("# GRPO training with QLoRA (test_15: 64 steps sweet spot)")
|
| 975 |
+
self._emit("# QLoRA = 4-bit base model + LoRA adapters = fits on 24GB 4090")
|
| 976 |
self._emit("from trl import GRPOConfig, GRPOTrainer")
|
| 977 |
+
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig")
|
| 978 |
+
self._emit("from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training")
|
| 979 |
self._emit("from datasets import load_dataset")
|
| 980 |
self._emit("import torch")
|
| 981 |
self._emit("")
|
| 982 |
self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
|
| 983 |
+
self._emit("if tok.pad_token is None:")
|
| 984 |
+
self._indent += 1
|
| 985 |
+
self._emit("tok.pad_token = tok.eos_token")
|
| 986 |
+
self._indent -= 1
|
| 987 |
+
self._emit("")
|
| 988 |
+
self._emit("# 4-bit quantization - shrinks 7B model from 14GB to ~4GB VRAM")
|
| 989 |
+
self._emit("bnb_config = BitsAndBytesConfig(")
|
| 990 |
+
self._indent += 1
|
| 991 |
+
self._emit("load_in_4bit=True,")
|
| 992 |
+
self._emit('bnb_4bit_quant_type="nf4",')
|
| 993 |
+
self._emit("bnb_4bit_compute_dtype=torch.bfloat16,")
|
| 994 |
+
self._emit("bnb_4bit_use_double_quant=True,")
|
| 995 |
+
self._indent -= 1
|
| 996 |
+
self._emit(")")
|
| 997 |
+
self._emit("")
|
| 998 |
self._emit("model = AutoModelForCausalLM.from_pretrained(")
|
| 999 |
self._indent += 1
|
| 1000 |
+
self._emit("checkpoint,")
|
| 1001 |
+
self._emit("quantization_config=bnb_config,")
|
| 1002 |
+
self._emit('device_map="auto",')
|
| 1003 |
self._indent -= 1
|
| 1004 |
self._emit(")")
|
| 1005 |
+
self._emit("model = prepare_model_for_kbit_training(model)")
|
| 1006 |
+
self._emit("")
|
| 1007 |
+
self._emit("# LoRA adapters on mid-to-late layers (test_12: layers 16-28 for 32-layer)")
|
| 1008 |
+
self._emit("lora_config = LoraConfig(")
|
| 1009 |
+
self._indent += 1
|
| 1010 |
+
self._emit("r=32,")
|
| 1011 |
+
self._emit("lora_alpha=64,")
|
| 1012 |
+
self._emit("lora_dropout=0.05,")
|
| 1013 |
+
self._emit('target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],')
|
| 1014 |
+
self._emit('task_type="CAUSAL_LM",')
|
| 1015 |
+
self._indent -= 1
|
| 1016 |
+
self._emit(")")
|
| 1017 |
+
self._emit("model = get_peft_model(model, lora_config)")
|
| 1018 |
+
self._emit("model.print_trainable_parameters() # Shows ~1-2% trainable vs total")
|
| 1019 |
self._emit("")
|
| 1020 |
self._emit(f'# Load training data')
|
| 1021 |
self._emit(f'dataset_path = "{cmd.dataset}"')
|
|
|
|
| 1038 |
self._emit('output_dir="td_lang_outputs/grpo_training",')
|
| 1039 |
self._emit("save_steps=16,")
|
| 1040 |
self._emit('bf16=True,')
|
| 1041 |
+
self._emit("gradient_checkpointing=True, # saves VRAM at slight speed cost")
|
| 1042 |
self._indent -= 1
|
| 1043 |
self._emit(")")
|
| 1044 |
self._emit("")
|
|
|
|
| 1142 |
self._emit("if logs['kl'] > 3.1 * ma:")
|
| 1143 |
self._indent += 1
|
| 1144 |
self._emit("control.should_training_stop = True")
|
| 1145 |
+
self._emit("print('[td_lang][early_stop] KL spike detected - stopping GRPO')")
|
| 1146 |
self._indent -= 2
|
| 1147 |
self._indent -= 1
|
| 1148 |
self._emit("if 'eval/reward' in logs:")
|
|
|
|
| 1151 |
self._emit("if len(self.eval_rewards) >= 2 and self.eval_rewards[-1] < self.eval_rewards[-2]:")
|
| 1152 |
self._indent += 1
|
| 1153 |
self._emit("control.should_training_stop = True")
|
| 1154 |
+
self._emit("print('[td_lang][early_stop] Validation reward drop - stopping GRPO')")
|
| 1155 |
self._indent -= 1
|
| 1156 |
self._indent -= 1
|
| 1157 |
self._emit("if 'policy_entropy' in logs:")
|
|
|
|
| 1163 |
self._emit("if self.entropy_history[-1] < 0.93 * baseline:")
|
| 1164 |
self._indent += 1
|
| 1165 |
self._emit("control.should_training_stop = True")
|
| 1166 |
+
self._emit("print('[td_lang][early_stop] Diversity collapsed - stopping GRPO')")
|
| 1167 |
self._indent -= 2
|
| 1168 |
self._indent -= 2
|
| 1169 |
self._indent -= 1
|
|
|
|
| 1182 |
self._emit(f'models["{cmd.target}"]["checkpoint"] = "td_lang_outputs/grpo_trained"')
|
| 1183 |
|
| 1184 |
elif cmd.method in ("sft", "dpo"):
|
| 1185 |
+
self._emit(f"# {cmd.method.upper()} training with QLoRA (fits on 24GB 4090)")
|
| 1186 |
+
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig")
|
| 1187 |
+
self._emit("from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training")
|
| 1188 |
if cmd.method == "sft":
|
| 1189 |
self._emit("from trl import SFTTrainer")
|
| 1190 |
else:
|
|
|
|
| 1193 |
self._emit("import torch")
|
| 1194 |
self._emit("")
|
| 1195 |
self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
|
| 1196 |
+
self._emit("if tok.pad_token is None:")
|
| 1197 |
+
self._indent += 1
|
| 1198 |
+
self._emit("tok.pad_token = tok.eos_token")
|
| 1199 |
+
self._indent -= 1
|
| 1200 |
+
self._emit("")
|
| 1201 |
+
self._emit("bnb_config = BitsAndBytesConfig(")
|
| 1202 |
+
self._indent += 1
|
| 1203 |
+
self._emit("load_in_4bit=True,")
|
| 1204 |
+
self._emit('bnb_4bit_quant_type="nf4",')
|
| 1205 |
+
self._emit("bnb_4bit_compute_dtype=torch.bfloat16,")
|
| 1206 |
+
self._emit("bnb_4bit_use_double_quant=True,")
|
| 1207 |
+
self._indent -= 1
|
| 1208 |
+
self._emit(")")
|
| 1209 |
self._emit("model = AutoModelForCausalLM.from_pretrained(")
|
| 1210 |
self._indent += 1
|
| 1211 |
+
self._emit("checkpoint, quantization_config=bnb_config, device_map='auto',")
|
| 1212 |
self._indent -= 1
|
| 1213 |
self._emit(")")
|
| 1214 |
+
self._emit("model = prepare_model_for_kbit_training(model)")
|
| 1215 |
+
self._emit("lora_config = LoraConfig(r=32, lora_alpha=64, lora_dropout=0.05,")
|
| 1216 |
+
self._emit(' target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],')
|
| 1217 |
+
self._emit(' task_type="CAUSAL_LM")')
|
| 1218 |
+
self._emit("model = get_peft_model(model, lora_config)")
|
| 1219 |
self._emit(f'dataset_path = "{cmd.dataset}"')
|
| 1220 |
self._emit("if dataset_path.endswith('.jsonl'):")
|
| 1221 |
self._indent += 1
|
|
|
|
| 1426 |
# ---------------------------------------------------------------- Phase 3 emitters
|
| 1427 |
|
| 1428 |
def _emit_edit(self, cmd: EditCmd) -> None:
|
| 1429 |
+
"""EDIT - surgical LoRA/DoRA on specific layers.
|
| 1430 |
|
| 1431 |
From test_18: all 3 AIs agree LoRA is safe default, DoRA beats by 1-4%.
|
| 1432 |
layers_to_transform supports targeting specific layers (e.g., 16-28).
|
|
|
|
| 1437 |
layers = cmd.layers # "all", "16-28", or single number
|
| 1438 |
lr = cmd.learning_rate or 1e-4
|
| 1439 |
|
| 1440 |
+
self._emit(f'print("[td_lang] EDIT - surgical {method} on {alias}, layers={layers}")')
|
| 1441 |
self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
|
| 1442 |
self._emit("import torch")
|
| 1443 |
self._emit("from peft import LoraConfig, get_peft_model, PeftModel")
|
|
|
|
| 1493 |
self._emit("")
|
| 1494 |
|
| 1495 |
# Apply adapter
|
| 1496 |
+
self._emit("# Inject adapter - base weights stay frozen")
|
| 1497 |
self._emit("model = get_peft_model(model, edit_config)")
|
| 1498 |
self._emit("model.print_trainable_parameters()")
|
| 1499 |
self._emit("")
|
|
|
|
| 1512 |
self._indent -= 1
|
| 1513 |
self._emit("")
|
| 1514 |
|
| 1515 |
+
# "Try before buy" - actual eval with adapters on vs off
|
| 1516 |
self._emit('sample_prompts = ["What is 7+8?", "Explain photosynthesis in one paragraph.", "Write a Python function fib(n)."]')
|
| 1517 |
self._emit("def run_quick_eval(enable_adapters: bool):")
|
| 1518 |
self._indent += 1
|
|
|
|
| 1552 |
self._indent -= 1
|
| 1553 |
self._emit("")
|
| 1554 |
|
| 1555 |
+
# Save adapter (don't merge yet - let commit/gates decide)
|
| 1556 |
self._emit(f'edit_save_dir = os.path.join(output_dir, "{alias}_edit_{method}")')
|
| 1557 |
self._emit("os.makedirs(edit_save_dir, exist_ok=True)")
|
| 1558 |
self._emit("model.save_pretrained(edit_save_dir)")
|
| 1559 |
self._emit(f'print(f"[td_lang] EDIT adapter saved to {{edit_save_dir}}")')
|
| 1560 |
+
self._emit(f'print("[td_lang] Adapter NOT merged - use commit with gates to merge permanently")')
|
| 1561 |
self._emit("")
|
| 1562 |
|
| 1563 |
# Update models dict
|
| 1564 |
self._emit(f'models["{alias}"] = model')
|
| 1565 |
|
| 1566 |
def _emit_fork(self, cmd: ForkCmd) -> None:
|
| 1567 |
+
"""FORK - branch current model weights for parallel experiments.
|
| 1568 |
|
| 1569 |
From test_18: all 3 AIs say disk-based only on 4090.
|
| 1570 |
Cheap fork = copy manifest + adapter files, share base weights.
|
|
|
|
| 1573 |
source = cmd.source
|
| 1574 |
alias = cmd.alias
|
| 1575 |
|
| 1576 |
+
self._emit(f'print("[td_lang] FORK - branching {source} as {alias}")')
|
| 1577 |
self._emit(f'source_model = models["{source}"]')
|
| 1578 |
self._emit("import torch")
|
| 1579 |
self._emit("")
|
|
|
|
| 1586 |
self._emit("")
|
| 1587 |
|
| 1588 |
# Write manifest
|
| 1589 |
+
self._emit("# Write fork manifest - tracks lineage")
|
| 1590 |
self._emit("import json")
|
| 1591 |
self._emit("fork_manifest = {")
|
| 1592 |
self._emit(f' "fork_name": "{alias}",')
|
|
|
|
| 1601 |
self._emit("is_peft = hasattr(source_model, 'peft_config')")
|
| 1602 |
self._emit("if is_peft:")
|
| 1603 |
self._indent += 1
|
| 1604 |
+
self._emit("# PEFT model - save only adapter weights (small, fast)")
|
| 1605 |
self._emit('adapter_dir = os.path.join(fork_dir, "adapters")')
|
| 1606 |
self._emit("source_model.save_pretrained(adapter_dir)")
|
| 1607 |
self._emit('fork_manifest["fork_type"] = "adapter"')
|
|
|
|
| 1610 |
self._indent -= 1
|
| 1611 |
self._emit("else:")
|
| 1612 |
self._indent += 1
|
| 1613 |
+
self._emit("# Full model - clone tensors then save to safetensors")
|
| 1614 |
self._emit("from safetensors.torch import save_file")
|
| 1615 |
self._emit("state = {k: v.detach().cpu().clone() for k, v in source_model.state_dict().items()}")
|
| 1616 |
self._emit('ckpt_path = os.path.join(fork_dir, "model.safetensors")')
|
|
|
|
| 1650 |
self._emit(f'lineage["{alias}"] = {{"forked_from": "{source}", "operations": []}}')
|
| 1651 |
|
| 1652 |
def _emit_reset(self, cmd: ResetCmd) -> None:
|
| 1653 |
+
"""RESET - revert model to a previous checkpoint.
|
| 1654 |
|
| 1655 |
From test_18: del model, clear CUDA cache, reload.
|
| 1656 |
Must also reset optimizer state. Use assign=True to avoid doubling VRAM.
|
|
|
|
| 1658 |
alias = cmd.target
|
| 1659 |
checkpoint = cmd.checkpoint
|
| 1660 |
|
| 1661 |
+
self._emit(f'print("[td_lang] RESET - reverting {alias} to {checkpoint}")')
|
| 1662 |
self._emit("")
|
| 1663 |
|
| 1664 |
# Delete current model and clear CUDA
|
|
|
|
| 1683 |
self._emit("")
|
| 1684 |
self._emit("if fork_manifest_path and os.path.exists(fork_manifest_path):")
|
| 1685 |
self._indent += 1
|
| 1686 |
+
self._emit("# Loading from a fork - read manifest")
|
| 1687 |
self._emit("import json")
|
| 1688 |
self._emit("with open(fork_manifest_path) as f:")
|
| 1689 |
self._indent += 1
|
|
|
|
| 1707 |
self._emit("# Loading from a safetensors file")
|
| 1708 |
self._emit("from safetensors.torch import load_file")
|
| 1709 |
self._emit("state = load_file(ckpt_path, device='cpu')")
|
| 1710 |
+
self._emit("# Need base model architecture - reload from original")
|
| 1711 |
self._emit(f'base_ref = models.get("__base_ref_{alias}", ckpt_path)')
|
| 1712 |
self._emit("model = AutoModelForCausalLM.from_pretrained(base_ref, torch_dtype=torch.float16, device_map='cuda')")
|
| 1713 |
self._emit("try:")
|
|
|
|
| 1724 |
|
| 1725 |
# Re-register in models dict
|
| 1726 |
self._emit(f'models["{alias}"] = model')
|
| 1727 |
+
self._emit(f'print(f"[td_lang] RESET complete - {alias} restored from {checkpoint}")')
|
| 1728 |
self._emit("")
|
| 1729 |
|
| 1730 |
# Optimizer/cache handling and quick smoke eval
|
|
|
|
| 1746 |
self._indent -= 1
|
| 1747 |
|
| 1748 |
def _emit_prune(self, cmd: PruneCmd) -> None:
|
| 1749 |
+
"""PRUNE - structural pruning of language backbone.
|
| 1750 |
|
| 1751 |
From test_18: 20% structured max (LLM-Pruner). Wanda metric (Grok).
|
| 1752 |
Language backbone only, never vision encoder. Recovery: 200-800 steps LoRA.
|
|
|
|
| 1756 |
aggressiveness = cmd.aggressiveness
|
| 1757 |
|
| 1758 |
self._emit("import torch")
|
| 1759 |
+
self._emit(f'print("[td_lang] PRUNE - {method} pruning on {alias}, {aggressiveness*100:.0f}% removal")')
|
| 1760 |
self._emit(f'model = models["{alias}"]')
|
| 1761 |
self._emit("")
|
| 1762 |
|
|
|
|
| 1770 |
self._emit("")
|
| 1771 |
|
| 1772 |
# Identify language-only layers (skip vision)
|
| 1773 |
+
self._emit("# Target language backbone ONLY - never prune vision encoder")
|
| 1774 |
self._emit("# Filter for language model linear layers")
|
| 1775 |
self._emit("target_modules = []")
|
| 1776 |
self._emit("for name, module in model.named_modules():")
|
|
|
|
| 1884 |
self._indent -= 1
|
| 1885 |
self._indent -= 1
|
| 1886 |
else: # taylor
|
| 1887 |
+
self._emit("# Taylor: gradient-based importance (needs backprop - VRAM heavy)")
|
| 1888 |
+
self._emit("# Falling back to magnitude as MVP - Taylor needs calibration + backprop")
|
| 1889 |
self._emit(f'print("[td_lang] WARNING: Taylor pruning falls back to magnitude on single GPU")')
|
| 1890 |
self._emit("import torch.nn.utils.prune as prune")
|
| 1891 |
self._emit("")
|
|
|
|
| 1937 |
# ---------------------------------------------------------------- Phase 7: Loop Control emitters
|
| 1938 |
|
| 1939 |
def _emit_cmd(self, cmd, program: TDProgram) -> None:
|
| 1940 |
+
"""Emit a single command - used by repeat/if to emit body commands."""
|
| 1941 |
if isinstance(cmd, LoadCmd):
|
| 1942 |
self._emit_load(cmd)
|
| 1943 |
elif isinstance(cmd, MergeCmd):
|
|
|
|
| 1980 |
self._emit_repeat(cmd, program)
|
| 1981 |
elif isinstance(cmd, IfBlock):
|
| 1982 |
self._emit_if(cmd, program)
|
| 1983 |
+
elif isinstance(cmd, ScheduleCmd):
|
| 1984 |
+
self._emit_schedule(cmd, program)
|
| 1985 |
|
| 1986 |
def _emit_repeat(self, cmd: RepeatBlock, program: TDProgram) -> None:
|
| 1987 |
+
"""REPEAT - run a block of commands N times.
|
| 1988 |
|
| 1989 |
This is the core of td_loop: the self-improvement cycle.
|
| 1990 |
Each iteration runs the body commands in order.
|
| 1991 |
"""
|
| 1992 |
n = cmd.count
|
| 1993 |
+
self._emit(f'print("[td_lang] REPEAT - running {n} iterations")')
|
| 1994 |
self._emit(f"for _loop_iter in range({n}):")
|
| 1995 |
self._indent += 1
|
| 1996 |
self._emit(f'print(f"[td_lang] === Iteration {{_loop_iter + 1}}/{n} ===")')
|
|
|
|
| 2000 |
self._emit("elapsed_hours = (time.time() - start_time) / 3600")
|
| 2001 |
self._emit(f"if elapsed_hours >= {program.budget.max_gpu_hours}:")
|
| 2002 |
self._indent += 1
|
| 2003 |
+
self._emit('print("[td_lang] Budget exceeded inside repeat - stopping loop.")')
|
| 2004 |
self._emit("break")
|
| 2005 |
self._indent -= 1
|
| 2006 |
self._emit("")
|
|
|
|
| 2009 |
self._emit("")
|
| 2010 |
self._emit(f'print(f"[td_lang] Iteration {{_loop_iter + 1}}/{n} complete.")')
|
| 2011 |
self._indent -= 1
|
| 2012 |
+
self._emit(f'print("[td_lang] REPEAT complete - {n} iterations done.")')
|
| 2013 |
|
| 2014 |
def _emit_if(self, cmd: IfBlock, program: TDProgram) -> None:
|
| 2015 |
+
"""IF/ELSE - conditional execution based on eval results.
|
| 2016 |
|
| 2017 |
Conditions:
|
| 2018 |
- eval_passed: last eval for target had no failures
|
|
|
|
| 2022 |
condition = cmd.condition
|
| 2023 |
target = cmd.target
|
| 2024 |
|
| 2025 |
+
self._emit(f'print("[td_lang] IF - checking {condition} for {target}")')
|
| 2026 |
self._emit("")
|
| 2027 |
|
| 2028 |
# Emit condition check
|
|
|
|
| 2065 |
self._indent -= 1
|
| 2066 |
|
| 2067 |
def _emit_break_if(self, cmd: BreakIfCmd) -> None:
|
| 2068 |
+
"""BREAK_IF - early exit from repeat based on condition."""
|
| 2069 |
condition = cmd.condition
|
| 2070 |
target = cmd.target or ""
|
| 2071 |
self._emit(f'_brk_eval = results.get("{target}_eval", {{}})')
|
|
|
|
| 2078 |
self._emit(f"_brk_met = bool(results.get('{target}_{condition}', False))")
|
| 2079 |
self._emit("if _brk_met:")
|
| 2080 |
self._indent += 1
|
| 2081 |
+
self._emit('print("[td_lang] break_if triggered - exiting loop")')
|
| 2082 |
self._emit("break")
|
| 2083 |
self._indent -= 1
|
| 2084 |
|
| 2085 |
# ---------------------------------------------------------------- Phase 6: Easy Merge emitters
|
| 2086 |
|
| 2087 |
def _emit_fuse(self, cmd: FuseCmd) -> None:
|
| 2088 |
+
"""FUSE - merge multiple models into target in one command.
|
| 2089 |
|
| 2090 |
From TD merge strategy: Transport and Merge (optimal transport cross-arch merging).
|
| 2091 |
+
All 5 source models have different architectures - Transport and Merge handles this.
|
| 2092 |
Merge into language backbone only, vision encoder stays untouched.
|
| 2093 |
"""
|
| 2094 |
target = cmd.target
|
|
|
|
| 2097 |
strategy = cmd.strategy
|
| 2098 |
n = len(sources)
|
| 2099 |
|
| 2100 |
+
self._emit(f'print("[td_lang] FUSE - merging {n} models into {target} using {method}")')
|
| 2101 |
self._emit(f'print("[td_lang] Strategy: {strategy}")')
|
| 2102 |
self._emit(f"fuse_sources = {sources}")
|
| 2103 |
self._emit(f'prev_ckpt = models.get("{target}", {{}}).get("checkpoint")')
|
|
|
|
| 2113 |
self._emit(f"strengths = [round(0.5 * (0.8 ** i), 3) for i in range({n})]")
|
| 2114 |
self._emit('print(f"[td_lang] Sequential strategy: strengths = {strengths}")')
|
| 2115 |
else:
|
| 2116 |
+
# weighted - default to equal if no weights specified
|
| 2117 |
self._emit(f"per_model_strength = round(1.0 / ({n} + 1), 3)")
|
| 2118 |
self._emit("")
|
| 2119 |
|
|
|
|
| 2202 |
self._emit('"timestamp": datetime.now().isoformat(),')
|
| 2203 |
self._indent -= 1
|
| 2204 |
self._emit("})")
|
| 2205 |
+
self._emit(f'print("[td_lang] FUSE complete - {n} models merged into {target}")')
|
| 2206 |
|
| 2207 |
def _emit_absorb(self, cmd: AbsorbCmd) -> None:
|
| 2208 |
+
"""ABSORB - simplified single-model merge.
|
| 2209 |
|
| 2210 |
One-liner shortcut: absorb "model" into target [strength 0.5]
|
| 2211 |
Wraps the merge logic with sensible defaults.
|
|
|
|
| 2214 |
target = cmd.target
|
| 2215 |
strength = cmd.strength
|
| 2216 |
|
| 2217 |
+
self._emit(f'print("[td_lang] ABSORB - merging {source} into {target} (strength={strength})")')
|
| 2218 |
self._emit(f'prev_ckpt = models.get("{target}", {{}}).get("checkpoint")')
|
| 2219 |
self._emit("")
|
| 2220 |
|
|
|
|
| 2308 |
self._emit('"timestamp": datetime.now().isoformat(),')
|
| 2309 |
self._indent -= 1
|
| 2310 |
self._emit("})")
|
| 2311 |
+
self._emit(f'print("[td_lang] ABSORB complete - {source} merged into {target}")')
|
| 2312 |
|
| 2313 |
# ---------------------------------------------------------------- Phase 4 emitters
|
| 2314 |
|
| 2315 |
def _emit_data_contract(self, dc: DataContractBlock) -> None:
|
| 2316 |
+
"""Emit data contract validation - checked at synth/train time.
|
| 2317 |
|
| 2318 |
From ForgeSpec 2.0 (test_17): data contracts enforce schema on training data.
|
| 2319 |
Required fields, minimum samples, max perplexity.
|
|
|
|
| 2381 |
self._emit("")
|
| 2382 |
|
| 2383 |
def _emit_reward_contract(self, rc: RewardContractBlock) -> None:
|
| 2384 |
+
"""Emit reward contract - enforced during GRPO training.
|
| 2385 |
|
| 2386 |
From test_16: verified rewards only, no learned reward model.
|
| 2387 |
"""
|
|
|
|
| 2397 |
self._emit("")
|
| 2398 |
|
| 2399 |
def _emit_snapshot(self, cmd: SnapshotCmd, program: TDProgram) -> None:
|
| 2400 |
+
"""SNAPSHOT - content-hashed model state for artifact lineage.
|
| 2401 |
|
| 2402 |
From ForgeSpec 2.0 (test_17): every model state gets a content-addressed hash.
|
| 2403 |
Directory contains: model weights/adapters, eval report, prune spec, manifest.
|
|
|
|
| 2405 |
alias = cmd.target
|
| 2406 |
output_dir = cmd.output or "td_lang_outputs/snapshots"
|
| 2407 |
|
| 2408 |
+
self._emit(f'print("[td_lang] SNAPSHOT - saving content-hashed state for {alias}")')
|
| 2409 |
self._emit("import hashlib, json, time")
|
| 2410 |
self._emit(f'snap_model = models["{alias}"]')
|
| 2411 |
self._emit("")
|
|
|
|
| 2435 |
self._emit("")
|
| 2436 |
|
| 2437 |
# Write manifest
|
| 2438 |
+
self._emit("# Snapshot manifest - full provenance record")
|
| 2439 |
self._emit("snap_manifest = {")
|
| 2440 |
self._indent += 1
|
| 2441 |
self._emit(f'"alias": "{alias}",')
|
|
|
|
| 2486 |
self._emit("})")
|
| 2487 |
|
| 2488 |
def _emit_report(self, cmd: ReportCmd, program: TDProgram) -> None:
|
| 2489 |
+
"""REPORT - economics report for the run.
|
| 2490 |
|
| 2491 |
Tracks GPU hours, cost, tokens, time per command.
|
| 2492 |
From test_17 ForgeSpec 2.0: economics reports for cost tracking.
|
| 2493 |
"""
|
| 2494 |
output = cmd.output or "economics_report.json"
|
| 2495 |
|
| 2496 |
+
self._emit('print("[td_lang] REPORT - generating economics report")')
|
| 2497 |
self._emit("elapsed = time.time() - start_time")
|
| 2498 |
self._emit("")
|
| 2499 |
self._emit("report = {")
|
|
|
|
| 2569 |
# ---------------------------------------------------------------- Phase 8: Autopilot emitters
|
| 2570 |
|
| 2571 |
def _emit_setup(self, setup: SetupBlock) -> None:
|
| 2572 |
+
"""SETUP - auto-install dependencies and configure environment.
|
| 2573 |
|
| 2574 |
Runs at script start: pip install, HF token, ntfy config.
|
| 2575 |
"""
|
| 2576 |
+
self._emit("# ========== SETUP (Phase 8 - Autopilot) ==========")
|
| 2577 |
+
self._emit('print("[td_lang] SETUP - configuring environment...")')
|
| 2578 |
self._emit("")
|
| 2579 |
|
| 2580 |
# pip install
|
|
|
|
| 2593 |
self._emit("except Exception as e:")
|
| 2594 |
self._indent += 1
|
| 2595 |
self._emit('print(f"[td_lang] WARNING: pip install failed: {e}")')
|
| 2596 |
+
self._emit('print("[td_lang] Continuing anyway - packages may already be installed.")')
|
| 2597 |
self._indent -= 1
|
| 2598 |
self._emit("")
|
| 2599 |
|
|
|
|
| 2662 |
self._emit("")
|
| 2663 |
|
| 2664 |
def _emit_on_error(self, on_error: OnErrorBlock, program: TDProgram) -> None:
|
| 2665 |
+
"""ON_ERROR - wrap each step in retry/fallback logic.
|
| 2666 |
|
| 2667 |
Emits a td_safe_run() helper that wraps any function call with:
|
| 2668 |
- Retry N times on failure
|
| 2669 |
- Fallback strategies (reduce batch, skip, snapshot+stop)
|
| 2670 |
- Optional ntfy notification on error
|
| 2671 |
"""
|
| 2672 |
+
self._emit("# ========== ON_ERROR (Phase 8 - Crash Recovery) ==========")
|
| 2673 |
self._emit(f"TD_MAX_RETRIES = {on_error.retry}")
|
| 2674 |
self._emit(f'TD_FALLBACK = "{on_error.fallback}"')
|
| 2675 |
self._emit(f"TD_NOTIFY_ON_ERROR = {on_error.notify}")
|
|
|
|
| 2701 |
self._indent -= 1
|
| 2702 |
self._emit('elif TD_FALLBACK == "snapshot_and_stop":')
|
| 2703 |
self._indent += 1
|
| 2704 |
+
self._emit('print(f"[td_lang] OOM - saving snapshot and stopping.")')
|
| 2705 |
self._emit("if TD_NOTIFY_ON_ERROR:")
|
| 2706 |
self._indent += 1
|
| 2707 |
+
self._emit('td_notify(f"OOM on {step_name} - snapshot saved, stopping.")')
|
| 2708 |
self._indent -= 1
|
| 2709 |
self._emit("raise")
|
| 2710 |
self._indent -= 2
|
|
|
|
| 2716 |
self._indent += 1
|
| 2717 |
self._emit("if TD_NOTIFY_ON_ERROR:")
|
| 2718 |
self._indent += 1
|
| 2719 |
+
self._emit('td_notify(f"FAILED: {step_name} after {TD_MAX_RETRIES} retries - {e}")')
|
| 2720 |
self._indent -= 1
|
| 2721 |
self._emit('if TD_FALLBACK == "skip":')
|
| 2722 |
self._indent += 1
|
|
|
|
| 2729 |
self._emit("")
|
| 2730 |
|
| 2731 |
def _emit_notify(self, cmd: NotifyCmd, program: TDProgram) -> None:
|
| 2732 |
+
"""NOTIFY - send message via ntfy.sh."""
|
| 2733 |
msg = cmd.message.replace('"', '\\"')
|
| 2734 |
self._emit(f'td_notify("{msg}")')
|
| 2735 |
|
| 2736 |
def _emit_save(self, cmd: SaveCmd, program: TDProgram) -> None:
|
| 2737 |
+
"""SAVE - upload model to cloud storage via rclone.
|
| 2738 |
|
| 2739 |
Uses rclone to copy model checkpoint/adapters to Google Drive or any remote.
|
| 2740 |
"""
|
| 2741 |
alias = cmd.target
|
| 2742 |
dest = cmd.destination
|
| 2743 |
|
| 2744 |
+
self._emit(f'print("[td_lang] SAVE - uploading {alias} to {dest}")')
|
| 2745 |
self._emit("")
|
| 2746 |
|
| 2747 |
# Find the model's checkpoint directory
|
|
|
|
| 2772 |
self._indent += 1
|
| 2773 |
self._emit("import subprocess as _sp")
|
| 2774 |
self._emit("_sp.check_call(_rclone_cmd)")
|
| 2775 |
+
self._emit(f'print("[td_lang] SAVE complete - {alias} uploaded to {dest}")')
|
| 2776 |
self._emit(f'td_notify("Model {alias} saved to {dest}")')
|
| 2777 |
self._indent -= 1
|
| 2778 |
self._emit("except FileNotFoundError:")
|
|
|
|
| 2803 |
self._indent -= 1
|
| 2804 |
self._emit("})")
|
| 2805 |
|
| 2806 |
+
# ---------------------------------------------------------------- Phase 9: Schedule
|
| 2807 |
+
def _emit_schedule(self, cmd: ScheduleCmd, program: TDProgram) -> None:
|
| 2808 |
+
"""SCHEDULE - time-based command execution.
|
| 2809 |
+
|
| 2810 |
+
Patterns:
|
| 2811 |
+
"every 6h" → loop with time.sleep(6*3600)
|
| 2812 |
+
"every 30m" → loop with time.sleep(30*60)
|
| 2813 |
+
"at 02:00" → wait until that time, run once
|
| 2814 |
+
"after 30m" → sleep then run once
|
| 2815 |
+
"""
|
| 2816 |
+
timing = cmd.timing.strip()
|
| 2817 |
+
self._emit(f'print("[td_lang] SCHEDULE - timing: {timing}")')
|
| 2818 |
+
self._emit("import time as _time")
|
| 2819 |
+
self._emit("from datetime import datetime as _dt, timedelta as _td")
|
| 2820 |
+
self._emit("")
|
| 2821 |
+
|
| 2822 |
+
if timing.startswith("every "):
|
| 2823 |
+
# Parse interval: "every 6h" or "every 30m"
|
| 2824 |
+
interval_str = timing[6:].strip()
|
| 2825 |
+
self._emit(f'_interval_str = "{interval_str}"')
|
| 2826 |
+
self._emit("if _interval_str.endswith('h'):")
|
| 2827 |
+
self._indent += 1
|
| 2828 |
+
self._emit("_interval_secs = int(_interval_str[:-1]) * 3600")
|
| 2829 |
+
self._indent -= 1
|
| 2830 |
+
self._emit("elif _interval_str.endswith('m'):")
|
| 2831 |
+
self._indent += 1
|
| 2832 |
+
self._emit("_interval_secs = int(_interval_str[:-1]) * 60")
|
| 2833 |
+
self._indent -= 1
|
| 2834 |
+
self._emit("else:")
|
| 2835 |
+
self._indent += 1
|
| 2836 |
+
self._emit("_interval_secs = int(_interval_str) * 3600 # default to hours")
|
| 2837 |
+
self._indent -= 1
|
| 2838 |
+
self._emit('print(f"[td_lang] Running every {_interval_secs}s ({_interval_str}). Ctrl+C to stop.")')
|
| 2839 |
+
self._emit("_sched_iter = 0")
|
| 2840 |
+
self._emit("while True:")
|
| 2841 |
+
self._indent += 1
|
| 2842 |
+
self._emit("_sched_iter += 1")
|
| 2843 |
+
self._emit('print(f"[td_lang] Schedule iteration {_sched_iter} starting at {_dt.now()}")')
|
| 2844 |
+
for body_cmd in cmd.body:
|
| 2845 |
+
self._emit_cmd(body_cmd, program)
|
| 2846 |
+
self._emit('print(f"[td_lang] Iteration {_sched_iter} done. Sleeping {_interval_secs}s...")')
|
| 2847 |
+
self._emit("_time.sleep(_interval_secs)")
|
| 2848 |
+
self._indent -= 1
|
| 2849 |
+
|
| 2850 |
+
elif timing.startswith("at "):
|
| 2851 |
+
# Parse time: "at 02:00"
|
| 2852 |
+
time_str = timing[3:].strip()
|
| 2853 |
+
self._emit(f'_target_time = _dt.strptime("{time_str}", "%H:%M").time()')
|
| 2854 |
+
self._emit("_now = _dt.now()")
|
| 2855 |
+
self._emit("_target = _dt.combine(_now.date(), _target_time)")
|
| 2856 |
+
self._emit("if _target <= _now:")
|
| 2857 |
+
self._indent += 1
|
| 2858 |
+
self._emit("_target += _td(days=1) # schedule for tomorrow if time already passed")
|
| 2859 |
+
self._indent -= 1
|
| 2860 |
+
self._emit("_wait = (_target - _now).total_seconds()")
|
| 2861 |
+
self._emit('print(f"[td_lang] Waiting {_wait:.0f}s until {_target}...")')
|
| 2862 |
+
self._emit("_time.sleep(_wait)")
|
| 2863 |
+
self._emit('print(f"[td_lang] Scheduled time reached: {_dt.now()}")')
|
| 2864 |
+
for body_cmd in cmd.body:
|
| 2865 |
+
self._emit_cmd(body_cmd, program)
|
| 2866 |
+
|
| 2867 |
+
elif timing.startswith("after "):
|
| 2868 |
+
# Parse delay: "after 30m" or "after 2h"
|
| 2869 |
+
delay_str = timing[6:].strip()
|
| 2870 |
+
self._emit(f'_delay_str = "{delay_str}"')
|
| 2871 |
+
self._emit("if _delay_str.endswith('h'):")
|
| 2872 |
+
self._indent += 1
|
| 2873 |
+
self._emit("_delay_secs = int(_delay_str[:-1]) * 3600")
|
| 2874 |
+
self._indent -= 1
|
| 2875 |
+
self._emit("elif _delay_str.endswith('m'):")
|
| 2876 |
+
self._indent += 1
|
| 2877 |
+
self._emit("_delay_secs = int(_delay_str[:-1]) * 60")
|
| 2878 |
+
self._indent -= 1
|
| 2879 |
+
self._emit("else:")
|
| 2880 |
+
self._indent += 1
|
| 2881 |
+
self._emit("_delay_secs = int(_delay_str) * 3600")
|
| 2882 |
+
self._indent -= 1
|
| 2883 |
+
self._emit('print(f"[td_lang] Waiting {_delay_secs}s before running...")')
|
| 2884 |
+
self._emit("_time.sleep(_delay_secs)")
|
| 2885 |
+
self._emit('print(f"[td_lang] Delay complete. Running scheduled commands...")')
|
| 2886 |
+
for body_cmd in cmd.body:
|
| 2887 |
+
self._emit_cmd(body_cmd, program)
|
| 2888 |
+
|
| 2889 |
+
else:
|
| 2890 |
+
self._emit(f'print("[td_lang] WARNING: Unknown schedule pattern: {timing}")')
|
| 2891 |
+
self._emit('print("[td_lang] Supported: every Nh/Nm, at HH:MM, after Nh/Nm")')
|
| 2892 |
+
|
| 2893 |
# ---------------------------------------------------------------- Budget + summary
|
| 2894 |
def _emit_budget_check(self, program: TDProgram) -> None:
|
| 2895 |
budget = program.budget or BudgetBlock()
|
|
|
|
| 2960 |
est_gpu += 0.05 # mostly disk I/O + hashing
|
| 2961 |
elif isinstance(cmd, ReportCmd):
|
| 2962 |
est_gpu += 0.01 # just JSON output
|
| 2963 |
+
elif isinstance(cmd, ScheduleCmd):
|
| 2964 |
+
body_est = 1.0 * len(cmd.body)
|
| 2965 |
+
est_gpu += body_est # at least one run
|
| 2966 |
+
elif isinstance(cmd, (NotifyCmd, SaveCmd)):
|
| 2967 |
+
est_gpu += 0.01
|
| 2968 |
|
| 2969 |
est_cost = est_gpu * self.GPU_HOURLY
|
| 2970 |
|
hugging/td_lang/errors.py
CHANGED
|
@@ -87,6 +87,7 @@ COMMON_FIXES = {
|
|
| 87 |
"report": "Format: report [-> economics.json]",
|
| 88 |
"fuse": 'Format: fuse ["model1", "model2"] into target [strategy equal]',
|
| 89 |
"absorb": 'Format: absorb "model" into target [strength 0.5]',
|
|
|
|
| 90 |
}
|
| 91 |
|
| 92 |
|
|
|
|
| 87 |
"report": "Format: report [-> economics.json]",
|
| 88 |
"fuse": 'Format: fuse ["model1", "model2"] into target [strategy equal]',
|
| 89 |
"absorb": 'Format: absorb "model" into target [strength 0.5]',
|
| 90 |
+
"schedule": 'Format: schedule "every 6h" { commands... } or schedule "at 02:00" { ... }',
|
| 91 |
}
|
| 92 |
|
| 93 |
|
hugging/td_lang/examples/demo_schedule.td
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Demo: Schedule command (Phase 9)
|
| 2 |
+
# Run training on a repeating interval ("every 6h"), at a fixed time ("at 02:00"), or after a delay ("after 30m")
|
| 3 |
+
|
| 4 |
+
setup {
|
| 5 |
+
pip = [torch, transformers, peft, bitsandbytes, trl]
|
| 6 |
+
hf_token = env
|
| 7 |
+
notify = "ntfy.sh/my_ai"
|
| 8 |
+
}
|
| 9 |
+
|
| 10 |
+
on_error {
|
| 11 |
+
retry = 3
|
| 12 |
+
fallback = reduce_batch
|
| 13 |
+
notify = true
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
load "Qwen/Qwen3-VL-8B-Instruct" as base
|
| 17 |
+
|
| 18 |
+
# Run training loop every 6 hours (overnight training)
|
| 19 |
+
schedule "every 6h" {
|
| 20 |
+
diagnose base -> weaknesses.json
|
| 21 |
+
synth base from base filter cherry_llm -> training_data.jsonl
|
| 22 |
+
train base on "training_data.jsonl" using grpo steps 64 lr 5e-5
|
| 23 |
+
eval base -> eval_results.json
|
| 24 |
+
if eval_passed base {
|
| 25 |
+
commit base
|
| 26 |
+
snapshot base -> snapshots/
|
| 27 |
+
save base to "gdrive:TD/models/latest"
|
| 28 |
+
notify "Training cycle passed! Model improved."
|
| 29 |
+
} else {
|
| 30 |
+
reset base to "snapshots/"
|
| 31 |
+
notify "Training cycle failed. Reset to last good."
|
| 32 |
+
}
|
| 33 |
+
}
|
hugging/td_lang/grammar.py
CHANGED
|
@@ -32,6 +32,7 @@ from .ast_nodes import (
|
|
| 32 |
ResetCmd,
|
| 33 |
RewardContractBlock,
|
| 34 |
SaveCmd,
|
|
|
|
| 35 |
SetupBlock,
|
| 36 |
SnapshotCmd,
|
| 37 |
SynthCmd,
|
|
@@ -78,6 +79,7 @@ TD_GRAMMAR = r"""
|
|
| 78 |
| reward_contract_block
|
| 79 |
| setup_block
|
| 80 |
| on_error_block
|
|
|
|
| 81 |
|
| 82 |
// ======================== PHASE 1 COMMANDS ========================
|
| 83 |
|
|
@@ -151,7 +153,7 @@ TD_GRAMMAR = r"""
|
|
| 151 |
| fork_cmd | reset_cmd | prune_cmd | edit_cmd
|
| 152 |
| fuse_cmd | absorb_cmd | snapshot_cmd | report_cmd
|
| 153 |
| notify_cmd | save_cmd
|
| 154 |
-
| repeat_block_cmd | if_block_cmd) _NL*
|
| 155 |
|
| 156 |
// ======================== PHASE 6 — EASY MERGE COMMANDS ========================
|
| 157 |
|
|
@@ -224,6 +226,13 @@ TD_GRAMMAR = r"""
|
|
| 224 |
onerr_fallback: "fallback" "=" IDENT
|
| 225 |
onerr_notify: "notify" "=" IDENT
|
| 226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
// ======================== SHARED RULES ========================
|
| 228 |
|
| 229 |
// List of names: [name1, name2, name3]
|
|
@@ -454,6 +463,11 @@ class TDTransformer(Transformer):
|
|
| 454 |
def else_clause(self, *body_cmds) -> list:
    """Return the received body commands as a plain list.

    Each positional argument is one item of the clause body; the order
    in which they were passed is preserved. With no arguments the result
    is an empty list.
    """
    return [*body_cmds]
|
| 456 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
# --- Phase 6: Easy Merge Commands ---
|
| 458 |
|
| 459 |
def fuse_cmd(self, sources: list[str], target: str, *opts) -> FuseCmd:
|
|
|
|
| 32 |
ResetCmd,
|
| 33 |
RewardContractBlock,
|
| 34 |
SaveCmd,
|
| 35 |
+
ScheduleCmd,
|
| 36 |
SetupBlock,
|
| 37 |
SnapshotCmd,
|
| 38 |
SynthCmd,
|
|
|
|
| 79 |
| reward_contract_block
|
| 80 |
| setup_block
|
| 81 |
| on_error_block
|
| 82 |
+
| schedule_cmd
|
| 83 |
|
| 84 |
// ======================== PHASE 1 COMMANDS ========================
|
| 85 |
|
|
|
|
| 153 |
| fork_cmd | reset_cmd | prune_cmd | edit_cmd
|
| 154 |
| fuse_cmd | absorb_cmd | snapshot_cmd | report_cmd
|
| 155 |
| notify_cmd | save_cmd
|
| 156 |
+
| repeat_block_cmd | if_block_cmd | schedule_cmd) _NL*
|
| 157 |
|
| 158 |
// ======================== PHASE 6 — EASY MERGE COMMANDS ========================
|
| 159 |
|
|
|
|
| 226 |
onerr_fallback: "fallback" "=" IDENT
|
| 227 |
onerr_notify: "notify" "=" IDENT
|
| 228 |
|
| 229 |
+
// ======================== PHASE 9 — SCHEDULE ========================
|
| 230 |
+
|
| 231 |
+
// schedule "every 6h" { commands... }
|
| 232 |
+
// schedule "at 02:00" { commands... }
|
| 233 |
+
// schedule "after 30m" { commands... }
|
| 234 |
+
schedule_cmd: "schedule" string "{" _NL* body_cmd+ _NL* "}"
|
| 235 |
+
|
| 236 |
// ======================== SHARED RULES ========================
|
| 237 |
|
| 238 |
// List of names: [name1, name2, name3]
|
|
|
|
| 463 |
def else_clause(self, *body_cmds) -> list:
    """Return the received body commands as a plain list.

    Each positional argument is one item of the clause body; the order
    in which they were passed is preserved. With no arguments the result
    is an empty list.
    """
    return [*body_cmds]
|
| 465 |
|
| 466 |
+
# --- Phase 9: Schedule ---
|
| 467 |
+
|
| 468 |
+
def schedule_cmd(self, timing: str, *body_cmds) -> ScheduleCmd:
    """Build the ``ScheduleCmd`` node for a ``schedule "<timing>" { ... }`` block.

    Args:
        timing: The schedule's string operand, e.g. "every 6h", "at 02:00"
            or "after 30m". It is stored as received, without validation.
        *body_cmds: The commands inside the braces, one per positional
            argument, kept in the order they were passed.

    Returns:
        A ``ScheduleCmd`` whose ``body`` is a list of the given commands.
    """
    scheduled_body = list(body_cmds)
    return ScheduleCmd(timing=timing, body=scheduled_body)
|
| 470 |
+
|
| 471 |
# --- Phase 6: Easy Merge Commands ---
|
| 472 |
|
| 473 |
def fuse_cmd(self, sources: list[str], target: str, *opts) -> FuseCmd:
|