td-builder committed on
Commit
2834afb
·
verified ·
1 Parent(s): 9a9bead

Upload 56 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  hugging/td_lang/__pycache__/compiler.cpython-314.pyc filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  hugging/td_lang/__pycache__/compiler.cpython-314.pyc filter=lfs diff=lfs merge=lfs -text
37
+ hugging/td_lang/__pycache__/compiler.cpython-310.pyc filter=lfs diff=lfs merge=lfs -text
hugging/td_lang/__init__.py CHANGED
@@ -27,6 +27,11 @@ Phase 2: diagnose, synth, train, debate
27
  Phase 3: fork, reset, prune, edit
28
  Phase 4: snapshot, report, data_contract, reward_contract
29
  Phase 5: CLI polish, --version, info command, --verbose
 
 
 
 
 
30
 
31
  Designed from interviews test_14 (10 commands) and test_17 (ForgeSpec 2.0).
32
  """
 
27
  Phase 3: fork, reset, prune, edit
28
  Phase 4: snapshot, report, data_contract, reward_contract
29
  Phase 5: CLI polish, --version, info command, --verbose
30
+ Phase 6: fuse, absorb (easy merge)
31
+ Phase 7: repeat, if/else (loop control)
32
+ Phase 8: setup, on_error, notify, save (autopilot)
33
+ Phase 9: schedule (time-based execution)
34
+ Engine upgrades: QLoRA training, self-contained eval, model-generated synth problems
35
 
36
  Designed from interviews test_14 (10 commands) and test_17 (ForgeSpec 2.0).
37
  """
hugging/td_lang/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/hugging/td_lang/__pycache__/__init__.cpython-310.pyc and b/hugging/td_lang/__pycache__/__init__.cpython-310.pyc differ
 
hugging/td_lang/__pycache__/ast_nodes.cpython-310.pyc CHANGED
Binary files a/hugging/td_lang/__pycache__/ast_nodes.cpython-310.pyc and b/hugging/td_lang/__pycache__/ast_nodes.cpython-310.pyc differ
 
hugging/td_lang/__pycache__/cli.cpython-310.pyc CHANGED
Binary files a/hugging/td_lang/__pycache__/cli.cpython-310.pyc and b/hugging/td_lang/__pycache__/cli.cpython-310.pyc differ
 
hugging/td_lang/__pycache__/compiler.cpython-310.pyc CHANGED
Binary files a/hugging/td_lang/__pycache__/compiler.cpython-310.pyc and b/hugging/td_lang/__pycache__/compiler.cpython-310.pyc differ
 
hugging/td_lang/__pycache__/errors.cpython-310.pyc CHANGED
Binary files a/hugging/td_lang/__pycache__/errors.cpython-310.pyc and b/hugging/td_lang/__pycache__/errors.cpython-310.pyc differ
 
hugging/td_lang/__pycache__/grammar.cpython-310.pyc CHANGED
Binary files a/hugging/td_lang/__pycache__/grammar.cpython-310.pyc and b/hugging/td_lang/__pycache__/grammar.cpython-310.pyc differ
 
hugging/td_lang/ast_nodes.py CHANGED
@@ -304,6 +304,28 @@ class OnErrorBlock:
304
  notify: bool = True # Send ntfy notification on error
305
 
306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  # ============================================================================
308
  # BLOCKS (gates, budget, contracts, etc.)
309
  # ============================================================================
@@ -417,5 +439,6 @@ __all__ = [
417
  "BudgetBlock",
418
  "DataContractBlock",
419
  "RewardContractBlock",
 
420
  "TDProgram",
421
  ]
 
304
  notify: bool = True # Send ntfy notification on error
305
 
306
 
307
+ # ============================================================================
308
+ # PHASE 9 — SCHEDULE (time-based execution)
309
+ # ============================================================================
310
+
311
+ @dataclass
312
+ class ScheduleCmd:
313
+ """Schedule a block of commands to run at a specific time or interval. (Phase 9)
314
+
315
+ Examples:
316
+ schedule "every 6h" { diagnose base; train base ... }
317
+ schedule "at 02:00" { train base on "data.jsonl" using grpo }
318
+ schedule "after 30m" { eval base -> results.json }
319
+
320
+ Patterns:
321
+ "every Nh/Nm" — repeat every N hours/minutes
322
+ "at HH:MM" — run once at that time
323
+ "after Nh/Nm" — delay then run once
324
+ """
325
+ timing: str # "every 6h", "at 02:00", "after 30m"
326
+ body: List[Any] = field(default_factory=list) # Commands inside the block
327
+
328
+
329
  # ============================================================================
330
  # BLOCKS (gates, budget, contracts, etc.)
331
  # ============================================================================
 
439
  "BudgetBlock",
440
  "DataContractBlock",
441
  "RewardContractBlock",
442
+ "ScheduleCmd",
443
  "TDProgram",
444
  ]
hugging/td_lang/cli.py CHANGED
@@ -21,7 +21,7 @@ from .ast_nodes import (
21
  SynthCmd, TrainCmd, DebateCmd, DiagnoseCmd,
22
  ForkCmd, ResetCmd, PruneCmd, EditCmd,
23
  FuseCmd, AbsorbCmd, RepeatBlock, IfBlock,
24
- NotifyCmd, SaveCmd,
25
  SnapshotCmd, ReportCmd,
26
  )
27
 
@@ -49,6 +49,7 @@ _PHASE_MAP = {
49
  SaveCmd: ("8", "save"),
50
  SnapshotCmd: ("4", "snapshot"),
51
  ReportCmd: ("4", "report"),
 
52
  }
53
 
54
 
 
21
  SynthCmd, TrainCmd, DebateCmd, DiagnoseCmd,
22
  ForkCmd, ResetCmd, PruneCmd, EditCmd,
23
  FuseCmd, AbsorbCmd, RepeatBlock, IfBlock,
24
+ NotifyCmd, SaveCmd, ScheduleCmd,
25
  SnapshotCmd, ReportCmd,
26
  )
27
 
 
49
  SaveCmd: ("8", "save"),
50
  SnapshotCmd: ("4", "snapshot"),
51
  ReportCmd: ("4", "report"),
52
+ ScheduleCmd: ("9", "schedule"),
53
  }
54
 
55
 
hugging/td_lang/compiler.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- TD Lang Compiler turns a TDProgram AST into readable Python code that calls td_fuse.
3
 
4
  Phase 1 commands: load, merge, heal, eval, commit.
5
  Phase 2 commands: synth, train, debate, diagnose.
@@ -38,6 +38,7 @@ from .ast_nodes import (
38
  ResetCmd,
39
  RewardContractBlock,
40
  SaveCmd,
 
41
  SetupBlock,
42
  SnapshotCmd,
43
  SynthCmd,
@@ -46,7 +47,7 @@ from .ast_nodes import (
46
  )
47
  from .errors import TDCompileError
48
 
49
- # All command types are now implemented (Phase 1 + 2 + 3)
50
 
51
 
52
  class TDCompiler:
@@ -86,25 +87,25 @@ class TDCompiler:
86
  elif isinstance(cmd, MergeCmd):
87
  if cmd.target not in seen:
88
  raise TDCompileError(
89
- f"Can't merge into '{cmd.target}' it hasn't been loaded yet.",
90
  hint=f'Add: load "{cmd.source}" as {cmd.target}',
91
  )
92
  elif isinstance(cmd, (HealCmd, EvalCmd, CommitCmd)):
93
  if cmd.target not in seen:
94
  raise TDCompileError(
95
- f"Can't use '{cmd.target}' it hasn't been loaded yet.",
96
  hint=f'Add: load "model/path" as {cmd.target}',
97
  )
98
  elif isinstance(cmd, (SynthCmd, TrainCmd, DebateCmd, DiagnoseCmd)):
99
  if cmd.target not in seen:
100
  raise TDCompileError(
101
- f"Can't use '{cmd.target}' it hasn't been loaded yet.",
102
  hint=f'Add: load "model/path" as {cmd.target}',
103
  )
104
  elif isinstance(cmd, ForkCmd):
105
  if cmd.source not in seen:
106
  raise TDCompileError(
107
- f"Can't fork '{cmd.source}' it hasn't been loaded yet.",
108
  hint=f'Add: load "model/path" as {cmd.source}',
109
  )
110
  if cmd.alias in seen:
@@ -115,21 +116,21 @@ class TDCompiler:
115
  elif isinstance(cmd, (ResetCmd, PruneCmd, EditCmd)):
116
  if cmd.target not in seen:
117
  raise TDCompileError(
118
- f"Can't use '{cmd.target}' it hasn't been loaded yet.",
119
  hint=f'Add: load "model/path" as {cmd.target}',
120
  )
121
  elif isinstance(cmd, SnapshotCmd):
122
  if cmd.target not in seen:
123
  raise TDCompileError(
124
- f"Can't snapshot '{cmd.target}' it hasn't been loaded yet.",
125
  hint=f'Add: load "model/path" as {cmd.target}',
126
  )
127
  elif isinstance(cmd, ReportCmd):
128
- pass # report has no target always valid
129
  elif isinstance(cmd, FuseCmd):
130
  if cmd.target not in seen:
131
  raise TDCompileError(
132
- f"Can't fuse into '{cmd.target}' it hasn't been loaded yet.",
133
  hint=f'Add: load "model/path" as {cmd.target}',
134
  )
135
  if len(cmd.sources) < 1:
@@ -140,9 +141,13 @@ class TDCompiler:
140
  elif isinstance(cmd, AbsorbCmd):
141
  if cmd.target not in seen:
142
  raise TDCompileError(
143
- f"Can't absorb into '{cmd.target}' it hasn't been loaded yet.",
144
  hint=f'Add: load "model/path" as {cmd.target}',
145
  )
 
 
 
 
146
 
147
  # ---------------------------------------------------------------- Build script
148
  def _build_script(self, program: TDProgram) -> None:
@@ -158,7 +163,7 @@ Source: {source_name}
158
  Compiled: {timestamp}
159
  Hash: {source_hash}
160
 
161
- DO NOT EDIT regenerate from the .td file instead.
162
  """'''
163
  )
164
  self._emit(doc)
@@ -282,6 +287,8 @@ DO NOT EDIT — regenerate from the .td file instead.
282
  self._emit_notify(cmd, program)
283
  elif isinstance(cmd, SaveCmd):
284
  self._emit_save(cmd, program)
 
 
285
  self._emit("")
286
 
287
  self._emit_summary()
@@ -311,7 +318,7 @@ DO NOT EDIT — regenerate from the .td file instead.
311
  self._indent -= 1
312
  self._emit("except ImportError:")
313
  self._indent += 1
314
- self._emit('print("[td_lang] huggingface_hub not installed. Storing ref only download will happen at merge time.")')
315
  self._emit("_local_path = _model_ref")
316
  self._indent -= 1
317
  self._emit("except Exception as e:")
@@ -381,7 +388,7 @@ DO NOT EDIT — regenerate from the .td file instead.
381
  self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
382
  self._emit("if not checkpoint:")
383
  self._indent += 1
384
- self._emit('print("[td_lang] WARNING: No checkpoint to heal run a merge first.")')
385
  self._indent -= 1
386
  self._emit("else:")
387
  self._indent += 1
@@ -400,42 +407,144 @@ DO NOT EDIT — regenerate from the .td file instead.
400
  self._indent -= 1
401
 
402
  def _emit_eval(self, cmd: EvalCmd) -> None:
 
 
 
 
 
 
403
  self._emit(f'print("[td_lang] Evaluating {cmd.target}...")')
404
  self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
405
  self._emit("if not checkpoint:")
406
  self._indent += 1
407
- self._emit('print("[td_lang] WARNING: No checkpoint to evaluate.")')
408
  self._indent -= 1
409
- self._emit("else:")
410
- self._indent += 1
411
  self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
412
- self._emit("import torch")
413
  self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
414
  self._emit("model = AutoModelForCausalLM.from_pretrained(")
415
  self._indent += 1
416
  self._emit('checkpoint, torch_dtype=torch.bfloat16, device_map="auto"')
417
  self._indent -= 1
418
  self._emit(")")
419
- self._emit("eval_result = validate_merged_model(")
 
 
 
 
 
420
  self._indent += 1
421
- self._emit("model=model, tokenizer=tok,")
422
- self._emit("merged_sources=merged_stages,")
423
- self._emit("cfg=MergeConfig(),")
 
 
424
  self._indent -= 1
425
- self._emit(")")
426
- self._emit(f'results["{cmd.target}_eval"] = eval_result')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  self._emit(f'hist_key = "{cmd.target}_eval_history"')
428
  self._emit("if hist_key not in results:")
429
  self._indent += 1
430
  self._emit("results[hist_key] = []")
431
  self._indent -= 1
432
- self._emit("metric = 1.0 if eval_result.get('overall', False) else 0.0")
433
- self._emit("results[hist_key].append(metric)")
 
434
  self._emit(f'lineage["{cmd.target}"]["operations"].append({{')
435
  self._indent += 1
436
  self._emit('"op": "eval",')
437
  self._emit('"timestamp": datetime.now().isoformat(),')
438
- self._emit('"result": eval_result,')
 
439
  self._indent -= 1
440
  self._emit("})")
441
  if cmd.output:
@@ -450,7 +559,6 @@ DO NOT EDIT — regenerate from the .td file instead.
450
  self._emit('print("[td_lang] Eval results:", json.dumps(eval_result, indent=2, default=str))')
451
  self._emit("del model, tok")
452
  self._emit("import gc; gc.collect()")
453
- self._indent -= 1
454
 
455
  def _emit_commit(self, cmd: CommitCmd, global_gates: Optional[GateBlock]) -> None:
456
  gates = cmd.gates or (global_gates.must_pass if global_gates else None)
@@ -484,7 +592,7 @@ DO NOT EDIT — regenerate from the .td file instead.
484
  self._indent -= 1
485
  self._emit("if failed:")
486
  self._indent += 1
487
- self._emit('raise TDGateError(failed, message="Commit blocked gates failed")')
488
  self._indent -= 1
489
  self._emit("else:")
490
  self._indent += 1
@@ -523,7 +631,7 @@ DO NOT EDIT — regenerate from the .td file instead.
523
  self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
524
  self._emit("if not checkpoint:")
525
  self._indent += 1
526
- self._emit('print("[td_lang] WARNING: No checkpoint using model_ref instead.")')
527
  self._emit(f'checkpoint = models["{cmd.target}"]["model_ref"]')
528
  self._indent -= 1
529
  self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
@@ -559,11 +667,66 @@ DO NOT EDIT — regenerate from the .td file instead.
559
  self._emit('print(f" Response: {response[:200]}...")')
560
  self._emit("print()")
561
  self._indent -= 1
562
- self._emit(f'results["{cmd.target}_diagnose"] = diagnose_results')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
  self._emit(f'lineage["{cmd.target}"]["operations"].append({{')
564
  self._indent += 1
565
  self._emit('"op": "diagnose",')
566
  self._emit('"n_prompts": len(diag_prompts),')
 
567
  self._emit('"timestamp": datetime.now().isoformat(),')
568
  self._indent -= 1
569
  self._emit("})")
@@ -572,7 +735,7 @@ DO NOT EDIT — regenerate from the .td file instead.
572
  self._emit("diag_path.parent.mkdir(parents=True, exist_ok=True)")
573
  self._emit('with open(diag_path, "w") as f:')
574
  self._indent += 1
575
- self._emit("json.dump(diagnose_results, f, indent=2, default=str)")
576
  self._indent -= 1
577
  self._emit('print(f"[td_lang] Diagnosis saved to {diag_path}")')
578
  self._emit("del model, tok")
@@ -604,20 +767,30 @@ DO NOT EDIT — regenerate from the .td file instead.
604
  self._emit(")")
605
  self._emit("model.eval()")
606
  self._emit("")
607
- self._emit("# Weakness-aware topic selection from diagnosis (if available)")
608
- self._emit(f'diag = results.get("{cmd.target}_diagnose", [])')
 
 
 
 
 
 
 
 
609
  self._emit("weak_topics = []")
610
- self._emit("for d in diag:")
 
611
  self._indent += 1
612
  self._emit("resp = d.get('response', '')")
613
- self._emit("for topic in ['math', 'code', 'logic', 'factual', 'long chain', 'tools']:")
614
  self._indent += 1
615
- self._emit("if topic in resp.lower():")
616
  self._indent += 1
617
  self._emit("weak_topics.append(topic)")
618
  self._indent -= 1
619
  self._indent -= 1
620
  self._indent -= 1
 
621
  self._emit("if not weak_topics:")
622
  self._indent += 1
623
  self._emit("weak_topics = ['math', 'code', 'logic', 'factual']")
@@ -637,21 +810,81 @@ DO NOT EDIT — regenerate from the .td file instead.
637
  self._indent -= 1
638
  self._emit("}")
639
  self._emit("")
640
- self._emit("def make_problem(domain: str) -> str:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
641
  self._indent += 1
642
- self._emit("if domain == 'math':")
 
 
 
 
 
 
643
  self._indent += 1
644
- self._emit("return 'Compute (17*19 - 121) / 3' if random.random() < 0.5 else 'Integrate x^2 from 0 to 3'")
 
 
 
 
 
645
  self._indent -= 1
646
- self._emit("if domain == 'code':")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
647
  self._indent += 1
648
- self._emit("return 'Implement Dijkstra shortest path' if random.random() < 0.5 else 'Parse JSON safely in Python'")
649
  self._indent -= 1
650
- self._emit("if domain == 'logic':")
 
 
651
  self._indent += 1
652
- self._emit("return 'Does the conclusion follow? If all A are B and all B are C, are all A C?'")
 
 
 
 
653
  self._indent -= 1
654
- self._emit("return 'Summarize the causes of the 2008 financial crisis in 3 bullet points.'")
 
 
 
 
 
 
655
  self._indent -= 1
656
  self._emit("")
657
  self._emit("synth_data = []")
@@ -738,18 +971,51 @@ DO NOT EDIT — regenerate from the .td file instead.
738
  self._emit("")
739
 
740
  if cmd.method == "grpo":
741
- self._emit("# GRPO training (test_15: 64 steps sweet spot, eval every 16)")
 
742
  self._emit("from trl import GRPOConfig, GRPOTrainer")
743
- self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
 
744
  self._emit("from datasets import load_dataset")
745
  self._emit("import torch")
746
  self._emit("")
747
  self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
748
  self._emit("model = AutoModelForCausalLM.from_pretrained(")
749
  self._indent += 1
750
- self._emit('checkpoint, torch_dtype=torch.bfloat16, device_map="auto"')
 
 
751
  self._indent -= 1
752
  self._emit(")")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
753
  self._emit("")
754
  self._emit(f'# Load training data')
755
  self._emit(f'dataset_path = "{cmd.dataset}"')
@@ -772,6 +1038,7 @@ DO NOT EDIT — regenerate from the .td file instead.
772
  self._emit('output_dir="td_lang_outputs/grpo_training",')
773
  self._emit("save_steps=16,")
774
  self._emit('bf16=True,')
 
775
  self._indent -= 1
776
  self._emit(")")
777
  self._emit("")
@@ -875,7 +1142,7 @@ DO NOT EDIT — regenerate from the .td file instead.
875
  self._emit("if logs['kl'] > 3.1 * ma:")
876
  self._indent += 1
877
  self._emit("control.should_training_stop = True")
878
- self._emit("print('[td_lang][early_stop] KL spike detected stopping GRPO')")
879
  self._indent -= 2
880
  self._indent -= 1
881
  self._emit("if 'eval/reward' in logs:")
@@ -884,7 +1151,7 @@ DO NOT EDIT — regenerate from the .td file instead.
884
  self._emit("if len(self.eval_rewards) >= 2 and self.eval_rewards[-1] < self.eval_rewards[-2]:")
885
  self._indent += 1
886
  self._emit("control.should_training_stop = True")
887
- self._emit("print('[td_lang][early_stop] Validation reward drop stopping GRPO')")
888
  self._indent -= 1
889
  self._indent -= 1
890
  self._emit("if 'policy_entropy' in logs:")
@@ -896,7 +1163,7 @@ DO NOT EDIT — regenerate from the .td file instead.
896
  self._emit("if self.entropy_history[-1] < 0.93 * baseline:")
897
  self._indent += 1
898
  self._emit("control.should_training_stop = True")
899
- self._emit("print('[td_lang][early_stop] Diversity collapsed stopping GRPO')")
900
  self._indent -= 2
901
  self._indent -= 2
902
  self._indent -= 1
@@ -915,8 +1182,9 @@ DO NOT EDIT — regenerate from the .td file instead.
915
  self._emit(f'models["{cmd.target}"]["checkpoint"] = "td_lang_outputs/grpo_trained"')
916
 
917
  elif cmd.method in ("sft", "dpo"):
918
- self._emit(f"# {cmd.method.upper()} training")
919
- self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments")
 
920
  if cmd.method == "sft":
921
  self._emit("from trl import SFTTrainer")
922
  else:
@@ -925,11 +1193,29 @@ DO NOT EDIT — regenerate from the .td file instead.
925
  self._emit("import torch")
926
  self._emit("")
927
  self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
 
 
 
 
 
 
 
 
 
 
 
 
 
928
  self._emit("model = AutoModelForCausalLM.from_pretrained(")
929
  self._indent += 1
930
- self._emit('checkpoint, torch_dtype=torch.bfloat16, device_map="auto"')
931
  self._indent -= 1
932
  self._emit(")")
 
 
 
 
 
933
  self._emit(f'dataset_path = "{cmd.dataset}"')
934
  self._emit("if dataset_path.endswith('.jsonl'):")
935
  self._indent += 1
@@ -1140,7 +1426,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1140
  # ---------------------------------------------------------------- Phase 3 emitters
1141
 
1142
  def _emit_edit(self, cmd: EditCmd) -> None:
1143
- """EDIT surgical LoRA/DoRA on specific layers.
1144
 
1145
  From test_18: all 3 AIs agree LoRA is safe default, DoRA beats by 1-4%.
1146
  layers_to_transform supports targeting specific layers (e.g., 16-28).
@@ -1151,7 +1437,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1151
  layers = cmd.layers # "all", "16-28", or single number
1152
  lr = cmd.learning_rate or 1e-4
1153
 
1154
- self._emit(f'print("[td_lang] EDIT surgical {method} on {alias}, layers={layers}")')
1155
  self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
1156
  self._emit("import torch")
1157
  self._emit("from peft import LoraConfig, get_peft_model, PeftModel")
@@ -1207,7 +1493,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1207
  self._emit("")
1208
 
1209
  # Apply adapter
1210
- self._emit("# Inject adapter base weights stay frozen")
1211
  self._emit("model = get_peft_model(model, edit_config)")
1212
  self._emit("model.print_trainable_parameters()")
1213
  self._emit("")
@@ -1226,7 +1512,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1226
  self._indent -= 1
1227
  self._emit("")
1228
 
1229
- # "Try before buy" actual eval with adapters on vs off
1230
  self._emit('sample_prompts = ["What is 7+8?", "Explain photosynthesis in one paragraph.", "Write a Python function fib(n)."]')
1231
  self._emit("def run_quick_eval(enable_adapters: bool):")
1232
  self._indent += 1
@@ -1266,19 +1552,19 @@ DO NOT EDIT — regenerate from the .td file instead.
1266
  self._indent -= 1
1267
  self._emit("")
1268
 
1269
- # Save adapter (don't merge yet let commit/gates decide)
1270
  self._emit(f'edit_save_dir = os.path.join(output_dir, "{alias}_edit_{method}")')
1271
  self._emit("os.makedirs(edit_save_dir, exist_ok=True)")
1272
  self._emit("model.save_pretrained(edit_save_dir)")
1273
  self._emit(f'print(f"[td_lang] EDIT adapter saved to {{edit_save_dir}}")')
1274
- self._emit(f'print("[td_lang] Adapter NOT merged use commit with gates to merge permanently")')
1275
  self._emit("")
1276
 
1277
  # Update models dict
1278
  self._emit(f'models["{alias}"] = model')
1279
 
1280
  def _emit_fork(self, cmd: ForkCmd) -> None:
1281
- """FORK branch current model weights for parallel experiments.
1282
 
1283
  From test_18: all 3 AIs say disk-based only on 4090.
1284
  Cheap fork = copy manifest + adapter files, share base weights.
@@ -1287,7 +1573,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1287
  source = cmd.source
1288
  alias = cmd.alias
1289
 
1290
- self._emit(f'print("[td_lang] FORK branching {source} as {alias}")')
1291
  self._emit(f'source_model = models["{source}"]')
1292
  self._emit("import torch")
1293
  self._emit("")
@@ -1300,7 +1586,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1300
  self._emit("")
1301
 
1302
  # Write manifest
1303
- self._emit("# Write fork manifest tracks lineage")
1304
  self._emit("import json")
1305
  self._emit("fork_manifest = {")
1306
  self._emit(f' "fork_name": "{alias}",')
@@ -1315,7 +1601,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1315
  self._emit("is_peft = hasattr(source_model, 'peft_config')")
1316
  self._emit("if is_peft:")
1317
  self._indent += 1
1318
- self._emit("# PEFT model save only adapter weights (small, fast)")
1319
  self._emit('adapter_dir = os.path.join(fork_dir, "adapters")')
1320
  self._emit("source_model.save_pretrained(adapter_dir)")
1321
  self._emit('fork_manifest["fork_type"] = "adapter"')
@@ -1324,7 +1610,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1324
  self._indent -= 1
1325
  self._emit("else:")
1326
  self._indent += 1
1327
- self._emit("# Full model clone tensors then save to safetensors")
1328
  self._emit("from safetensors.torch import save_file")
1329
  self._emit("state = {k: v.detach().cpu().clone() for k, v in source_model.state_dict().items()}")
1330
  self._emit('ckpt_path = os.path.join(fork_dir, "model.safetensors")')
@@ -1364,7 +1650,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1364
  self._emit(f'lineage["{alias}"] = {{"forked_from": "{source}", "operations": []}}')
1365
 
1366
  def _emit_reset(self, cmd: ResetCmd) -> None:
1367
- """RESET revert model to a previous checkpoint.
1368
 
1369
  From test_18: del model, clear CUDA cache, reload.
1370
  Must also reset optimizer state. Use assign=True to avoid doubling VRAM.
@@ -1372,7 +1658,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1372
  alias = cmd.target
1373
  checkpoint = cmd.checkpoint
1374
 
1375
- self._emit(f'print("[td_lang] RESET reverting {alias} to {checkpoint}")')
1376
  self._emit("")
1377
 
1378
  # Delete current model and clear CUDA
@@ -1397,7 +1683,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1397
  self._emit("")
1398
  self._emit("if fork_manifest_path and os.path.exists(fork_manifest_path):")
1399
  self._indent += 1
1400
- self._emit("# Loading from a fork read manifest")
1401
  self._emit("import json")
1402
  self._emit("with open(fork_manifest_path) as f:")
1403
  self._indent += 1
@@ -1421,7 +1707,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1421
  self._emit("# Loading from a safetensors file")
1422
  self._emit("from safetensors.torch import load_file")
1423
  self._emit("state = load_file(ckpt_path, device='cpu')")
1424
- self._emit("# Need base model architecture reload from original")
1425
  self._emit(f'base_ref = models.get("__base_ref_{alias}", ckpt_path)')
1426
  self._emit("model = AutoModelForCausalLM.from_pretrained(base_ref, torch_dtype=torch.float16, device_map='cuda')")
1427
  self._emit("try:")
@@ -1438,7 +1724,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1438
 
1439
  # Re-register in models dict
1440
  self._emit(f'models["{alias}"] = model')
1441
- self._emit(f'print(f"[td_lang] RESET complete {alias} restored from {checkpoint}")')
1442
  self._emit("")
1443
 
1444
  # Optimizer/cache handling and quick smoke eval
@@ -1460,7 +1746,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1460
  self._indent -= 1
1461
 
1462
  def _emit_prune(self, cmd: PruneCmd) -> None:
1463
- """PRUNE structural pruning of language backbone.
1464
 
1465
  From test_18: 20% structured max (LLM-Pruner). Wanda metric (Grok).
1466
  Language backbone only, never vision encoder. Recovery: 200-800 steps LoRA.
@@ -1470,7 +1756,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1470
  aggressiveness = cmd.aggressiveness
1471
 
1472
  self._emit("import torch")
1473
- self._emit(f'print("[td_lang] PRUNE {method} pruning on {alias}, {aggressiveness*100:.0f}% removal")')
1474
  self._emit(f'model = models["{alias}"]')
1475
  self._emit("")
1476
 
@@ -1484,7 +1770,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1484
  self._emit("")
1485
 
1486
  # Identify language-only layers (skip vision)
1487
- self._emit("# Target language backbone ONLY never prune vision encoder")
1488
  self._emit("# Filter for language model linear layers")
1489
  self._emit("target_modules = []")
1490
  self._emit("for name, module in model.named_modules():")
@@ -1598,8 +1884,8 @@ DO NOT EDIT — regenerate from the .td file instead.
1598
  self._indent -= 1
1599
  self._indent -= 1
1600
  else: # taylor
1601
- self._emit("# Taylor: gradient-based importance (needs backprop VRAM heavy)")
1602
- self._emit("# Falling back to magnitude as MVP Taylor needs calibration + backprop")
1603
  self._emit(f'print("[td_lang] WARNING: Taylor pruning falls back to magnitude on single GPU")')
1604
  self._emit("import torch.nn.utils.prune as prune")
1605
  self._emit("")
@@ -1651,7 +1937,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1651
  # ---------------------------------------------------------------- Phase 7: Loop Control emitters
1652
 
1653
  def _emit_cmd(self, cmd, program: TDProgram) -> None:
1654
- """Emit a single command used by repeat/if to emit body commands."""
1655
  if isinstance(cmd, LoadCmd):
1656
  self._emit_load(cmd)
1657
  elif isinstance(cmd, MergeCmd):
@@ -1694,15 +1980,17 @@ DO NOT EDIT — regenerate from the .td file instead.
1694
  self._emit_repeat(cmd, program)
1695
  elif isinstance(cmd, IfBlock):
1696
  self._emit_if(cmd, program)
 
 
1697
 
1698
  def _emit_repeat(self, cmd: RepeatBlock, program: TDProgram) -> None:
1699
- """REPEAT run a block of commands N times.
1700
 
1701
  This is the core of td_loop: the self-improvement cycle.
1702
  Each iteration runs the body commands in order.
1703
  """
1704
  n = cmd.count
1705
- self._emit(f'print("[td_lang] REPEAT running {n} iterations")')
1706
  self._emit(f"for _loop_iter in range({n}):")
1707
  self._indent += 1
1708
  self._emit(f'print(f"[td_lang] === Iteration {{_loop_iter + 1}}/{n} ===")')
@@ -1712,7 +2000,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1712
  self._emit("elapsed_hours = (time.time() - start_time) / 3600")
1713
  self._emit(f"if elapsed_hours >= {program.budget.max_gpu_hours}:")
1714
  self._indent += 1
1715
- self._emit('print("[td_lang] Budget exceeded inside repeat stopping loop.")')
1716
  self._emit("break")
1717
  self._indent -= 1
1718
  self._emit("")
@@ -1721,10 +2009,10 @@ DO NOT EDIT — regenerate from the .td file instead.
1721
  self._emit("")
1722
  self._emit(f'print(f"[td_lang] Iteration {{_loop_iter + 1}}/{n} complete.")')
1723
  self._indent -= 1
1724
- self._emit(f'print("[td_lang] REPEAT complete {n} iterations done.")')
1725
 
1726
  def _emit_if(self, cmd: IfBlock, program: TDProgram) -> None:
1727
- """IF/ELSE conditional execution based on eval results.
1728
 
1729
  Conditions:
1730
  - eval_passed: last eval for target had no failures
@@ -1734,7 +2022,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1734
  condition = cmd.condition
1735
  target = cmd.target
1736
 
1737
- self._emit(f'print("[td_lang] IF checking {condition} for {target}")')
1738
  self._emit("")
1739
 
1740
  # Emit condition check
@@ -1777,7 +2065,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1777
  self._indent -= 1
1778
 
1779
  def _emit_break_if(self, cmd: BreakIfCmd) -> None:
1780
- """BREAK_IF early exit from repeat based on condition."""
1781
  condition = cmd.condition
1782
  target = cmd.target or ""
1783
  self._emit(f'_brk_eval = results.get("{target}_eval", {{}})')
@@ -1790,17 +2078,17 @@ DO NOT EDIT — regenerate from the .td file instead.
1790
  self._emit(f"_brk_met = bool(results.get('{target}_{condition}', False))")
1791
  self._emit("if _brk_met:")
1792
  self._indent += 1
1793
- self._emit('print("[td_lang] break_if triggered exiting loop")')
1794
  self._emit("break")
1795
  self._indent -= 1
1796
 
1797
  # ---------------------------------------------------------------- Phase 6: Easy Merge emitters
1798
 
1799
  def _emit_fuse(self, cmd: FuseCmd) -> None:
1800
- """FUSE merge multiple models into target in one command.
1801
 
1802
  From TD merge strategy: Transport and Merge (optimal transport cross-arch merging).
1803
- All 5 source models have different architectures Transport and Merge handles this.
1804
  Merge into language backbone only, vision encoder stays untouched.
1805
  """
1806
  target = cmd.target
@@ -1809,7 +2097,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1809
  strategy = cmd.strategy
1810
  n = len(sources)
1811
 
1812
- self._emit(f'print("[td_lang] FUSE merging {n} models into {target} using {method}")')
1813
  self._emit(f'print("[td_lang] Strategy: {strategy}")')
1814
  self._emit(f"fuse_sources = {sources}")
1815
  self._emit(f'prev_ckpt = models.get("{target}", {{}}).get("checkpoint")')
@@ -1825,7 +2113,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1825
  self._emit(f"strengths = [round(0.5 * (0.8 ** i), 3) for i in range({n})]")
1826
  self._emit('print(f"[td_lang] Sequential strategy: strengths = {strengths}")')
1827
  else:
1828
- # weighted default to equal if no weights specified
1829
  self._emit(f"per_model_strength = round(1.0 / ({n} + 1), 3)")
1830
  self._emit("")
1831
 
@@ -1914,10 +2202,10 @@ DO NOT EDIT — regenerate from the .td file instead.
1914
  self._emit('"timestamp": datetime.now().isoformat(),')
1915
  self._indent -= 1
1916
  self._emit("})")
1917
- self._emit(f'print("[td_lang] FUSE complete {n} models merged into {target}")')
1918
 
1919
  def _emit_absorb(self, cmd: AbsorbCmd) -> None:
1920
- """ABSORB simplified single-model merge.
1921
 
1922
  One-liner shortcut: absorb "model" into target [strength 0.5]
1923
  Wraps the merge logic with sensible defaults.
@@ -1926,7 +2214,7 @@ DO NOT EDIT — regenerate from the .td file instead.
1926
  target = cmd.target
1927
  strength = cmd.strength
1928
 
1929
- self._emit(f'print("[td_lang] ABSORB merging {source} into {target} (strength={strength})")')
1930
  self._emit(f'prev_ckpt = models.get("{target}", {{}}).get("checkpoint")')
1931
  self._emit("")
1932
 
@@ -2020,12 +2308,12 @@ DO NOT EDIT — regenerate from the .td file instead.
2020
  self._emit('"timestamp": datetime.now().isoformat(),')
2021
  self._indent -= 1
2022
  self._emit("})")
2023
- self._emit(f'print("[td_lang] ABSORB complete {source} merged into {target}")')
2024
 
2025
  # ---------------------------------------------------------------- Phase 4 emitters
2026
 
2027
  def _emit_data_contract(self, dc: DataContractBlock) -> None:
2028
- """Emit data contract validation checked at synth/train time.
2029
 
2030
  From ForgeSpec 2.0 (test_17): data contracts enforce schema on training data.
2031
  Required fields, minimum samples, max perplexity.
@@ -2093,7 +2381,7 @@ DO NOT EDIT — regenerate from the .td file instead.
2093
  self._emit("")
2094
 
2095
  def _emit_reward_contract(self, rc: RewardContractBlock) -> None:
2096
- """Emit reward contract enforced during GRPO training.
2097
 
2098
  From test_16: verified rewards only, no learned reward model.
2099
  """
@@ -2109,7 +2397,7 @@ DO NOT EDIT — regenerate from the .td file instead.
2109
  self._emit("")
2110
 
2111
  def _emit_snapshot(self, cmd: SnapshotCmd, program: TDProgram) -> None:
2112
- """SNAPSHOT content-hashed model state for artifact lineage.
2113
 
2114
  From ForgeSpec 2.0 (test_17): every model state gets a content-addressed hash.
2115
  Directory contains: model weights/adapters, eval report, prune spec, manifest.
@@ -2117,7 +2405,7 @@ DO NOT EDIT — regenerate from the .td file instead.
2117
  alias = cmd.target
2118
  output_dir = cmd.output or "td_lang_outputs/snapshots"
2119
 
2120
- self._emit(f'print("[td_lang] SNAPSHOT saving content-hashed state for {alias}")')
2121
  self._emit("import hashlib, json, time")
2122
  self._emit(f'snap_model = models["{alias}"]')
2123
  self._emit("")
@@ -2147,7 +2435,7 @@ DO NOT EDIT — regenerate from the .td file instead.
2147
  self._emit("")
2148
 
2149
  # Write manifest
2150
- self._emit("# Snapshot manifest full provenance record")
2151
  self._emit("snap_manifest = {")
2152
  self._indent += 1
2153
  self._emit(f'"alias": "{alias}",')
@@ -2198,14 +2486,14 @@ DO NOT EDIT — regenerate from the .td file instead.
2198
  self._emit("})")
2199
 
2200
  def _emit_report(self, cmd: ReportCmd, program: TDProgram) -> None:
2201
- """REPORT economics report for the run.
2202
 
2203
  Tracks GPU hours, cost, tokens, time per command.
2204
  From test_17 ForgeSpec 2.0: economics reports for cost tracking.
2205
  """
2206
  output = cmd.output or "economics_report.json"
2207
 
2208
- self._emit('print("[td_lang] REPORT generating economics report")')
2209
  self._emit("elapsed = time.time() - start_time")
2210
  self._emit("")
2211
  self._emit("report = {")
@@ -2281,12 +2569,12 @@ DO NOT EDIT — regenerate from the .td file instead.
2281
  # ---------------------------------------------------------------- Phase 8: Autopilot emitters
2282
 
2283
  def _emit_setup(self, setup: SetupBlock) -> None:
2284
- """SETUP auto-install dependencies and configure environment.
2285
 
2286
  Runs at script start: pip install, HF token, ntfy config.
2287
  """
2288
- self._emit("# ========== SETUP (Phase 8 Autopilot) ==========")
2289
- self._emit('print("[td_lang] SETUP configuring environment...")')
2290
  self._emit("")
2291
 
2292
  # pip install
@@ -2305,7 +2593,7 @@ DO NOT EDIT — regenerate from the .td file instead.
2305
  self._emit("except Exception as e:")
2306
  self._indent += 1
2307
  self._emit('print(f"[td_lang] WARNING: pip install failed: {e}")')
2308
- self._emit('print("[td_lang] Continuing anyway packages may already be installed.")')
2309
  self._indent -= 1
2310
  self._emit("")
2311
 
@@ -2374,14 +2662,14 @@ DO NOT EDIT — regenerate from the .td file instead.
2374
  self._emit("")
2375
 
2376
  def _emit_on_error(self, on_error: OnErrorBlock, program: TDProgram) -> None:
2377
- """ON_ERROR wrap each step in retry/fallback logic.
2378
 
2379
  Emits a td_safe_run() helper that wraps any function call with:
2380
  - Retry N times on failure
2381
  - Fallback strategies (reduce batch, skip, snapshot+stop)
2382
  - Optional ntfy notification on error
2383
  """
2384
- self._emit("# ========== ON_ERROR (Phase 8 Crash Recovery) ==========")
2385
  self._emit(f"TD_MAX_RETRIES = {on_error.retry}")
2386
  self._emit(f'TD_FALLBACK = "{on_error.fallback}"')
2387
  self._emit(f"TD_NOTIFY_ON_ERROR = {on_error.notify}")
@@ -2413,10 +2701,10 @@ DO NOT EDIT — regenerate from the .td file instead.
2413
  self._indent -= 1
2414
  self._emit('elif TD_FALLBACK == "snapshot_and_stop":')
2415
  self._indent += 1
2416
- self._emit('print(f"[td_lang] OOM saving snapshot and stopping.")')
2417
  self._emit("if TD_NOTIFY_ON_ERROR:")
2418
  self._indent += 1
2419
- self._emit('td_notify(f"OOM on {step_name} snapshot saved, stopping.")')
2420
  self._indent -= 1
2421
  self._emit("raise")
2422
  self._indent -= 2
@@ -2428,7 +2716,7 @@ DO NOT EDIT — regenerate from the .td file instead.
2428
  self._indent += 1
2429
  self._emit("if TD_NOTIFY_ON_ERROR:")
2430
  self._indent += 1
2431
- self._emit('td_notify(f"FAILED: {step_name} after {TD_MAX_RETRIES} retries {e}")')
2432
  self._indent -= 1
2433
  self._emit('if TD_FALLBACK == "skip":')
2434
  self._indent += 1
@@ -2441,19 +2729,19 @@ DO NOT EDIT — regenerate from the .td file instead.
2441
  self._emit("")
2442
 
2443
  def _emit_notify(self, cmd: NotifyCmd, program: TDProgram) -> None:
2444
- """NOTIFY send message via ntfy.sh."""
2445
  msg = cmd.message.replace('"', '\\"')
2446
  self._emit(f'td_notify("{msg}")')
2447
 
2448
  def _emit_save(self, cmd: SaveCmd, program: TDProgram) -> None:
2449
- """SAVE upload model to cloud storage via rclone.
2450
 
2451
  Uses rclone to copy model checkpoint/adapters to Google Drive or any remote.
2452
  """
2453
  alias = cmd.target
2454
  dest = cmd.destination
2455
 
2456
- self._emit(f'print("[td_lang] SAVE uploading {alias} to {dest}")')
2457
  self._emit("")
2458
 
2459
  # Find the model's checkpoint directory
@@ -2484,7 +2772,7 @@ DO NOT EDIT — regenerate from the .td file instead.
2484
  self._indent += 1
2485
  self._emit("import subprocess as _sp")
2486
  self._emit("_sp.check_call(_rclone_cmd)")
2487
- self._emit(f'print("[td_lang] SAVE complete {alias} uploaded to {dest}")')
2488
  self._emit(f'td_notify("Model {alias} saved to {dest}")')
2489
  self._indent -= 1
2490
  self._emit("except FileNotFoundError:")
@@ -2515,6 +2803,93 @@ DO NOT EDIT — regenerate from the .td file instead.
2515
  self._indent -= 1
2516
  self._emit("})")
2517
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2518
  # ---------------------------------------------------------------- Budget + summary
2519
  def _emit_budget_check(self, program: TDProgram) -> None:
2520
  budget = program.budget or BudgetBlock()
@@ -2585,6 +2960,11 @@ DO NOT EDIT — regenerate from the .td file instead.
2585
  est_gpu += 0.05 # mostly disk I/O + hashing
2586
  elif isinstance(cmd, ReportCmd):
2587
  est_gpu += 0.01 # just JSON output
 
 
 
 
 
2588
 
2589
  est_cost = est_gpu * self.GPU_HOURLY
2590
 
 
1
  """
2
+ TD Lang Compiler - turns a TDProgram AST into readable Python code that calls td_fuse.
3
 
4
  Phase 1 commands: load, merge, heal, eval, commit.
5
  Phase 2 commands: synth, train, debate, diagnose.
 
38
  ResetCmd,
39
  RewardContractBlock,
40
  SaveCmd,
41
+ ScheduleCmd,
42
  SetupBlock,
43
  SnapshotCmd,
44
  SynthCmd,
 
47
  )
48
  from .errors import TDCompileError
49
 
50
+ # All command types are now implemented (Phase 1 + 2 + 3 + ... + 9)
51
 
52
 
53
  class TDCompiler:
 
87
  elif isinstance(cmd, MergeCmd):
88
  if cmd.target not in seen:
89
  raise TDCompileError(
90
+ f"Can't merge into '{cmd.target}' - it hasn't been loaded yet.",
91
  hint=f'Add: load "{cmd.source}" as {cmd.target}',
92
  )
93
  elif isinstance(cmd, (HealCmd, EvalCmd, CommitCmd)):
94
  if cmd.target not in seen:
95
  raise TDCompileError(
96
+ f"Can't use '{cmd.target}' - it hasn't been loaded yet.",
97
  hint=f'Add: load "model/path" as {cmd.target}',
98
  )
99
  elif isinstance(cmd, (SynthCmd, TrainCmd, DebateCmd, DiagnoseCmd)):
100
  if cmd.target not in seen:
101
  raise TDCompileError(
102
+ f"Can't use '{cmd.target}' - it hasn't been loaded yet.",
103
  hint=f'Add: load "model/path" as {cmd.target}',
104
  )
105
  elif isinstance(cmd, ForkCmd):
106
  if cmd.source not in seen:
107
  raise TDCompileError(
108
+ f"Can't fork '{cmd.source}' - it hasn't been loaded yet.",
109
  hint=f'Add: load "model/path" as {cmd.source}',
110
  )
111
  if cmd.alias in seen:
 
116
  elif isinstance(cmd, (ResetCmd, PruneCmd, EditCmd)):
117
  if cmd.target not in seen:
118
  raise TDCompileError(
119
+ f"Can't use '{cmd.target}' - it hasn't been loaded yet.",
120
  hint=f'Add: load "model/path" as {cmd.target}',
121
  )
122
  elif isinstance(cmd, SnapshotCmd):
123
  if cmd.target not in seen:
124
  raise TDCompileError(
125
+ f"Can't snapshot '{cmd.target}' - it hasn't been loaded yet.",
126
  hint=f'Add: load "model/path" as {cmd.target}',
127
  )
128
  elif isinstance(cmd, ReportCmd):
129
+ pass # report has no target - always valid
130
  elif isinstance(cmd, FuseCmd):
131
  if cmd.target not in seen:
132
  raise TDCompileError(
133
+ f"Can't fuse into '{cmd.target}' - it hasn't been loaded yet.",
134
  hint=f'Add: load "model/path" as {cmd.target}',
135
  )
136
  if len(cmd.sources) < 1:
 
141
  elif isinstance(cmd, AbsorbCmd):
142
  if cmd.target not in seen:
143
  raise TDCompileError(
144
+ f"Can't absorb into '{cmd.target}' - it hasn't been loaded yet.",
145
  hint=f'Add: load "model/path" as {cmd.target}',
146
  )
147
+ elif isinstance(cmd, (RepeatBlock, IfBlock, ScheduleCmd)):
148
+ pass # block commands - body validation happens at emit time
149
+ elif isinstance(cmd, (NotifyCmd, SaveCmd)):
150
+ pass # utility commands - always valid
151
 
152
  # ---------------------------------------------------------------- Build script
153
  def _build_script(self, program: TDProgram) -> None:
 
163
  Compiled: {timestamp}
164
  Hash: {source_hash}
165
 
166
+ DO NOT EDIT - regenerate from the .td file instead.
167
  """'''
168
  )
169
  self._emit(doc)
 
287
  self._emit_notify(cmd, program)
288
  elif isinstance(cmd, SaveCmd):
289
  self._emit_save(cmd, program)
290
+ elif isinstance(cmd, ScheduleCmd):
291
+ self._emit_schedule(cmd, program)
292
  self._emit("")
293
 
294
  self._emit_summary()
 
318
  self._indent -= 1
319
  self._emit("except ImportError:")
320
  self._indent += 1
321
+ self._emit('print("[td_lang] huggingface_hub not installed. Storing ref only - download will happen at merge time.")')
322
  self._emit("_local_path = _model_ref")
323
  self._indent -= 1
324
  self._emit("except Exception as e:")
 
388
  self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
389
  self._emit("if not checkpoint:")
390
  self._indent += 1
391
+ self._emit('print("[td_lang] WARNING: No checkpoint to heal - run a merge first.")')
392
  self._indent -= 1
393
  self._emit("else:")
394
  self._indent += 1
 
407
  self._indent -= 1
408
 
409
  def _emit_eval(self, cmd: EvalCmd) -> None:
410
+ """Generate self-contained evaluation - math, code, reasoning, perplexity.
411
+
412
+ No dependency on td_fuse. Tests the model on real tasks and returns
413
+ pass/fail plus scores per category. Uses 'improved' flag to track
414
+ whether the model got better vs previous eval.
415
+ """
416
  self._emit(f'print("[td_lang] Evaluating {cmd.target}...")')
417
  self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
418
  self._emit("if not checkpoint:")
419
  self._indent += 1
420
+ self._emit(f'checkpoint = models["{cmd.target}"]["model_ref"]')
421
  self._indent -= 1
 
 
422
  self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
423
+ self._emit("import torch, re, ast")
424
  self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
425
  self._emit("model = AutoModelForCausalLM.from_pretrained(")
426
  self._indent += 1
427
  self._emit('checkpoint, torch_dtype=torch.bfloat16, device_map="auto"')
428
  self._indent -= 1
429
  self._emit(")")
430
+ self._emit("model.eval()")
431
+ self._emit("")
432
+ self._emit("# Mini-benchmark: math, code, reasoning, perplexity")
433
+ self._emit("eval_tests = {")
434
+ self._indent += 1
435
+ self._emit('"math": [')
436
  self._indent += 1
437
+ self._emit('{"prompt": "What is 17 * 23? Answer with just the number.", "answer": "391"},')
438
+ self._emit('{"prompt": "What is 144 / 12? Answer with just the number.", "answer": "12"},')
439
+ self._emit('{"prompt": "What is 256 + 789? Answer with just the number.", "answer": "1045"},')
440
+ self._emit('{"prompt": "What is 15 squared? Answer with just the number.", "answer": "225"},')
441
+ self._emit('{"prompt": "What is the square root of 81? Answer with just the number.", "answer": "9"},')
442
  self._indent -= 1
443
+ self._emit("],")
444
+ self._emit('"code": [')
445
+ self._indent += 1
446
+ self._emit('{"prompt": "Write a Python function that returns the sum of a list. Just the function, nothing else.", "check": "def"},')
447
+ self._emit('{"prompt": "Write a Python function to check if a number is prime. Just the function.", "check": "def"},')
448
+ self._emit('{"prompt": "Write a Python one-liner list comprehension that squares numbers 1-10.", "check": "["},')
449
+ self._indent -= 1
450
+ self._emit("],")
451
+ self._emit('"reasoning": [')
452
+ self._indent += 1
453
+ self._emit('{"prompt": "If all dogs are animals, and all animals breathe, do all dogs breathe? Answer yes or no.", "answer": "yes"},')
454
+ self._emit('{"prompt": "A bat and ball cost $1.10 together. The bat costs $1 more than the ball. How much does the ball cost? Answer with just the number.", "answer": "0.05"},')
455
+ self._emit('{"prompt": "If it takes 5 machines 5 minutes to make 5 widgets, how long would it take 100 machines to make 100 widgets? Answer in minutes.", "answer": "5"},')
456
+ self._indent -= 1
457
+ self._emit("],")
458
+ self._indent -= 1
459
+ self._emit("}")
460
+ self._emit("")
461
+ self._emit("eval_result = {'overall': True, 'scores': {}, 'details': {}}")
462
+ self._emit("total_correct = 0")
463
+ self._emit("total_tests = 0")
464
+ self._emit("")
465
+ self._emit("for category, tests in eval_tests.items():")
466
+ self._indent += 1
467
+ self._emit("cat_correct = 0")
468
+ self._emit("cat_details = []")
469
+ self._emit("for test in tests:")
470
+ self._indent += 1
471
+ self._emit("total_tests += 1")
472
+ self._emit('inputs = tok(test["prompt"], return_tensors="pt").to(model.device)')
473
+ self._emit("with torch.no_grad():")
474
+ self._indent += 1
475
+ self._emit("output = model.generate(**inputs, max_new_tokens=256, do_sample=False, temperature=0.0)")
476
+ self._indent -= 1
477
+ self._emit("response = tok.decode(output[0], skip_special_tokens=True)")
478
+ self._emit('# Strip the prompt from the response if model echoes it')
479
+ self._emit('if response.startswith(test["prompt"]):')
480
+ self._indent += 1
481
+ self._emit('response = response[len(test["prompt"]):].strip()')
482
+ self._indent -= 1
483
+ self._emit("passed = False")
484
+ self._emit('if "answer" in test:')
485
+ self._indent += 1
486
+ self._emit('passed = test["answer"].lower() in response.lower()')
487
+ self._indent -= 1
488
+ self._emit('elif "check" in test:')
489
+ self._indent += 1
490
+ self._emit('passed = test["check"] in response')
491
+ self._emit("# Also try to parse as valid Python")
492
+ self._emit("try:")
493
+ self._indent += 1
494
+ self._emit("ast.parse(response)")
495
+ self._indent -= 1
496
+ self._emit("except SyntaxError:")
497
+ self._indent += 1
498
+ self._emit("passed = False # Code doesn't compile")
499
+ self._indent -= 2
500
+ self._emit("if passed:")
501
+ self._indent += 1
502
+ self._emit("cat_correct += 1")
503
+ self._emit("total_correct += 1")
504
+ self._indent -= 1
505
+ self._emit('cat_details.append({"prompt": test["prompt"][:60], "passed": passed})')
506
+ self._indent -= 1
507
+ self._emit("score = cat_correct / max(len(tests), 1)")
508
+ self._emit('eval_result["scores"][category] = round(score, 3)')
509
+ self._emit('eval_result["details"][category] = cat_details')
510
+ self._emit('print(f" {category}: {cat_correct}/{len(tests)} ({score:.0%})")')
511
+ self._indent -= 1
512
+ self._emit("")
513
+ self._emit("# Perplexity test (lower = model is more confident/coherent)")
514
+ self._emit('ppl_text = "The capital of France is Paris. Water boils at 100 degrees Celsius."')
515
+ self._emit('ppl_inputs = tok(ppl_text, return_tensors="pt").to(model.device)')
516
+ self._emit("with torch.no_grad():")
517
+ self._indent += 1
518
+ self._emit('ppl_loss = model(**ppl_inputs, labels=ppl_inputs["input_ids"]).loss')
519
+ self._indent -= 1
520
+ self._emit("perplexity = torch.exp(ppl_loss).item()")
521
+ self._emit('eval_result["perplexity"] = round(perplexity, 2)')
522
+ self._emit('eval_result["scores"]["perplexity"] = "pass" if perplexity < 20.0 else "fail"')
523
+ self._emit('_ppl_label = "pass" if perplexity < 20.0 else "FAIL - too high"')
524
+ self._emit('print(f" perplexity: {perplexity:.2f} ({_ppl_label})")')
525
+ self._emit("")
526
+ self._emit("# Overall score")
527
+ self._emit("overall_score = total_correct / max(total_tests, 1)")
528
+ self._emit('eval_result["overall_score"] = round(overall_score, 3)')
529
+ self._emit('eval_result["overall"] = overall_score >= 0.5 and perplexity < 20.0')
530
+ self._emit('_overall_label = "PASS" if eval_result["overall"] else "FAIL"')
531
+ self._emit('print(f" OVERALL: {total_correct}/{total_tests} ({overall_score:.0%}) - {_overall_label}")')
532
+ self._emit("")
533
+ self._emit("# Track improvement over previous eval")
534
  self._emit(f'hist_key = "{cmd.target}_eval_history"')
535
  self._emit("if hist_key not in results:")
536
  self._indent += 1
537
  self._emit("results[hist_key] = []")
538
  self._indent -= 1
539
+ self._emit("results[hist_key].append(overall_score)")
540
+ self._emit('eval_result["improved"] = len(results[hist_key]) < 2 or results[hist_key][-1] >= results[hist_key][-2]')
541
+ self._emit(f'results["{cmd.target}_eval"] = eval_result')
542
  self._emit(f'lineage["{cmd.target}"]["operations"].append({{')
543
  self._indent += 1
544
  self._emit('"op": "eval",')
545
  self._emit('"timestamp": datetime.now().isoformat(),')
546
+ self._emit('"overall_score": overall_score,')
547
+ self._emit('"perplexity": perplexity,')
548
  self._indent -= 1
549
  self._emit("})")
550
  if cmd.output:
 
559
  self._emit('print("[td_lang] Eval results:", json.dumps(eval_result, indent=2, default=str))')
560
  self._emit("del model, tok")
561
  self._emit("import gc; gc.collect()")
 
562
 
563
  def _emit_commit(self, cmd: CommitCmd, global_gates: Optional[GateBlock]) -> None:
564
  gates = cmd.gates or (global_gates.must_pass if global_gates else None)
 
592
  self._indent -= 1
593
  self._emit("if failed:")
594
  self._indent += 1
595
+ self._emit('raise TDGateError(failed, message="Commit blocked - gates failed")')
596
  self._indent -= 1
597
  self._emit("else:")
598
  self._indent += 1
 
631
  self._emit(f'checkpoint = models.get("{cmd.target}", {{}}).get("checkpoint")')
632
  self._emit("if not checkpoint:")
633
  self._indent += 1
634
+ self._emit('print("[td_lang] WARNING: No checkpoint - using model_ref instead.")')
635
  self._emit(f'checkpoint = models["{cmd.target}"]["model_ref"]')
636
  self._indent -= 1
637
  self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
 
667
  self._emit('print(f" Response: {response[:200]}...")')
668
  self._emit("print()")
669
  self._indent -= 1
670
+ self._emit("")
671
+ self._emit("# Parse responses into structured weakness categories")
672
+ self._emit("import re as _re")
673
+ self._emit("weakness_categories = {")
674
+ self._indent += 1
675
+ self._emit("'math': ['math', 'arithmetic', 'calculation', 'algebra', 'geometry', 'calculus'],")
676
+ self._emit("'code': ['code', 'coding', 'programming', 'debug', 'syntax', 'algorithm'],")
677
+ self._emit("'logic': ['logic', 'reasoning', 'inference', 'fallac', 'deduction', 'chain'],")
678
+ self._emit("'factual': ['factual', 'hallucin', 'accuracy', 'knowledge', 'recall', 'memory'],")
679
+ self._emit("'creativity': ['creative', 'creativity', 'imagination', 'novel', 'original'],")
680
+ self._emit("'instruction': ['instruction', 'follow', 'format', 'comply', 'understand'],")
681
+ self._indent -= 1
682
+ self._emit("}")
683
+ self._emit("")
684
+ self._emit("weakness_scores = {cat: 0 for cat in weakness_categories}")
685
+ self._emit("for d in diagnose_results:")
686
+ self._indent += 1
687
+ self._emit("resp_lower = d['response'].lower()")
688
+ self._emit("for cat, keywords in weakness_categories.items():")
689
+ self._indent += 1
690
+ self._emit("for kw in keywords:")
691
+ self._indent += 1
692
+ self._emit("if kw in resp_lower:")
693
+ self._indent += 1
694
+ self._emit("weakness_scores[cat] += 1")
695
+ self._emit("break")
696
+ self._indent -= 3
697
+ self._indent -= 1
698
+ self._emit("")
699
+ self._emit("# Rank weaknesses by how many prompts mentioned them")
700
+ self._emit("ranked = sorted(weakness_scores.items(), key=lambda x: x[1], reverse=True)")
701
+ self._emit("top_weaknesses = [cat for cat, score in ranked if score > 0][:4]")
702
+ self._emit("if not top_weaknesses:")
703
+ self._indent += 1
704
+ self._emit("top_weaknesses = ['math', 'logic', 'code'] # safe defaults")
705
+ self._indent -= 1
706
+ self._emit("")
707
+ self._emit("diagnosis = {")
708
+ self._indent += 1
709
+ self._emit("'raw_responses': diagnose_results,")
710
+ self._emit("'weakness_scores': weakness_scores,")
711
+ self._emit("'top_weaknesses': top_weaknesses,")
712
+ self._emit("'ranked': ranked,")
713
+ self._indent -= 1
714
+ self._emit("}")
715
+ self._emit("print('[td_lang] Weakness ranking:')")
716
+ self._emit("for cat, score in ranked:")
717
+ self._indent += 1
718
+ self._emit("if score > 0:")
719
+ self._indent += 1
720
+ self._emit("print(f' {cat}: mentioned in {score}/{len(diag_prompts)} prompts')")
721
+ self._indent -= 2
722
+ self._emit("print(f'[td_lang] Top weaknesses to target: {top_weaknesses}')")
723
+ self._emit("")
724
+ self._emit(f'results["{cmd.target}_diagnose"] = diagnosis')
725
  self._emit(f'lineage["{cmd.target}"]["operations"].append({{')
726
  self._indent += 1
727
  self._emit('"op": "diagnose",')
728
  self._emit('"n_prompts": len(diag_prompts),')
729
+ self._emit('"top_weaknesses": top_weaknesses,')
730
  self._emit('"timestamp": datetime.now().isoformat(),')
731
  self._indent -= 1
732
  self._emit("})")
 
735
  self._emit("diag_path.parent.mkdir(parents=True, exist_ok=True)")
736
  self._emit('with open(diag_path, "w") as f:')
737
  self._indent += 1
738
+ self._emit("json.dump(diagnosis, f, indent=2, default=str)")
739
  self._indent -= 1
740
  self._emit('print(f"[td_lang] Diagnosis saved to {diag_path}")')
741
  self._emit("del model, tok")
 
767
  self._emit(")")
768
  self._emit("model.eval()")
769
  self._emit("")
770
+ self._emit("# Use structured diagnosis if available (upgraded diagnose outputs top_weaknesses)")
771
+ self._emit(f'diag = results.get("{cmd.target}_diagnose", {{}})')
772
+ self._emit("if isinstance(diag, dict) and 'top_weaknesses' in diag:")
773
+ self._indent += 1
774
+ self._emit("weak_topics = diag['top_weaknesses']")
775
+ self._emit("print(f'[td_lang] Targeting weaknesses from diagnosis: {weak_topics}')")
776
+ self._indent -= 1
777
+ self._emit("else:")
778
+ self._indent += 1
779
+ self._emit("# Fallback: scan raw responses for weakness keywords")
780
  self._emit("weak_topics = []")
781
+ self._emit("raw = diag if isinstance(diag, list) else diag.get('raw_responses', [])")
782
+ self._emit("for d in raw:")
783
  self._indent += 1
784
  self._emit("resp = d.get('response', '')")
785
+ self._emit("for topic in ['math', 'code', 'logic', 'factual']:")
786
  self._indent += 1
787
+ self._emit("if topic in resp.lower() and topic not in weak_topics:")
788
  self._indent += 1
789
  self._emit("weak_topics.append(topic)")
790
  self._indent -= 1
791
  self._indent -= 1
792
  self._indent -= 1
793
+ self._indent -= 1
794
  self._emit("if not weak_topics:")
795
  self._indent += 1
796
  self._emit("weak_topics = ['math', 'code', 'logic', 'factual']")
 
810
  self._indent -= 1
811
  self._emit("}")
812
  self._emit("")
813
+ self._emit("# Seed problems - model generates MORE from these (not just these 4)")
814
+ self._emit("seed_problems = {")
815
+ self._indent += 1
816
+ self._emit("'math': [")
817
+ self._indent += 1
818
+ self._emit("'Compute (17*19 - 121) / 3',")
819
+ self._emit("'Find the derivative of x^3 + 2x^2 - 5x + 7',")
820
+ self._emit("'Solve for x: 3x + 7 = 22',")
821
+ self._emit("'What is the sum of the first 20 positive integers?',")
822
+ self._emit("'A rectangle has area 48 and perimeter 28. Find its dimensions.',")
823
+ self._emit("'Calculate 15% of 240',")
824
+ self._indent -= 1
825
+ self._emit("],")
826
+ self._emit("'code': [")
827
+ self._indent += 1
828
+ self._emit("'Implement binary search in Python',")
829
+ self._emit("'Write a function to reverse a linked list',")
830
+ self._emit("'Parse a CSV file and compute column averages',")
831
+ self._emit("'Implement a LRU cache with O(1) get and put',")
832
+ self._emit("'Write a function to find all permutations of a string',")
833
+ self._emit("'Implement merge sort',")
834
+ self._indent -= 1
835
+ self._emit("],")
836
+ self._emit("'logic': [")
837
  self._indent += 1
838
+ self._emit("'If all A are B and all B are C, are all A C? Explain your reasoning.',")
839
+ self._emit("'A says B is lying. B says C is lying. C says both A and B are lying. Who is telling the truth?',")
840
+ self._emit("'Three boxes: one has gold, one has silver, one is empty. Box A says gold is in B. Box B says gold is in B. Box C says gold is not in A. Only one tells truth. Where is the gold?',")
841
+ self._emit("'If it takes 5 machines 5 minutes to make 5 widgets, how long does it take 100 machines to make 100 widgets?',")
842
+ self._indent -= 1
843
+ self._emit("],")
844
+ self._emit("'factual': [")
845
  self._indent += 1
846
+ self._emit("'Explain the difference between TCP and UDP in networking',")
847
+ self._emit("'What are the three laws of thermodynamics?',")
848
+ self._emit("'Describe how transformers work in machine learning',")
849
+ self._emit("'What causes tides on Earth?',")
850
+ self._indent -= 1
851
+ self._emit("],")
852
  self._indent -= 1
853
+ self._emit("}")
854
+ self._emit("")
855
+ self._emit("# Ask the model to generate MORE problems like the seeds")
856
+ self._emit("print('[td_lang] Generating problem bank from seeds...')")
857
+ self._emit("problem_bank = dict(seed_problems) # start with seeds")
858
+ self._emit("for domain in weak_topics:")
859
+ self._indent += 1
860
+ self._emit("if domain not in seed_problems:")
861
+ self._indent += 1
862
+ self._emit("continue")
863
+ self._indent -= 1
864
+ self._emit("examples = '; '.join(seed_problems.get(domain, [])[:3])")
865
+ self._emit("gen_prompt = f'Generate 10 diverse {domain} problems similar to: {examples}. List them numbered 1-10, one per line.'")
866
+ self._emit('gen_inputs = tok(gen_prompt, return_tensors="pt").to(model.device)')
867
+ self._emit("with torch.no_grad():")
868
  self._indent += 1
869
+ self._emit("gen_out = model.generate(**gen_inputs, max_new_tokens=512, do_sample=True, temperature=0.9)")
870
  self._indent -= 1
871
+ self._emit("gen_text = tok.decode(gen_out[0], skip_special_tokens=True)")
872
+ self._emit("# Parse numbered lines as new problems")
873
+ self._emit("for line in gen_text.split(chr(10)):")
874
  self._indent += 1
875
+ self._emit("line = re.sub(r'^\\d+[.)\\s]+', '', line.strip())")
876
+ self._emit("if len(line) > 15:")
877
+ self._indent += 1
878
+ self._emit("problem_bank.setdefault(domain, []).append(line)")
879
+ self._indent -= 2
880
  self._indent -= 1
881
+ self._emit("total_problems = sum(len(v) for v in problem_bank.values())")
882
+ self._emit("print(f'[td_lang] Problem bank: {total_problems} problems across {len(problem_bank)} domains')")
883
+ self._emit("")
884
+ self._emit("def make_problem(domain: str) -> str:")
885
+ self._indent += 1
886
+ self._emit("pool = problem_bank.get(domain, problem_bank.get('math', ['Solve 2+2']))")
887
+ self._emit("return random.choice(pool)")
888
  self._indent -= 1
889
  self._emit("")
890
  self._emit("synth_data = []")
 
971
  self._emit("")
972
 
973
  if cmd.method == "grpo":
974
+ self._emit("# GRPO training with QLoRA (test_15: 64 steps sweet spot)")
975
+ self._emit("# QLoRA = 4-bit base model + LoRA adapters = fits on 24GB 4090")
976
  self._emit("from trl import GRPOConfig, GRPOTrainer")
977
+ self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig")
978
+ self._emit("from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training")
979
  self._emit("from datasets import load_dataset")
980
  self._emit("import torch")
981
  self._emit("")
982
  self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
983
+ self._emit("if tok.pad_token is None:")
984
+ self._indent += 1
985
+ self._emit("tok.pad_token = tok.eos_token")
986
+ self._indent -= 1
987
+ self._emit("")
988
+ self._emit("# 4-bit quantization - shrinks 7B model from 14GB to ~4GB VRAM")
989
+ self._emit("bnb_config = BitsAndBytesConfig(")
990
+ self._indent += 1
991
+ self._emit("load_in_4bit=True,")
992
+ self._emit('bnb_4bit_quant_type="nf4",')
993
+ self._emit("bnb_4bit_compute_dtype=torch.bfloat16,")
994
+ self._emit("bnb_4bit_use_double_quant=True,")
995
+ self._indent -= 1
996
+ self._emit(")")
997
+ self._emit("")
998
  self._emit("model = AutoModelForCausalLM.from_pretrained(")
999
  self._indent += 1
1000
+ self._emit("checkpoint,")
1001
+ self._emit("quantization_config=bnb_config,")
1002
+ self._emit('device_map="auto",')
1003
  self._indent -= 1
1004
  self._emit(")")
1005
+ self._emit("model = prepare_model_for_kbit_training(model)")
1006
+ self._emit("")
1007
+ self._emit("# LoRA adapters on mid-to-late layers (test_12: layers 16-28 for 32-layer)")
1008
+ self._emit("lora_config = LoraConfig(")
1009
+ self._indent += 1
1010
+ self._emit("r=32,")
1011
+ self._emit("lora_alpha=64,")
1012
+ self._emit("lora_dropout=0.05,")
1013
+ self._emit('target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],')
1014
+ self._emit('task_type="CAUSAL_LM",')
1015
+ self._indent -= 1
1016
+ self._emit(")")
1017
+ self._emit("model = get_peft_model(model, lora_config)")
1018
+ self._emit("model.print_trainable_parameters() # Shows ~1-2% trainable vs total")
1019
  self._emit("")
1020
  self._emit(f'# Load training data')
1021
  self._emit(f'dataset_path = "{cmd.dataset}"')
 
1038
  self._emit('output_dir="td_lang_outputs/grpo_training",')
1039
  self._emit("save_steps=16,")
1040
  self._emit('bf16=True,')
1041
+ self._emit("gradient_checkpointing=True, # saves VRAM at slight speed cost")
1042
  self._indent -= 1
1043
  self._emit(")")
1044
  self._emit("")
 
1142
  self._emit("if logs['kl'] > 3.1 * ma:")
1143
  self._indent += 1
1144
  self._emit("control.should_training_stop = True")
1145
+ self._emit("print('[td_lang][early_stop] KL spike detected - stopping GRPO')")
1146
  self._indent -= 2
1147
  self._indent -= 1
1148
  self._emit("if 'eval/reward' in logs:")
 
1151
  self._emit("if len(self.eval_rewards) >= 2 and self.eval_rewards[-1] < self.eval_rewards[-2]:")
1152
  self._indent += 1
1153
  self._emit("control.should_training_stop = True")
1154
+ self._emit("print('[td_lang][early_stop] Validation reward drop - stopping GRPO')")
1155
  self._indent -= 1
1156
  self._indent -= 1
1157
  self._emit("if 'policy_entropy' in logs:")
 
1163
  self._emit("if self.entropy_history[-1] < 0.93 * baseline:")
1164
  self._indent += 1
1165
  self._emit("control.should_training_stop = True")
1166
+ self._emit("print('[td_lang][early_stop] Diversity collapsed - stopping GRPO')")
1167
  self._indent -= 2
1168
  self._indent -= 2
1169
  self._indent -= 1
 
1182
  self._emit(f'models["{cmd.target}"]["checkpoint"] = "td_lang_outputs/grpo_trained"')
1183
 
1184
  elif cmd.method in ("sft", "dpo"):
1185
+ self._emit(f"# {cmd.method.upper()} training with QLoRA (fits on 24GB 4090)")
1186
+ self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig")
1187
+ self._emit("from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training")
1188
  if cmd.method == "sft":
1189
  self._emit("from trl import SFTTrainer")
1190
  else:
 
1193
  self._emit("import torch")
1194
  self._emit("")
1195
  self._emit("tok = AutoTokenizer.from_pretrained(checkpoint)")
1196
+ self._emit("if tok.pad_token is None:")
1197
+ self._indent += 1
1198
+ self._emit("tok.pad_token = tok.eos_token")
1199
+ self._indent -= 1
1200
+ self._emit("")
1201
+ self._emit("bnb_config = BitsAndBytesConfig(")
1202
+ self._indent += 1
1203
+ self._emit("load_in_4bit=True,")
1204
+ self._emit('bnb_4bit_quant_type="nf4",')
1205
+ self._emit("bnb_4bit_compute_dtype=torch.bfloat16,")
1206
+ self._emit("bnb_4bit_use_double_quant=True,")
1207
+ self._indent -= 1
1208
+ self._emit(")")
1209
  self._emit("model = AutoModelForCausalLM.from_pretrained(")
1210
  self._indent += 1
1211
+ self._emit("checkpoint, quantization_config=bnb_config, device_map='auto',")
1212
  self._indent -= 1
1213
  self._emit(")")
1214
+ self._emit("model = prepare_model_for_kbit_training(model)")
1215
+ self._emit("lora_config = LoraConfig(r=32, lora_alpha=64, lora_dropout=0.05,")
1216
+ self._emit(' target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],')
1217
+ self._emit(' task_type="CAUSAL_LM")')
1218
+ self._emit("model = get_peft_model(model, lora_config)")
1219
  self._emit(f'dataset_path = "{cmd.dataset}"')
1220
  self._emit("if dataset_path.endswith('.jsonl'):")
1221
  self._indent += 1
 
1426
  # ---------------------------------------------------------------- Phase 3 emitters
1427
 
1428
  def _emit_edit(self, cmd: EditCmd) -> None:
1429
+ """EDIT - surgical LoRA/DoRA on specific layers.
1430
 
1431
  From test_18: all 3 AIs agree LoRA is safe default, DoRA beats by 1-4%.
1432
  layers_to_transform supports targeting specific layers (e.g., 16-28).
 
1437
  layers = cmd.layers # "all", "16-28", or single number
1438
  lr = cmd.learning_rate or 1e-4
1439
 
1440
+ self._emit(f'print("[td_lang] EDIT - surgical {method} on {alias}, layers={layers}")')
1441
  self._emit("from transformers import AutoModelForCausalLM, AutoTokenizer")
1442
  self._emit("import torch")
1443
  self._emit("from peft import LoraConfig, get_peft_model, PeftModel")
 
1493
  self._emit("")
1494
 
1495
  # Apply adapter
1496
+ self._emit("# Inject adapter - base weights stay frozen")
1497
  self._emit("model = get_peft_model(model, edit_config)")
1498
  self._emit("model.print_trainable_parameters()")
1499
  self._emit("")
 
1512
  self._indent -= 1
1513
  self._emit("")
1514
 
1515
+ # "Try before buy" - actual eval with adapters on vs off
1516
  self._emit('sample_prompts = ["What is 7+8?", "Explain photosynthesis in one paragraph.", "Write a Python function fib(n)."]')
1517
  self._emit("def run_quick_eval(enable_adapters: bool):")
1518
  self._indent += 1
 
1552
  self._indent -= 1
1553
  self._emit("")
1554
 
1555
+ # Save adapter (don't merge yet - let commit/gates decide)
1556
  self._emit(f'edit_save_dir = os.path.join(output_dir, "{alias}_edit_{method}")')
1557
  self._emit("os.makedirs(edit_save_dir, exist_ok=True)")
1558
  self._emit("model.save_pretrained(edit_save_dir)")
1559
  self._emit(f'print(f"[td_lang] EDIT adapter saved to {{edit_save_dir}}")')
1560
+ self._emit(f'print("[td_lang] Adapter NOT merged - use commit with gates to merge permanently")')
1561
  self._emit("")
1562
 
1563
  # Update models dict
1564
  self._emit(f'models["{alias}"] = model')
1565
 
1566
  def _emit_fork(self, cmd: ForkCmd) -> None:
1567
+ """FORK - branch current model weights for parallel experiments.
1568
 
1569
  From test_18: all 3 AIs say disk-based only on 4090.
1570
  Cheap fork = copy manifest + adapter files, share base weights.
 
1573
  source = cmd.source
1574
  alias = cmd.alias
1575
 
1576
+ self._emit(f'print("[td_lang] FORK - branching {source} as {alias}")')
1577
  self._emit(f'source_model = models["{source}"]')
1578
  self._emit("import torch")
1579
  self._emit("")
 
1586
  self._emit("")
1587
 
1588
  # Write manifest
1589
+ self._emit("# Write fork manifest - tracks lineage")
1590
  self._emit("import json")
1591
  self._emit("fork_manifest = {")
1592
  self._emit(f' "fork_name": "{alias}",')
 
1601
  self._emit("is_peft = hasattr(source_model, 'peft_config')")
1602
  self._emit("if is_peft:")
1603
  self._indent += 1
1604
+ self._emit("# PEFT model - save only adapter weights (small, fast)")
1605
  self._emit('adapter_dir = os.path.join(fork_dir, "adapters")')
1606
  self._emit("source_model.save_pretrained(adapter_dir)")
1607
  self._emit('fork_manifest["fork_type"] = "adapter"')
 
1610
  self._indent -= 1
1611
  self._emit("else:")
1612
  self._indent += 1
1613
+ self._emit("# Full model - clone tensors then save to safetensors")
1614
  self._emit("from safetensors.torch import save_file")
1615
  self._emit("state = {k: v.detach().cpu().clone() for k, v in source_model.state_dict().items()}")
1616
  self._emit('ckpt_path = os.path.join(fork_dir, "model.safetensors")')
 
1650
  self._emit(f'lineage["{alias}"] = {{"forked_from": "{source}", "operations": []}}')
1651
 
1652
  def _emit_reset(self, cmd: ResetCmd) -> None:
1653
+ """RESET - revert model to a previous checkpoint.
1654
 
1655
  From test_18: del model, clear CUDA cache, reload.
1656
  Must also reset optimizer state. Use assign=True to avoid doubling VRAM.
 
1658
  alias = cmd.target
1659
  checkpoint = cmd.checkpoint
1660
 
1661
+ self._emit(f'print("[td_lang] RESET - reverting {alias} to {checkpoint}")')
1662
  self._emit("")
1663
 
1664
  # Delete current model and clear CUDA
 
1683
  self._emit("")
1684
  self._emit("if fork_manifest_path and os.path.exists(fork_manifest_path):")
1685
  self._indent += 1
1686
+ self._emit("# Loading from a fork - read manifest")
1687
  self._emit("import json")
1688
  self._emit("with open(fork_manifest_path) as f:")
1689
  self._indent += 1
 
1707
  self._emit("# Loading from a safetensors file")
1708
  self._emit("from safetensors.torch import load_file")
1709
  self._emit("state = load_file(ckpt_path, device='cpu')")
1710
+ self._emit("# Need base model architecture - reload from original")
1711
  self._emit(f'base_ref = models.get("__base_ref_{alias}", ckpt_path)')
1712
  self._emit("model = AutoModelForCausalLM.from_pretrained(base_ref, torch_dtype=torch.float16, device_map='cuda')")
1713
  self._emit("try:")
 
1724
 
1725
  # Re-register in models dict
1726
  self._emit(f'models["{alias}"] = model')
1727
+ self._emit(f'print(f"[td_lang] RESET complete - {alias} restored from {checkpoint}")')
1728
  self._emit("")
1729
 
1730
  # Optimizer/cache handling and quick smoke eval
 
1746
  self._indent -= 1
1747
 
1748
  def _emit_prune(self, cmd: PruneCmd) -> None:
1749
+ """PRUNE - structural pruning of language backbone.
1750
 
1751
  From test_18: 20% structured max (LLM-Pruner). Wanda metric (Grok).
1752
  Language backbone only, never vision encoder. Recovery: 200-800 steps LoRA.
 
1756
  aggressiveness = cmd.aggressiveness
1757
 
1758
  self._emit("import torch")
1759
+ self._emit(f'print("[td_lang] PRUNE - {method} pruning on {alias}, {aggressiveness*100:.0f}% removal")')
1760
  self._emit(f'model = models["{alias}"]')
1761
  self._emit("")
1762
 
 
1770
  self._emit("")
1771
 
1772
  # Identify language-only layers (skip vision)
1773
+ self._emit("# Target language backbone ONLY - never prune vision encoder")
1774
  self._emit("# Filter for language model linear layers")
1775
  self._emit("target_modules = []")
1776
  self._emit("for name, module in model.named_modules():")
 
1884
  self._indent -= 1
1885
  self._indent -= 1
1886
  else: # taylor
1887
+ self._emit("# Taylor: gradient-based importance (needs backprop - VRAM heavy)")
1888
+ self._emit("# Falling back to magnitude as MVP - Taylor needs calibration + backprop")
1889
  self._emit(f'print("[td_lang] WARNING: Taylor pruning falls back to magnitude on single GPU")')
1890
  self._emit("import torch.nn.utils.prune as prune")
1891
  self._emit("")
 
1937
  # ---------------------------------------------------------------- Phase 7: Loop Control emitters
1938
 
1939
  def _emit_cmd(self, cmd, program: TDProgram) -> None:
1940
+ """Emit a single command - used by repeat/if to emit body commands."""
1941
  if isinstance(cmd, LoadCmd):
1942
  self._emit_load(cmd)
1943
  elif isinstance(cmd, MergeCmd):
 
1980
  self._emit_repeat(cmd, program)
1981
  elif isinstance(cmd, IfBlock):
1982
  self._emit_if(cmd, program)
1983
+ elif isinstance(cmd, ScheduleCmd):
1984
+ self._emit_schedule(cmd, program)
1985
 
1986
  def _emit_repeat(self, cmd: RepeatBlock, program: TDProgram) -> None:
1987
+ """REPEAT - run a block of commands N times.
1988
 
1989
  This is the core of td_loop: the self-improvement cycle.
1990
  Each iteration runs the body commands in order.
1991
  """
1992
  n = cmd.count
1993
+ self._emit(f'print("[td_lang] REPEAT - running {n} iterations")')
1994
  self._emit(f"for _loop_iter in range({n}):")
1995
  self._indent += 1
1996
  self._emit(f'print(f"[td_lang] === Iteration {{_loop_iter + 1}}/{n} ===")')
 
2000
  self._emit("elapsed_hours = (time.time() - start_time) / 3600")
2001
  self._emit(f"if elapsed_hours >= {program.budget.max_gpu_hours}:")
2002
  self._indent += 1
2003
+ self._emit('print("[td_lang] Budget exceeded inside repeat - stopping loop.")')
2004
  self._emit("break")
2005
  self._indent -= 1
2006
  self._emit("")
 
2009
  self._emit("")
2010
  self._emit(f'print(f"[td_lang] Iteration {{_loop_iter + 1}}/{n} complete.")')
2011
  self._indent -= 1
2012
+ self._emit(f'print("[td_lang] REPEAT complete - {n} iterations done.")')
2013
 
2014
  def _emit_if(self, cmd: IfBlock, program: TDProgram) -> None:
2015
+ """IF/ELSE - conditional execution based on eval results.
2016
 
2017
  Conditions:
2018
  - eval_passed: last eval for target had no failures
 
2022
  condition = cmd.condition
2023
  target = cmd.target
2024
 
2025
+ self._emit(f'print("[td_lang] IF - checking {condition} for {target}")')
2026
  self._emit("")
2027
 
2028
  # Emit condition check
 
2065
  self._indent -= 1
2066
 
2067
  def _emit_break_if(self, cmd: BreakIfCmd) -> None:
2068
+ """BREAK_IF - early exit from repeat based on condition."""
2069
  condition = cmd.condition
2070
  target = cmd.target or ""
2071
  self._emit(f'_brk_eval = results.get("{target}_eval", {{}})')
 
2078
  self._emit(f"_brk_met = bool(results.get('{target}_{condition}', False))")
2079
  self._emit("if _brk_met:")
2080
  self._indent += 1
2081
+ self._emit('print("[td_lang] break_if triggered - exiting loop")')
2082
  self._emit("break")
2083
  self._indent -= 1
2084
 
2085
  # ---------------------------------------------------------------- Phase 6: Easy Merge emitters
2086
 
2087
  def _emit_fuse(self, cmd: FuseCmd) -> None:
2088
+ """FUSE - merge multiple models into target in one command.
2089
 
2090
  From TD merge strategy: Transport and Merge (optimal transport cross-arch merging).
2091
+ All 5 source models have different architectures - Transport and Merge handles this.
2092
  Merge into language backbone only, vision encoder stays untouched.
2093
  """
2094
  target = cmd.target
 
2097
  strategy = cmd.strategy
2098
  n = len(sources)
2099
 
2100
+ self._emit(f'print("[td_lang] FUSE - merging {n} models into {target} using {method}")')
2101
  self._emit(f'print("[td_lang] Strategy: {strategy}")')
2102
  self._emit(f"fuse_sources = {sources}")
2103
  self._emit(f'prev_ckpt = models.get("{target}", {{}}).get("checkpoint")')
 
2113
  self._emit(f"strengths = [round(0.5 * (0.8 ** i), 3) for i in range({n})]")
2114
  self._emit('print(f"[td_lang] Sequential strategy: strengths = {strengths}")')
2115
  else:
2116
+ # weighted - default to equal if no weights specified
2117
  self._emit(f"per_model_strength = round(1.0 / ({n} + 1), 3)")
2118
  self._emit("")
2119
 
 
2202
  self._emit('"timestamp": datetime.now().isoformat(),')
2203
  self._indent -= 1
2204
  self._emit("})")
2205
+ self._emit(f'print("[td_lang] FUSE complete - {n} models merged into {target}")')
2206
 
2207
  def _emit_absorb(self, cmd: AbsorbCmd) -> None:
2208
+ """ABSORB - simplified single-model merge.
2209
 
2210
  One-liner shortcut: absorb "model" into target [strength 0.5]
2211
  Wraps the merge logic with sensible defaults.
 
2214
  target = cmd.target
2215
  strength = cmd.strength
2216
 
2217
+ self._emit(f'print("[td_lang] ABSORB - merging {source} into {target} (strength={strength})")')
2218
  self._emit(f'prev_ckpt = models.get("{target}", {{}}).get("checkpoint")')
2219
  self._emit("")
2220
 
 
2308
  self._emit('"timestamp": datetime.now().isoformat(),')
2309
  self._indent -= 1
2310
  self._emit("})")
2311
+ self._emit(f'print("[td_lang] ABSORB complete - {source} merged into {target}")')
2312
 
2313
  # ---------------------------------------------------------------- Phase 4 emitters
2314
 
2315
  def _emit_data_contract(self, dc: DataContractBlock) -> None:
2316
+ """Emit data contract validation - checked at synth/train time.
2317
 
2318
  From ForgeSpec 2.0 (test_17): data contracts enforce schema on training data.
2319
  Required fields, minimum samples, max perplexity.
 
2381
  self._emit("")
2382
 
2383
  def _emit_reward_contract(self, rc: RewardContractBlock) -> None:
2384
+ """Emit reward contract - enforced during GRPO training.
2385
 
2386
  From test_16: verified rewards only, no learned reward model.
2387
  """
 
2397
  self._emit("")
2398
 
2399
  def _emit_snapshot(self, cmd: SnapshotCmd, program: TDProgram) -> None:
2400
+ """SNAPSHOT - content-hashed model state for artifact lineage.
2401
 
2402
  From ForgeSpec 2.0 (test_17): every model state gets a content-addressed hash.
2403
  Directory contains: model weights/adapters, eval report, prune spec, manifest.
 
2405
  alias = cmd.target
2406
  output_dir = cmd.output or "td_lang_outputs/snapshots"
2407
 
2408
+ self._emit(f'print("[td_lang] SNAPSHOT - saving content-hashed state for {alias}")')
2409
  self._emit("import hashlib, json, time")
2410
  self._emit(f'snap_model = models["{alias}"]')
2411
  self._emit("")
 
2435
  self._emit("")
2436
 
2437
  # Write manifest
2438
+ self._emit("# Snapshot manifest - full provenance record")
2439
  self._emit("snap_manifest = {")
2440
  self._indent += 1
2441
  self._emit(f'"alias": "{alias}",')
 
2486
  self._emit("})")
2487
 
2488
  def _emit_report(self, cmd: ReportCmd, program: TDProgram) -> None:
2489
+ """REPORT - economics report for the run.
2490
 
2491
  Tracks GPU hours, cost, tokens, time per command.
2492
  From test_17 ForgeSpec 2.0: economics reports for cost tracking.
2493
  """
2494
  output = cmd.output or "economics_report.json"
2495
 
2496
+ self._emit('print("[td_lang] REPORT - generating economics report")')
2497
  self._emit("elapsed = time.time() - start_time")
2498
  self._emit("")
2499
  self._emit("report = {")
 
2569
  # ---------------------------------------------------------------- Phase 8: Autopilot emitters
2570
 
2571
  def _emit_setup(self, setup: SetupBlock) -> None:
2572
+ """SETUP - auto-install dependencies and configure environment.
2573
 
2574
  Runs at script start: pip install, HF token, ntfy config.
2575
  """
2576
+ self._emit("# ========== SETUP (Phase 8 - Autopilot) ==========")
2577
+ self._emit('print("[td_lang] SETUP - configuring environment...")')
2578
  self._emit("")
2579
 
2580
  # pip install
 
2593
  self._emit("except Exception as e:")
2594
  self._indent += 1
2595
  self._emit('print(f"[td_lang] WARNING: pip install failed: {e}")')
2596
+ self._emit('print("[td_lang] Continuing anyway - packages may already be installed.")')
2597
  self._indent -= 1
2598
  self._emit("")
2599
 
 
2662
  self._emit("")
2663
 
2664
  def _emit_on_error(self, on_error: OnErrorBlock, program: TDProgram) -> None:
2665
+ """ON_ERROR - wrap each step in retry/fallback logic.
2666
 
2667
  Emits a td_safe_run() helper that wraps any function call with:
2668
  - Retry N times on failure
2669
  - Fallback strategies (reduce batch, skip, snapshot+stop)
2670
  - Optional ntfy notification on error
2671
  """
2672
+ self._emit("# ========== ON_ERROR (Phase 8 - Crash Recovery) ==========")
2673
  self._emit(f"TD_MAX_RETRIES = {on_error.retry}")
2674
  self._emit(f'TD_FALLBACK = "{on_error.fallback}"')
2675
  self._emit(f"TD_NOTIFY_ON_ERROR = {on_error.notify}")
 
2701
  self._indent -= 1
2702
  self._emit('elif TD_FALLBACK == "snapshot_and_stop":')
2703
  self._indent += 1
2704
+ self._emit('print(f"[td_lang] OOM - saving snapshot and stopping.")')
2705
  self._emit("if TD_NOTIFY_ON_ERROR:")
2706
  self._indent += 1
2707
+ self._emit('td_notify(f"OOM on {step_name} - snapshot saved, stopping.")')
2708
  self._indent -= 1
2709
  self._emit("raise")
2710
  self._indent -= 2
 
2716
  self._indent += 1
2717
  self._emit("if TD_NOTIFY_ON_ERROR:")
2718
  self._indent += 1
2719
+ self._emit('td_notify(f"FAILED: {step_name} after {TD_MAX_RETRIES} retries - {e}")')
2720
  self._indent -= 1
2721
  self._emit('if TD_FALLBACK == "skip":')
2722
  self._indent += 1
 
2729
  self._emit("")
2730
 
2731
  def _emit_notify(self, cmd: NotifyCmd, program: TDProgram) -> None:
2732
+ """NOTIFY - send message via ntfy.sh."""
2733
  msg = cmd.message.replace('"', '\\"')
2734
  self._emit(f'td_notify("{msg}")')
2735
 
2736
  def _emit_save(self, cmd: SaveCmd, program: TDProgram) -> None:
2737
+ """SAVE - upload model to cloud storage via rclone.
2738
 
2739
  Uses rclone to copy model checkpoint/adapters to Google Drive or any remote.
2740
  """
2741
  alias = cmd.target
2742
  dest = cmd.destination
2743
 
2744
+ self._emit(f'print("[td_lang] SAVE - uploading {alias} to {dest}")')
2745
  self._emit("")
2746
 
2747
  # Find the model's checkpoint directory
 
2772
  self._indent += 1
2773
  self._emit("import subprocess as _sp")
2774
  self._emit("_sp.check_call(_rclone_cmd)")
2775
+ self._emit(f'print("[td_lang] SAVE complete - {alias} uploaded to {dest}")')
2776
  self._emit(f'td_notify("Model {alias} saved to {dest}")')
2777
  self._indent -= 1
2778
  self._emit("except FileNotFoundError:")
 
2803
  self._indent -= 1
2804
  self._emit("})")
2805
 
2806
+ # ---------------------------------------------------------------- Phase 9: Schedule
2807
+ def _emit_schedule(self, cmd: ScheduleCmd, program: TDProgram) -> None:
2808
+ """SCHEDULE - time-based command execution.
2809
+
2810
+ Patterns:
2811
+ "every 6h" → loop with time.sleep(6*3600)
2812
+ "every 30m" → loop with time.sleep(30*60)
2813
+ "at 02:00" → wait until that time, run once
2814
+ "after 30m" → sleep then run once
2815
+ """
2816
+ timing = cmd.timing.strip()
2817
+ self._emit(f'print("[td_lang] SCHEDULE - timing: {timing}")')
2818
+ self._emit("import time as _time")
2819
+ self._emit("from datetime import datetime as _dt, timedelta as _td")
2820
+ self._emit("")
2821
+
2822
+ if timing.startswith("every "):
2823
+ # Parse interval: "every 6h" or "every 30m"
2824
+ interval_str = timing[6:].strip()
2825
+ self._emit(f'_interval_str = "{interval_str}"')
2826
+ self._emit("if _interval_str.endswith('h'):")
2827
+ self._indent += 1
2828
+ self._emit("_interval_secs = int(_interval_str[:-1]) * 3600")
2829
+ self._indent -= 1
2830
+ self._emit("elif _interval_str.endswith('m'):")
2831
+ self._indent += 1
2832
+ self._emit("_interval_secs = int(_interval_str[:-1]) * 60")
2833
+ self._indent -= 1
2834
+ self._emit("else:")
2835
+ self._indent += 1
2836
+ self._emit("_interval_secs = int(_interval_str) * 3600 # default to hours")
2837
+ self._indent -= 1
2838
+ self._emit('print(f"[td_lang] Running every {_interval_secs}s ({_interval_str}). Ctrl+C to stop.")')
2839
+ self._emit("_sched_iter = 0")
2840
+ self._emit("while True:")
2841
+ self._indent += 1
2842
+ self._emit("_sched_iter += 1")
2843
+ self._emit('print(f"[td_lang] Schedule iteration {_sched_iter} starting at {_dt.now()}")')
2844
+ for body_cmd in cmd.body:
2845
+ self._emit_cmd(body_cmd, program)
2846
+ self._emit('print(f"[td_lang] Iteration {_sched_iter} done. Sleeping {_interval_secs}s...")')
2847
+ self._emit("_time.sleep(_interval_secs)")
2848
+ self._indent -= 1
2849
+
2850
+ elif timing.startswith("at "):
2851
+ # Parse time: "at 02:00"
2852
+ time_str = timing[3:].strip()
2853
+ self._emit(f'_target_time = _dt.strptime("{time_str}", "%H:%M").time()')
2854
+ self._emit("_now = _dt.now()")
2855
+ self._emit("_target = _dt.combine(_now.date(), _target_time)")
2856
+ self._emit("if _target <= _now:")
2857
+ self._indent += 1
2858
+ self._emit("_target += _td(days=1) # schedule for tomorrow if time already passed")
2859
+ self._indent -= 1
2860
+ self._emit("_wait = (_target - _now).total_seconds()")
2861
+ self._emit('print(f"[td_lang] Waiting {_wait:.0f}s until {_target}...")')
2862
+ self._emit("_time.sleep(_wait)")
2863
+ self._emit('print(f"[td_lang] Scheduled time reached: {_dt.now()}")')
2864
+ for body_cmd in cmd.body:
2865
+ self._emit_cmd(body_cmd, program)
2866
+
2867
+ elif timing.startswith("after "):
2868
+ # Parse delay: "after 30m" or "after 2h"
2869
+ delay_str = timing[6:].strip()
2870
+ self._emit(f'_delay_str = "{delay_str}"')
2871
+ self._emit("if _delay_str.endswith('h'):")
2872
+ self._indent += 1
2873
+ self._emit("_delay_secs = int(_delay_str[:-1]) * 3600")
2874
+ self._indent -= 1
2875
+ self._emit("elif _delay_str.endswith('m'):")
2876
+ self._indent += 1
2877
+ self._emit("_delay_secs = int(_delay_str[:-1]) * 60")
2878
+ self._indent -= 1
2879
+ self._emit("else:")
2880
+ self._indent += 1
2881
+ self._emit("_delay_secs = int(_delay_str) * 3600")
2882
+ self._indent -= 1
2883
+ self._emit('print(f"[td_lang] Waiting {_delay_secs}s before running...")')
2884
+ self._emit("_time.sleep(_delay_secs)")
2885
+ self._emit('print(f"[td_lang] Delay complete. Running scheduled commands...")')
2886
+ for body_cmd in cmd.body:
2887
+ self._emit_cmd(body_cmd, program)
2888
+
2889
+ else:
2890
+ self._emit(f'print("[td_lang] WARNING: Unknown schedule pattern: {timing}")')
2891
+ self._emit('print("[td_lang] Supported: every Nh/Nm, at HH:MM, after Nh/Nm")')
2892
+
2893
  # ---------------------------------------------------------------- Budget + summary
2894
  def _emit_budget_check(self, program: TDProgram) -> None:
2895
  budget = program.budget or BudgetBlock()
 
2960
  est_gpu += 0.05 # mostly disk I/O + hashing
2961
  elif isinstance(cmd, ReportCmd):
2962
  est_gpu += 0.01 # just JSON output
2963
+ elif isinstance(cmd, ScheduleCmd):
2964
+ body_est = 1.0 * len(cmd.body)
2965
+ est_gpu += body_est # at least one run
2966
+ elif isinstance(cmd, (NotifyCmd, SaveCmd)):
2967
+ est_gpu += 0.01
2968
 
2969
  est_cost = est_gpu * self.GPU_HOURLY
2970
 
hugging/td_lang/errors.py CHANGED
@@ -87,6 +87,7 @@ COMMON_FIXES = {
87
  "report": "Format: report [-> economics.json]",
88
  "fuse": 'Format: fuse ["model1", "model2"] into target [strategy equal]',
89
  "absorb": 'Format: absorb "model" into target [strength 0.5]',
 
90
  }
91
 
92
 
 
87
  "report": "Format: report [-> economics.json]",
88
  "fuse": 'Format: fuse ["model1", "model2"] into target [strategy equal]',
89
  "absorb": 'Format: absorb "model" into target [strength 0.5]',
90
+ "schedule": 'Format: schedule "every 6h" { commands... } or schedule "at 02:00" { ... }',
91
  }
92
 
93
 
hugging/td_lang/examples/demo_schedule.td ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Demo: Schedule command (Phase 9)
# Run training at specific times or on repeat
#
# Timing patterns accepted by `schedule`:
#   "every 6h" / "every 30m"  - run the block, sleep, repeat until interrupted
#   "at 02:00"                - wait for the next 02:00, then run the block once
#   "after 30m" / "after 2h"  - wait for the delay, then run the block once

# Environment bootstrap, run at script start: pip install, HF token, ntfy config.
setup {
    pip = [torch, transformers, peft, bitsandbytes, trl]
    hf_token = env
    notify = "ntfy.sh/my_ai"
}

# Crash recovery: retry count, fallback strategy, and optional ntfy notification on error.
on_error {
    retry = 3
    fallback = reduce_batch
    notify = true
}

load "Qwen/Qwen3-VL-8B-Instruct" as base

# Run training loop every 6 hours (overnight training)
# Each cycle: diagnose -> synth -> train (GRPO) -> eval; if the eval passed,
# commit, snapshot, upload and notify, otherwise reset to the snapshot and notify.
# NOTE(review): "every" schedules loop forever in the generated script - stop with Ctrl+C.
schedule "every 6h" {
    diagnose base -> weaknesses.json
    synth base from base filter cherry_llm -> training_data.jsonl
    train base on "training_data.jsonl" using grpo steps 64 lr 5e-5
    eval base -> eval_results.json
    if eval_passed base {
        commit base
        snapshot base -> snapshots/
        save base to "gdrive:TD/models/latest"
        notify "Training cycle passed! Model improved."
    } else {
        reset base to "snapshots/"
        notify "Training cycle failed. Reset to last good."
    }
}
hugging/td_lang/grammar.py CHANGED
@@ -32,6 +32,7 @@ from .ast_nodes import (
32
  ResetCmd,
33
  RewardContractBlock,
34
  SaveCmd,
 
35
  SetupBlock,
36
  SnapshotCmd,
37
  SynthCmd,
@@ -78,6 +79,7 @@ TD_GRAMMAR = r"""
78
  | reward_contract_block
79
  | setup_block
80
  | on_error_block
 
81
 
82
  // ======================== PHASE 1 COMMANDS ========================
83
 
@@ -151,7 +153,7 @@ TD_GRAMMAR = r"""
151
  | fork_cmd | reset_cmd | prune_cmd | edit_cmd
152
  | fuse_cmd | absorb_cmd | snapshot_cmd | report_cmd
153
  | notify_cmd | save_cmd
154
- | repeat_block_cmd | if_block_cmd) _NL*
155
 
156
  // ======================== PHASE 6 — EASY MERGE COMMANDS ========================
157
 
@@ -224,6 +226,13 @@ TD_GRAMMAR = r"""
224
  onerr_fallback: "fallback" "=" IDENT
225
  onerr_notify: "notify" "=" IDENT
226
 
 
 
 
 
 
 
 
227
  // ======================== SHARED RULES ========================
228
 
229
  // List of names: [name1, name2, name3]
@@ -454,6 +463,11 @@ class TDTransformer(Transformer):
454
  def else_clause(self, *body_cmds) -> list:
455
  return list(body_cmds)
456
 
 
 
 
 
 
457
  # --- Phase 6: Easy Merge Commands ---
458
 
459
  def fuse_cmd(self, sources: list[str], target: str, *opts) -> FuseCmd:
 
32
  ResetCmd,
33
  RewardContractBlock,
34
  SaveCmd,
35
+ ScheduleCmd,
36
  SetupBlock,
37
  SnapshotCmd,
38
  SynthCmd,
 
79
  | reward_contract_block
80
  | setup_block
81
  | on_error_block
82
+ | schedule_cmd
83
 
84
  // ======================== PHASE 1 COMMANDS ========================
85
 
 
153
  | fork_cmd | reset_cmd | prune_cmd | edit_cmd
154
  | fuse_cmd | absorb_cmd | snapshot_cmd | report_cmd
155
  | notify_cmd | save_cmd
156
+ | repeat_block_cmd | if_block_cmd | schedule_cmd) _NL*
157
 
158
  // ======================== PHASE 6 — EASY MERGE COMMANDS ========================
159
 
 
226
  onerr_fallback: "fallback" "=" IDENT
227
  onerr_notify: "notify" "=" IDENT
228
 
229
+ // ======================== PHASE 9 — SCHEDULE ========================
230
+
231
+ // schedule "every 6h" { commands... }
232
+ // schedule "at 02:00" { commands... }
233
+ // schedule "after 30m" { commands... }
234
+ schedule_cmd: "schedule" string "{" _NL* body_cmd+ _NL* "}"
235
+
236
  // ======================== SHARED RULES ========================
237
 
238
  // List of names: [name1, name2, name3]
 
463
  def else_clause(self, *body_cmds) -> list:
464
  return list(body_cmds)
465
 
466
+ # --- Phase 9: Schedule ---
467
+
468
def schedule_cmd(self, timing: str, *body_cmds) -> ScheduleCmd:
    """Build the AST node for ``schedule "<timing>" { commands... }``.

    Args:
        timing: Value produced for the rule's ``string`` child, e.g.
            "every 6h", "at 02:00" or "after 30m".
        *body_cmds: The transformed commands from inside the braces,
            in source order.

    Returns:
        A ``ScheduleCmd`` holding the timing pattern and the body as a list.
    """
    # Materialize the varargs tuple as a list, the form the node stores.
    scheduled_body = [*body_cmds]
    return ScheduleCmd(timing=timing, body=scheduled_body)
470
+
471
  # --- Phase 6: Easy Merge Commands ---
472
 
473
  def fuse_cmd(self, sources: list[str], target: str, *opts) -> FuseCmd: