| """ |
| TD Lang AST Nodes — Dataclass containers for each parsed command. |
| |
| Each .td command becomes one of these nodes after parsing. |
| Phase 1 nodes are compiled into runnable Python; Phase 2 nodes are stubs so |
| the compiler can reject them with a clear error until they are implemented. |
| """ |
|
|
| from dataclasses import dataclass, field |
| from typing import Any, List, Optional |
|
|
|
|
| |
| |
| |
|
|
@dataclass
class LoadCmd:
    """AST node for ``load``: bind a model reference to a name.

    Example:
        load "Qwen/Qwen3-VL-8B-Instruct" as base

    Attributes:
        model_ref: Model reference string (the quoted part of the example).
        alias: Name given to the loaded model (``base`` in the example).
    """

    model_ref: str
    alias: str
|
|
|
|
@dataclass
class MergeCmd:
    """AST node for ``merge``: merge a source model into a target via a method.

    Example:
        merge "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B" into base using transport strength 0.5

    Attributes:
        source: Model being merged in (the quoted reference in the example).
        target: Model receiving the merge (``base`` in the example).
        method: Merge method name (``transport`` in the example).
        strength: Merge strength; defaults to 0.5.
    """

    source: str
    target: str
    method: str
    strength: float = 0.5
|
|
|
|
@dataclass
class HealCmd:
    """AST node for ``heal``: run a QLoRA healing fine-tune on a model.

    Example:
        heal base lora_r 32 epochs 2

    Attributes:
        target: Model to heal.
        lora_r: Value of the ``lora_r`` option (presumably the LoRA rank);
            defaults to 32.
        epochs: Value of the ``epochs`` option; defaults to 2.
    """

    target: str
    lora_r: int = 32
    epochs: int = 2
|
|
|
|
@dataclass
class EvalCmd:
    """AST node for ``eval``: run validation/evaluation on a model.

    Example:
        eval base on "pile_sample" -> report.json

    Attributes:
        target: Model to evaluate.
        dataset: Dataset named after ``on``; None by default.
        output: Destination named after ``->``; None by default.
    """

    target: str
    dataset: Optional[str] = None
    output: Optional[str] = None
|
|
|
|
@dataclass
class CommitCmd:
    """AST node for ``commit``: save a model checkpoint, optionally gated.

    Example:
        commit base if [canary, perplexity, thinking_mode]

    Attributes:
        target: Model whose checkpoint is saved.
        gates: Names of the gates required to pass (the bracketed list after
            ``if``); None by default.
    """

    target: str
    gates: Optional[list[str]] = None
|
|
|
|
| |
| |
| |
|
|
@dataclass
class SynthCmd:
    """AST node for ``synth``: generate synthetic training data from a model. (Phase 2)

    Attributes:
        target: Model the command is issued for.
        source: Source the data is generated from.
        filter_method: Optional filter method name; None by default.
        output: Optional output destination; None by default.
    """

    target: str
    source: str
    filter_method: Optional[str] = None
    output: Optional[str] = None
|
|
|
|
@dataclass
class TrainCmd:
    """AST node for ``train``: train a model on a dataset. (Phase 2)

    Attributes:
        target: Model to train.
        dataset: Dataset to train on.
        method: Training method name; defaults to ``"grpo"``.
        steps: Optional step count; None by default.
        learning_rate: Optional learning rate; None by default.
    """

    target: str
    dataset: str
    method: str = "grpo"
    steps: Optional[int] = None
    learning_rate: Optional[float] = None
|
|
|
|
@dataclass
class DebateCmd:
    """AST node for ``debate``: multi-answer debate producing preference pairs. (Phase 2)

    Attributes:
        target: Model taking part in the debate.
        rounds: Number of rounds; defaults to 3.
        candidates: Number of candidates; defaults to 8.
        output: Optional output destination; None by default.
    """

    target: str
    rounds: int = 3
    candidates: int = 8
    output: Optional[str] = None
|
|
|
|
@dataclass
class DiagnoseCmd:
    """AST node for ``diagnose``: ask the model what it is bad at (self-diagnosis). (Phase 2)

    Attributes:
        target: Model to diagnose.
        output: Optional output destination; None by default.
    """

    target: str
    output: Optional[str] = None
|
|
|
|
@dataclass
class ForkCmd:
    """AST node for ``fork``: branch model weights for parallel experiments. (Phase 3)

    Example:
        fork base as experiment_v2

    The default is described as a cheap fork: the manifest and adapters are
    copied while the base weights are shared.

    Attributes:
        source: Model being forked (``base`` in the example).
        alias: Name of the new branch (``experiment_v2`` in the example).
    """

    source: str
    alias: str
|
|
|
|
@dataclass
class ResetCmd:
    """AST node for ``reset``: revert a model to a previous checkpoint. (Phase 3)

    Example:
        reset base to "checkpoint_042"

    Intended semantics: delete the current model, clear the CUDA cache and
    reload from disk. Optimizer state must be reset as well.

    Attributes:
        target: Model to revert.
        checkpoint: Checkpoint to revert to (the quoted name in the example).
    """

    target: str
    checkpoint: str
|
|
|
|
@dataclass
class PruneCmd:
    """AST node for ``prune``: structural pruning of low-utility neurons/heads. (Phase 3)

    Example:
        prune base using wanda aggressiveness 0.2

    Safe zone is about 20% at most (LLM-Pruner paper). Applies to the
    language backbone only.

    Attributes:
        target: Model to prune.
        method: Pruning method name; defaults to ``"wanda"``.
        aggressiveness: Pruning aggressiveness; defaults to 0.2.
    """

    target: str
    method: str = "wanda"
    aggressiveness: float = 0.2
|
|
|
|
@dataclass
class EditCmd:
    """AST node for ``edit``: surgical LoRA/DoRA editing of specific layers. (Phase 3)

    Example:
        edit base layers 16-28 using lora lr 1e-4

    "Try before buy": evaluate with the adapter enabled vs. disabled before
    merging.

    Attributes:
        target: Model to edit.
        layers: Layer selection kept as a string (``16-28`` in the example);
            defaults to ``"all"``.
        method: Editing method name; defaults to ``"lora"``.
        learning_rate: Optional learning rate (``lr`` in the example); None by
            default.
    """

    target: str
    layers: str = "all"
    method: str = "lora"
    learning_rate: Optional[float] = None
|
|
|
|
| |
| |
| |
|
|
| |
| |
| |
|
|
@dataclass
class RepeatBlock:
    """AST node for ``repeat``: run a block of commands N times. (Phase 7 — Loop Control)

    Example:
        repeat 5 {
            diagnose base
            synth base from base
            train base on "data.jsonl" using grpo steps 64
            eval base
        }

    Attributes:
        count: Number of repetitions (``5`` in the example).
        body: Nodes inside the braces; each instance gets its own fresh list.
    """

    count: int
    body: List[Any] = field(default_factory=list)
|
|
|
|
@dataclass
class IfBlock:
    """AST node for ``if``/``else``: conditional execution on the last eval result. (Phase 7 — Loop Control)

    Example:
        if eval_passed {
            commit base
        } else {
            reset base to "last_good"
        }

    The condition checks the most recent eval result for the target.

    Attributes:
        condition: Condition name (``eval_passed`` in the example).
        target: Optional model the condition refers to; None by default.
        then_body: Nodes of the first brace block; fresh list per instance.
        else_body: Nodes of the ``else`` block; fresh list per instance.
    """

    condition: str
    target: Optional[str] = None
    then_body: List[Any] = field(default_factory=list)
    else_body: List[Any] = field(default_factory=list)
|
|
|
|
@dataclass
class FuseCmd:
    """AST node for ``fuse``: fuse several models into a target in one shot. (Phase 6 — Easy Merge)

    Example:
        fuse [deepseek-r1, mimo-7b, llama-3.1] into base

    Described as auto-picking Transport and Merge and auto-setting the
    per-model strength, including cross-architecture merging where the source
    models have different architectures.

    Attributes:
        sources: Models to fuse (the bracketed list in the example).
        target: Model receiving the fusion.
        method: Method name; defaults to ``"transport"``.
        strategy: Strategy name; defaults to ``"equal"``.
    """

    sources: list[str]
    target: str
    method: str = "transport"
    strategy: str = "equal"
|
|
|
|
@dataclass
class AbsorbCmd:
    """AST node for ``absorb``: simplified merge of a single model into a target. (Phase 6 — Easy Merge)

    Example:
        absorb "deepseek-ai/DeepSeek-R1" into base strength 0.5

    A one-liner for the common case of merging one model in.

    Attributes:
        source: Model being absorbed.
        target: Model receiving it.
        strength: Merge strength; defaults to 0.5.
    """

    source: str
    target: str
    strength: float = 0.5
|
|
|
|
@dataclass
class SnapshotCmd:
    """AST node for ``snapshot``: content-hashed snapshot of model state for lineage tracking. (Phase 4)

    Example:
        snapshot base -> snapshots/

    Creates a content-addressed directory ``snapshots/<sha256_prefix>/``
    holding model state, adapter state, prune spec, eval report and manifest.

    Attributes:
        target: Model to snapshot.
        output: Destination named after ``->``; None by default.
    """

    target: str
    output: Optional[str] = None
|
|
|
|
@dataclass
class ReportCmd:
    """AST node for ``report``: economics report for the current run. (Phase 4)

    Example:
        report -> economics.json

    Tracks GPU hours, cost estimate, tokens processed, experiments run, time
    per command and cost breakdown by phase.

    Attributes:
        output: Destination named after ``->``; None by default.
    """

    output: Optional[str] = None
|
|
|
|
| |
| |
| |
|
|
@dataclass
class NotifyCmd:
    """AST node for ``notify``: send a notification via ntfy.sh. (Phase 8 — Autopilot)

    Example:
        notify "Training complete!"

    Uses curl to POST to the configured ntfy topic.

    Attributes:
        message: Notification text (the quoted string in the example).
    """

    message: str
|
|
|
|
@dataclass
class SaveCmd:
    """AST node for ``save``: upload a model to cloud storage via rclone. (Phase 8 — Autopilot)

    Example:
        save base to "gdrive:TD/models/v1"

    Uses rclone to copy the model checkpoint to Google Drive (or any rclone
    remote).

    Attributes:
        target: Model to save.
        destination: Remote destination (the quoted string in the example).
    """

    target: str
    destination: str
|
|
|
|
@dataclass
class SetupBlock:
    """AST node for ``setup``: auto-install dependencies and configure the environment. (Phase 8 — Autopilot)

    Example:
        setup {
            pip = [torch, transformers, peft, bitsandbytes, trl]
            hf_token = env
            notify = "ntfy.sh/my_ai"
        }

    Attributes:
        pip_packages: Package names from the ``pip`` entry; fresh empty list
            per instance by default.
        hf_token: Value of the ``hf_token`` entry; None by default.
        notify_url: Value of the ``notify`` entry; None by default.
    """

    pip_packages: list[str] = field(default_factory=list)
    hf_token: Optional[str] = None
    notify_url: Optional[str] = None
|
|
|
|
@dataclass
class OnErrorBlock:
    """AST node for ``on_error``: crash recovery behavior. (Phase 8 — Autopilot)

    Example:
        on_error {
            retry = 3
            fallback = reduce_batch
            notify = true
        }

    Attributes:
        retry: Value of the ``retry`` entry; defaults to 3.
        fallback: Value of the ``fallback`` entry; defaults to
            ``"reduce_batch"``.
        notify: Value of the ``notify`` entry; defaults to True.
    """

    retry: int = 3
    fallback: str = "reduce_batch"
    notify: bool = True
|
|
|
|
| |
| |
| |
|
|
@dataclass
class ScheduleCmd:
    """AST node for ``schedule``: run a block of commands at a time or interval. (Phase 9)

    Examples:
        schedule "every 6h" { diagnose base; train base ... }
        schedule "at 02:00" { train base on "data.jsonl" using grpo }
        schedule "after 30m" { eval base -> results.json }

    Timing patterns:
        "every Nh/Nm" — repeat every N hours/minutes
        "at HH:MM"    — run once at that time
        "after Nh/Nm" — delay, then run once

    Attributes:
        timing: Timing pattern string, stored as written.
        body: Nodes inside the braces; fresh list per instance by default.
    """

    timing: str
    body: List[Any] = field(default_factory=list)
|
|
|
|
| |
| |
| |
|
|
@dataclass
class DownloadCmd:
    """AST node for ``download``: fetch a dataset from HuggingFace. (Phase 10)

    Example:
        download "gsm8k" as math_data

    Pulls a dataset from HuggingFace and stores it for training/eval.

    Attributes:
        dataset: Dataset identifier (the quoted string in the example).
        alias: Name given to the dataset (``math_data`` in the example).
        split: Dataset split name; defaults to ``"train"``.
    """

    dataset: str
    alias: str
    split: str = "train"
|
|
|
|
@dataclass
class LogBlock:
    """AST node for ``log``: send all pipeline output to a log file as well. (Phase 10)

    Example:
        log "training_log.txt"

    Everything printed to the console also goes to this file.

    Attributes:
        filepath: Path of the log file (the quoted string in the example).
    """

    filepath: str
|
|
|
|
@dataclass
class CompareCmd:
    """AST node for ``compare``: knowledge-retention test of merged vs. source model. (Phase 10)

    Example:
        compare base vs "deepseek-ai/DeepSeek-R1" questions 50

    Tests both models on the same questions and shows what percentage the
    merged model retained from the source, as evidence the merge worked.

    Attributes:
        target: Merged model under test (``base`` in the example).
        source: Source model compared against.
        questions: Number of questions; defaults to 50.
        output: Optional output destination; None by default.
    """

    target: str
    source: str
    questions: int = 50
    output: Optional[str] = None
|
|
|
|
@dataclass
class VerifyCmd:
    """AST node for ``verify``: check that model answers are actually correct. (Phase 10)

    Example:
        verify base on "gsm8k" questions 100 -> verify_results.json

    Runs the model on questions with KNOWN correct answers and checks whether
    it got them right; the result is an accuracy percentage.

    Attributes:
        target: Model to verify.
        dataset: Dataset named after ``on``.
        questions: Number of questions; defaults to 100.
        output: Destination named after ``->``; None by default.
    """

    target: str
    dataset: str
    questions: int = 100
    output: Optional[str] = None
|
|
|
|
| |
| |
| |
|
|
@dataclass
class VoteCmd:
    """AST node for ``vote``: majority voting over N generated answers. (Phase 11)

    Example:
        vote base "What is 15 * 23?" samples 5

    Generates N answers to the same question, then picks the most common one.
    Described as boosting accuracy 10-20% with zero training.

    Attributes:
        target: Model that generates the answers.
        question: Question text (the quoted string in the example).
        samples: Number of answers to generate; defaults to 5.
        output: Optional output destination; None by default.
    """

    target: str
    question: str
    samples: int = 5
    output: Optional[str] = None
|
|
|
|
@dataclass
class PromptBlock:
    """AST node for ``prompt``: attach a system prompt or chain-of-thought template to a model. (Phase 11)

    Example:
        prompt base "Think step by step before answering."

    Makes the model use this system prompt for all future generations.

    Attributes:
        target: Model the prompt is attached to.
        text: Prompt text (the quoted string in the example).
    """

    target: str
    text: str
|
|
|
|
@dataclass
class DistillCmd:
    """AST node for ``distill``: distill a big model's knowledge into a smaller one. (Phase 11)

    Example:
        distill base into "Qwen/Qwen3-1.7B" steps 200 -> student_model/

    Takes the big model's best answers and trains the small model on them,
    giving a fast model for easy questions and the full model for hard ones.

    Attributes:
        teacher: Big model providing the answers (``base`` in the example).
        student: Smaller model being trained (the quoted reference).
        steps: Number of training steps; defaults to 200.
        output: Destination named after ``->``; None by default.
    """

    teacher: str
    student: str
    steps: int = 200
    output: Optional[str] = None
|
|
|
|
@dataclass
class RollbackCmd:
    """AST node for ``rollback``: undo the last training step. (Phase 11)

    Example:
        rollback base

    Reverts to the most recent snapshot, so one command brings the model back
    if training made things worse.

    Attributes:
        target: Model to roll back.
    """

    target: str
|
|
|
|
| |
| |
| |
|
|
@dataclass
class CurriculumCmd:
    """AST node for ``curriculum``: progressive-difficulty training, easy first. (Phase 12)

    Example:
        curriculum base on "gsm8k" using grpo levels 3 steps 64

    Splits the dataset by difficulty and trains on easy, then medium, then
    hard. Each level only starts once the model passes the previous one.

    Attributes:
        target: Model to train.
        dataset: Dataset named after ``on``.
        method: Training method name; defaults to ``"grpo"``.
        levels: Number of difficulty levels; defaults to 3.
        steps: Value of the ``steps`` option; defaults to 64.
    """

    target: str
    dataset: str
    method: str = "grpo"
    levels: int = 3
    steps: int = 64
|
|
|
|
@dataclass
class StarCmd:
    """AST node for ``star``: Self-Taught Reasoner, training on own correct reasoning chains. (Phase 12)

    Example:
        star base on "gsm8k" rounds 3 samples 8

    Generate N solutions per problem, keep those with correct answers, train
    on the correct reasoning chains, and repeat. The model learns from its own
    successes.

    Attributes:
        target: Model to train.
        dataset: Dataset named after ``on``.
        rounds: Number of rounds; defaults to 3.
        samples: Solutions generated per problem; defaults to 8.
    """

    target: str
    dataset: str
    rounds: int = 3
    samples: int = 8
|
|
|
|
@dataclass
class BestOfCmd:
    """AST node for ``best_of``: generate N answers, score them all, train on the best. (Phase 12)

    Example:
        best_of base on "gsm8k" n 8 steps 32

    For each training problem: generate N answers, score them all, keep only
    the best one and train on it. Like vote, but for training.
    Stated claim: 80-90% of RLHF gains at 5-30% of the cost (test_16).

    Attributes:
        target: Model to train.
        dataset: Dataset named after ``on``.
        n: Answers generated per problem; defaults to 8.
        steps: Value of the ``steps`` option; defaults to 32.
    """

    target: str
    dataset: str
    n: int = 8
    steps: int = 32
|
|
|
|
@dataclass
class ExploitCmd:
    """AST node for ``exploit``: controlled reward hacking, keeping ALL correct solutions. (Phase 12)

    Example:
        exploit base on "gsm8k" samples 16 -> exploit_data.jsonl

    Generate many diverse solutions (high temperature) and filter only on
    whether the answer is correct. Ugly solutions, shortcuts and weird
    reasoning are kept as long as the answer is right. Training on the diverse
    set lets the model learn multiple paths to correct answers; the "hacks"
    often turn out to be genuinely clever shortcuts.

    Attributes:
        target: Model to train.
        dataset: Dataset named after ``on``.
        samples: Value of the ``samples`` option; defaults to 16.
        steps: Value of the ``steps`` option; defaults to 32.
        output: Destination named after ``->``; None by default.
    """

    target: str
    dataset: str
    samples: int = 16
    steps: int = 32
    output: Optional[str] = None
|
|
|
|
@dataclass
class ArenaCmd:
    """AST node for ``arena``: RL with environment, memory, curiosity and anti-lying. (Phase 13)

    The model enters an arena of challenges. For each challenge:
        1. It tries to solve it (exploration).
        2. It gets immediate reward/punishment (+1 correct, -1 wrong, -2 lying).
        3. It remembers what worked and what didn't (the memory bank persists
           across episodes).
        4. It gets a curiosity bonus for trying NEW approaches.
        5. Creative solutions are cross-checked against standard approaches.

    Example:
        arena base on "gsm8k" rounds 5 episodes 50 steps 64 curiosity 0.3

    Attributes:
        target: Model entering the arena.
        dataset: Dataset named after ``on``.
        rounds: Value of the ``rounds`` option; defaults to 5.
        episodes: Value of the ``episodes`` option; defaults to 50.
        steps: Value of the ``steps`` option; defaults to 64.
        curiosity: Value of the ``curiosity`` option; defaults to 0.3.
        output: Optional output destination; None by default.
    """

    target: str
    dataset: str
    rounds: int = 5
    episodes: int = 50
    steps: int = 64
    curiosity: float = 0.3
    output: Optional[str] = None
|
|
|
|
@dataclass
class ResearchArenaCmd:
    """AST node for ``research_arena``: RL on ANY topic using real-world knowledge. (Phase 13)

    Unlike arena (which uses a pre-made dataset), research_arena:
        1. Takes a TOPIC string ("cancer biology", "number theory", anything).
        2. Pulls real papers/sources about that topic (web, arxiv, pubmed,
           local files).
        3. Extracts verifiable facts/claims from those sources.
        4. Builds increasingly hard questions from the real knowledge.
        5. Runs the model through the gauntlet, checking EVERY claim against
           the sources.
        6. ESCALATES difficulty on failure (fewer hints, stricter checking,
           harder questions).
        7. Persists memory so the model doesn't forget what it learned.
        8. Punishes lying DOUBLE and rewards curiosity.

    Example:
        research_arena base topic "cancer biology" sources "pubmed" rounds 5

    Attributes:
        target: Model entering the arena.
        topic: Topic string.
        sources: Value of the ``sources`` option; defaults to ``"web"``.
        rounds: Value of the ``rounds`` option; defaults to 5.
        episodes: Episode count; defaults to 30.
        steps: Step count; defaults to 64.
        curiosity: Curiosity setting; defaults to 0.3.
        difficulty_scale: Difficulty scaling setting; defaults to 0.25.
        output: Optional output destination; None by default.
    """

    target: str
    topic: str
    sources: str = "web"
    rounds: int = 5
    episodes: int = 30
    steps: int = 64
    curiosity: float = 0.3
    difficulty_scale: float = 0.25
    output: Optional[str] = None
|
|
|
|
| |
| |
| |
|
|
@dataclass
class GateBlock:
    """AST node for ``gate``: validation gates that must pass before commit.

    Example:
        gate {
            must_pass = [canary, perplexity, thinking_mode]
        }

    Attributes:
        must_pass: Gate names from the ``must_pass`` entry; fresh empty list
            per instance by default.
    """

    must_pass: list[str] = field(default_factory=list)
|
|
|
|
@dataclass
class BudgetBlock:
    """AST node for ``budget``: resource limits; the compiler refuses plans that exceed them.

    Example:
        budget {
            max_gpu_hours = 8
            max_cost = 50.00
        }

    Every limit is optional and is None by default.

    Attributes:
        max_gpu_hours: Value of the ``max_gpu_hours`` entry.
        max_cost: Value of the ``max_cost`` entry.
        max_tokens: Token limit.
        max_experiments: Experiment-count limit.
    """

    max_gpu_hours: Optional[float] = None
    max_cost: Optional[float] = None
    max_tokens: Optional[int] = None
    max_experiments: Optional[int] = None
|
|
|
|
@dataclass
class DataContractBlock:
    """AST node for ``data_contract``: schema enforcement on training data. (Phase 4, ForgeSpec 2.0)

    Example:
        data_contract {
            required_fields = [prompt, response]
            min_samples = 100
            max_perplexity = 50.0
        }

    The compiler checks training data at synth/train time.

    Attributes:
        required_fields: Names from the ``required_fields`` entry; fresh empty
            list per instance by default.
        min_samples: Value of the ``min_samples`` entry; None by default.
        max_perplexity: Value of the ``max_perplexity`` entry; None by default.
    """

    required_fields: list[str] = field(default_factory=list)
    min_samples: Optional[int] = None
    max_perplexity: Optional[float] = None
|
|
|
|
@dataclass
class RewardContractBlock:
    """AST node for ``reward_contract``: verified reward definitions, i.e. what counts as "correct". (Phase 4, ForgeSpec 2.0)

    Example:
        reward_contract {
            verifiers = [code_compiles, math_correct, no_hallucination]
            min_reward = 0.3
        }

    Used by train (GRPO) to enforce reward quality. No learned reward model —
    verified rewards only (test_16).

    Attributes:
        verifiers: Names from the ``verifiers`` entry; fresh empty list per
            instance by default.
        min_reward: Value of the ``min_reward`` entry; None by default.
    """

    verifiers: list[str] = field(default_factory=list)
    min_reward: Optional[float] = None
|
|
|
|
| |
| |
| |
|
|
@dataclass
class TDProgram:
    """Root node of a parsed .td file: commands in order plus the global blocks.

    Attributes:
        commands: Parsed command nodes in order; fresh empty list per instance
            by default.
        gates: Optional ``GateBlock``; None by default.
        budget: Optional ``BudgetBlock``; None by default.
        data_contract: Optional ``DataContractBlock``; None by default.
        reward_contract: Optional ``RewardContractBlock``; None by default.
        setup: Optional ``SetupBlock``; None by default.
        on_error: Optional ``OnErrorBlock``; None by default.
        log: Optional ``LogBlock``; None by default.
        source_file: Optional source file name/path; None by default.
    """

    commands: List[Any] = field(default_factory=list)
    gates: Optional[GateBlock] = None
    budget: Optional[BudgetBlock] = None
    data_contract: Optional[DataContractBlock] = None
    reward_contract: Optional[RewardContractBlock] = None
    setup: Optional[SetupBlock] = None
    on_error: Optional[OnErrorBlock] = None
    log: Optional[LogBlock] = None
    source_file: Optional[str] = None
|
|
|
|
# Public API of this module. Entries keep their original order; the group
# comments follow the phase tags in each class's docstring.
__all__ = [
    # Commands without a phase tag
    "LoadCmd",
    "MergeCmd",
    "HealCmd",
    "EvalCmd",
    "CommitCmd",
    # Phase 2
    "SynthCmd",
    "TrainCmd",
    "DebateCmd",
    "DiagnoseCmd",
    # Phase 3
    "ForkCmd",
    "ResetCmd",
    "PruneCmd",
    "EditCmd",
    # Phase 7 — Loop Control
    "RepeatBlock",
    "IfBlock",
    # Phase 6 — Easy Merge
    "FuseCmd",
    "AbsorbCmd",
    # Phase 4
    "SnapshotCmd",
    "ReportCmd",
    # Phase 8 — Autopilot
    "NotifyCmd",
    "SaveCmd",
    "SetupBlock",
    "OnErrorBlock",
    # Gate, budget and contract blocks
    "GateBlock",
    "BudgetBlock",
    "DataContractBlock",
    "RewardContractBlock",
    # Phase 9
    "ScheduleCmd",
    # Phase 10
    "DownloadCmd",
    "LogBlock",
    "CompareCmd",
    "VerifyCmd",
    # Phase 11
    "VoteCmd",
    "PromptBlock",
    "DistillCmd",
    "RollbackCmd",
    # Phase 12
    "CurriculumCmd",
    "StarCmd",
    "BestOfCmd",
    "ExploitCmd",
    # Phase 13
    "ArenaCmd",
    "ResearchArenaCmd",
    # Program root
    "TDProgram",
]
|
|