"""TD Lang AST nodes — dataclass containers for each parsed command.

Each .td command becomes one of these nodes after parsing. Phase 1 nodes
are compiled into runnable Python; Phase 2 nodes are stubs so the compiler
can reject them with a clear error until they are implemented.

Every node here is a plain ``@dataclass`` holding parsed values only; no
node defines methods or validation. Field order is part of the interface:
nodes can be constructed positionally, so new fields must be appended with
a default rather than inserted.
"""

from dataclasses import dataclass, field
# `List` is no longer used below (annotations use the builtin `list[...]`
# consistently); the import line is left untouched so the name stays importable.
from typing import Any, List, Optional


# ============================================================================
# PHASE 1 COMMANDS
# ============================================================================

@dataclass
class LoadCmd:
    """Load a model and give it a name.

    Example:
        load "Qwen/Qwen3-VL-8B-Instruct" as base
    """
    model_ref: str  # HuggingFace path or local path
    alias: str  # Name to use in the rest of the script


@dataclass
class MergeCmd:
    """Merge a source model into a target using a method.

    Example:
        merge "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B" into base
            using transport strength 0.5
    """
    source: str  # Model path or alias to merge from
    target: str  # Alias to merge into (must be loaded first)
    method: str  # "transport", "slerp", "ties", "dare"
    strength: float = 0.5  # 0.0 = keep target, 1.0 = keep source


@dataclass
class HealCmd:
    """Run QLoRA healing fine-tune on a model.

    Example:
        heal base lora_r 32 epochs 2
    """
    target: str  # Alias of model to heal
    lora_r: int = 32  # LoRA rank (higher = more capacity)
    epochs: int = 2  # Training epochs


@dataclass
class EvalCmd:
    """Run validation/evaluation on a model.

    Example:
        eval base on "pile_sample" -> report.json
    """
    target: str  # Alias of model to evaluate
    dataset: Optional[str] = None  # Optional dataset name/path
    output: Optional[str] = None  # Optional output file path


@dataclass
class CommitCmd:
    """Save model checkpoint, optionally requiring gates to pass.

    Example:
        commit base if [canary, perplexity, thinking_mode]
    """
    target: str  # Alias of model to commit
    gates: Optional[list[str]] = None  # Gate names that must pass


# ============================================================================
# PHASE 2 COMMANDS (placeholders — structure ready, not wired up yet)
# ============================================================================

@dataclass
class SynthCmd:
    """Generate synthetic training data from a model. (Phase 2)"""
    target: str
    source: str
    filter_method: Optional[str] = None
    output: Optional[str] = None


@dataclass
class TrainCmd:
    """Train a model on a dataset. (Phase 2)"""
    target: str
    dataset: str
    method: str = "grpo"  # "grpo", "sft", "dpo"
    steps: Optional[int] = None
    learning_rate: Optional[float] = None


@dataclass
class DebateCmd:
    """Generate multi-answer debate for preference pairs. (Phase 2)"""
    target: str
    rounds: int = 3
    candidates: int = 8
    output: Optional[str] = None


@dataclass
class DiagnoseCmd:
    """Ask model what it's bad at — self-diagnosis. (Phase 2)"""
    target: str
    output: Optional[str] = None


# ============================================================================
# PHASE 3 COMMANDS (fork, reset, prune, edit)
# ============================================================================

@dataclass
class ForkCmd:
    """Branch current model weights for parallel experiments. (Phase 3)

    Example:
        fork base as experiment_v2

    Cheap fork: copies manifest + adapters, shares base weights (default).
    """
    source: str  # Alias of model to fork from
    alias: str  # Name for the new branch


@dataclass
class ResetCmd:
    """Revert model to a previous checkpoint. (Phase 3)

    Example:
        reset base to "checkpoint_042"

    Deletes current model, clears CUDA cache, reloads from disk.
    Must also reset optimizer state.
    """
    target: str  # Alias of model to reset
    checkpoint: str  # Checkpoint name/path to revert to


@dataclass
class PruneCmd:
    """Structural pruning — remove low-utility neurons/heads. (Phase 3)

    Example:
        prune base using wanda aggressiveness 0.2

    Safe zone: ~20% max (LLM-Pruner paper). Language backbone only.
    """
    target: str
    method: str = "wanda"  # "wanda", "magnitude", "taylor"
    aggressiveness: float = 0.2  # Fraction to remove (0.0-1.0)


@dataclass
class EditCmd:
    """Surgical LoRA/DoRA editing on specific layers. (Phase 3)

    Example:
        edit base layers 16-28 using lora lr 1e-4

    "Try before buy": eval with adapter enabled vs disabled before merging.
    """
    target: str
    layers: str = "all"  # "all", "16-28", single number
    method: str = "lora"  # "lora" or "dora"
    learning_rate: Optional[float] = None


# ============================================================================
# PHASE 7 — LOOP CONTROL (repeat, if/else)
# ============================================================================

@dataclass
class RepeatBlock:
    """Repeat a block of commands N times. (Phase 7 — Loop Control)

    Example:
        repeat 5 {
            diagnose base
            synth base from base
            train base on "data.jsonl" using grpo steps 64
            eval base
        }
    """
    count: int  # Number of iterations
    body: list[Any] = field(default_factory=list)  # Commands inside the block


@dataclass
class IfBlock:
    """Conditional execution based on last eval result. (Phase 7 — Loop Control)

    Example:
        if eval_passed {
            commit base
        } else {
            reset base to "last_good"
        }

    Condition checks the most recent eval result for the target.
    """
    condition: str  # "eval_passed", "gate_passed", etc.
    target: Optional[str] = None  # Which model's eval to check
    then_body: list[Any] = field(default_factory=list)
    else_body: list[Any] = field(default_factory=list)


# ============================================================================
# PHASE 6 — EASY MERGE (fuse, absorb)
# ============================================================================

@dataclass
class FuseCmd:
    """Fuse multiple models into a target in one shot. (Phase 6 — Easy Merge)

    Example:
        fuse [deepseek-r1, mimo-7b, llama-3.1] into base

    Auto-picks Transport and Merge, auto-sets per-model strength.
    Handles cross-architecture merging (all 5 source models have different archs).
    """
    sources: list[str]  # List of model names/paths to fuse in
    target: str  # Alias to merge into (must be loaded)
    method: str = "transport"  # Default: transport and merge (cross-arch)
    strategy: str = "equal"  # "equal" (same strength each), "weighted", "sequential"


@dataclass
class AbsorbCmd:
    """Absorb a single model into target — simplified merge. (Phase 6 — Easy Merge)

    Example:
        absorb "deepseek-ai/DeepSeek-R1" into base strength 0.5

    One-liner for the common case of merging one model in.
    """
    source: str  # Model path or HF ID
    target: str  # Alias to merge into
    strength: float = 0.5  # 0.0=keep target, 1.0=keep source, default balanced


# ============================================================================
# PHASE 4 COMMANDS — Contracts, Lineage, Economics (ForgeSpec 2.0, test_17)
# (the Phase 4 contract blocks are defined in the BLOCKS section further down)
# ============================================================================

@dataclass
class SnapshotCmd:
    """Save a content-hashed snapshot of model state for lineage tracking. (Phase 4)

    Example:
        snapshot base -> snapshots/

    Creates a content-addressed directory: snapshots/HASH/
    (HASH standing for the snapshot's content hash).
    Contains: model state, adapter state, prune spec, eval report, manifest.
    """
    target: str
    output: Optional[str] = None  # Output directory (default: td_lang_outputs/snapshots/)


@dataclass
class ReportCmd:
    """Generate an economics report for this run. (Phase 4)

    Example:
        report -> economics.json

    Tracks: GPU hours, cost estimate, tokens processed, experiments run,
    time per command, cost breakdown by phase.
    """
    output: Optional[str] = None  # Output file path


# ============================================================================
# PHASE 8 — AUTOPILOT (setup, notify, save, on_error, resume)
# ============================================================================

@dataclass
class NotifyCmd:
    """Send a notification via ntfy.sh. (Phase 8 — Autopilot)

    Example:
        notify "Training complete!"

    Uses curl to POST to the configured ntfy topic.
    """
    message: str


@dataclass
class SaveCmd:
    """Save/upload model to cloud storage via rclone. (Phase 8 — Autopilot)

    Example:
        save base to "gdrive:TD/models/v1"

    Uses rclone to copy model checkpoint to Google Drive (or any rclone remote).
    """
    target: str  # Alias of model to save
    destination: str  # rclone destination path


@dataclass
class SetupBlock:
    """Auto-install dependencies and configure environment. (Phase 8 — Autopilot)

    Example:
        setup {
            pip = [torch, transformers, peft, bitsandbytes, trl]
            hf_token = env
            notify = "ntfy.sh/my_ai"
        }
    """
    pip_packages: list[str] = field(default_factory=list)
    hf_token: Optional[str] = None  # "env" = read HF_TOKEN from env
    notify_url: Optional[str] = None  # ntfy.sh topic URL


@dataclass
class OnErrorBlock:
    """Crash recovery behavior. (Phase 8 — Autopilot)

    Example:
        on_error {
            retry = 3
            fallback = reduce_batch
            notify = true
        }
    """
    retry: int = 3  # Number of retries per failed step
    fallback: str = "reduce_batch"  # "reduce_batch", "skip", "snapshot_and_stop"
    notify: bool = True  # Send ntfy notification on error


# ============================================================================
# PHASE 9 — SCHEDULE (time-based execution)
# ============================================================================

@dataclass
class ScheduleCmd:
    """Schedule a block of commands to run at a specific time or interval. (Phase 9)

    Examples:
        schedule "every 6h" { diagnose base; train base ... }
        schedule "at 02:00" { train base on "data.jsonl" using grpo }
        schedule "after 30m" { eval base -> results.json }

    Patterns:
        "every Nh/Nm" — repeat every N hours/minutes
        "at HH:MM"    — run once at that time
        "after Nh/Nm" — delay then run once
    """
    timing: str  # "every 6h", "at 02:00", "after 30m"
    body: list[Any] = field(default_factory=list)  # Commands inside the block


# ============================================================================
# PHASE 10 - TOOLBOX (download, log, compare, verify)
# ============================================================================

@dataclass
class DownloadCmd:
    """Download a dataset from HuggingFace. (Phase 10)

    Example:
        download "gsm8k" as math_data

    Pulls a dataset from HuggingFace and stores it for training/eval.
    """
    dataset: str  # HuggingFace dataset path
    alias: str  # Name to reference it later
    split: str = "train"  # Which split to download


@dataclass
class LogBlock:
    """Save all pipeline output to a log file. (Phase 10)

    Example:
        log "training_log.txt"

    Everything printed to console also goes to this file.
    """
    filepath: str  # Path to save log


@dataclass
class CompareCmd:
    """Compare source model vs merged model - knowledge retention test. (Phase 10)

    Example:
        compare base vs "deepseek-ai/DeepSeek-R1" questions 50

    Tests both models on the same questions and shows what % the merged model
    retained from the source. Proves the merge actually worked.
    """
    target: str  # The merged model alias
    source: str  # Source model to compare against (HF path)
    questions: int = 50  # Number of test questions
    output: Optional[str] = None  # Optional output file


@dataclass
class VerifyCmd:
    """Verify model answers are actually correct. (Phase 10)

    Example:
        verify base on "gsm8k" questions 100 -> verify_results.json

    Runs the model on questions with KNOWN correct answers and checks if the
    model got them right. Returns accuracy percentage.
    """
    target: str  # Model alias to test
    dataset: str  # Dataset with known answers
    questions: int = 100  # Number of questions to test
    output: Optional[str] = None  # Optional output file


# ============================================================================
# PHASE 11 - INTELLIGENCE (vote, prompt, distill, rollback)
# ============================================================================

@dataclass
class VoteCmd:
    """Majority voting - generate N answers, pick the one most agree on. (Phase 11)

    Example:
        vote base "What is 15 * 23?" samples 5

    Generates N answers to the same question, then picks the most common one.
    Proven to boost accuracy 10-20% with zero training.
    """
    target: str  # Model alias
    question: str  # Question to vote on
    samples: int = 5  # Number of answers to generate
    output: Optional[str] = None  # Optional output file


@dataclass
class PromptBlock:
    """Attach a system prompt or chain-of-thought template to a model. (Phase 11)

    Example:
        prompt base "Think step by step before answering."

    Makes the model use this system prompt for all future generations.
    """
    target: str  # Model alias to attach prompt to
    text: str  # The system prompt text


@dataclass
class DistillCmd:
    """Distill a big model's knowledge into a smaller one. (Phase 11)

    Example:
        distill base into "Qwen/Qwen3-1.7B" steps 200 -> student_model/

    Takes the big model's best answers and trains the small model on them.
    You get a fast model for easy questions, full model for hard ones.
    """
    teacher: str  # The big model alias (source of knowledge)
    student: str  # The small model HF path
    steps: int = 200  # Training steps
    output: Optional[str] = None  # Where to save the student model


@dataclass
class RollbackCmd:
    """Undo the last training step. (Phase 11)

    Example:
        rollback base

    Reverts to the most recent snapshot. If training made things worse,
    one command brings it back.
    """
    target: str  # Model alias to rollback


# ============================================================================
# PHASE 12 - RL & FINE-TUNING (curriculum, star, best_of, exploit)
# ============================================================================

@dataclass
class CurriculumCmd:
    """Progressive difficulty training - start easy, get harder. (Phase 12)

    Example:
        curriculum base on "gsm8k" using grpo levels 3 steps 64

    Splits dataset by difficulty, trains on easy first, then medium, then hard.
    Each level only starts when the model passes the previous one.
    """
    target: str  # Model alias
    dataset: str  # Dataset to train on
    method: str = "grpo"  # Training method
    levels: int = 3  # Number of difficulty levels
    steps: int = 64  # Steps per level


@dataclass
class StarCmd:
    """Self-Taught Reasoner - train on own correct reasoning chains. (Phase 12)

    Example:
        star base on "gsm8k" rounds 3 samples 8

    Generate N solutions per problem. Keep the ones with correct answers.
    Train on the correct reasoning chains. Repeat.
    The model literally learns from its own successes.
    """
    target: str  # Model alias
    dataset: str  # Dataset with known answers
    rounds: int = 3  # Number of STaR iterations
    samples: int = 8  # Solutions to generate per problem


@dataclass
class BestOfCmd:
    """Generate N answers, score all, train on the best. (Phase 12)

    Example:
        best_of base on "gsm8k" n 8 steps 32

    For each training problem: generate N answers, score them all,
    keep only the best one, train on that. Like vote but for training.
    80-90% of RLHF gains at 5-30% of the cost (test_16).
    """
    target: str  # Model alias
    dataset: str  # Dataset to train on
    n: int = 8  # How many answers to generate per problem
    steps: int = 32  # Training steps on the filtered data


@dataclass
class ExploitCmd:
    """Controlled reward hacking - keep ALL correct solutions regardless of method. (Phase 12)

    Example:
        exploit base on "gsm8k" samples 16 -> exploit_data.jsonl

    Generate many diverse solutions (high temp). Only filter: is the answer correct?
    Keep ugly solutions, shortcuts, weird reasoning - as long as the answer is right.
    Train on the diverse set so the model learns multiple paths to correct answers.
    The "hacks" often turn out to be genuinely clever shortcuts.
    """
    target: str  # Model alias
    dataset: str  # Dataset with verifiable answers
    samples: int = 16  # Solutions per problem (higher = more diversity)
    steps: int = 32  # Training steps on the exploited data
    output: Optional[str] = None  # Save the exploit data for inspection


# ============================================================================
# PHASE 13 — ARENA (arena, research_arena)
# ============================================================================

@dataclass
class ArenaCmd:
    """Real RL with environment, memory, curiosity, and anti-lying. (Phase 13)

    The model enters an arena of challenges. For each challenge:
        1. It tries to solve it (exploration)
        2. Gets immediate reward/punishment (+1 correct, -1 wrong, -2 lying)
        3. Remembers what worked and didn't (memory bank persists across episodes)
        4. Gets curiosity bonus for trying NEW approaches
        5. Creative solutions get cross-checked against standard approaches

    Example:
        arena base on "gsm8k" rounds 5 episodes 50 steps 64 curiosity 0.3
    """
    target: str  # Model alias
    dataset: str  # Dataset with verifiable answers
    rounds: int = 5  # RL rounds (re-train after each)
    episodes: int = 50  # Challenges per round
    steps: int = 64  # Training steps per round
    curiosity: float = 0.3  # Curiosity bonus weight
    output: Optional[str] = None  # Save arena log


@dataclass
class ResearchArenaCmd:
    """Research arena — RL on ANY topic using real-world knowledge. (Phase 13)

    Unlike arena (which uses a pre-made dataset), research_arena:
        1. Takes a TOPIC string ("cancer biology", "number theory", anything)
        2. Pulls real papers/sources about that topic (web, arxiv, pubmed, local files)
        3. Extracts verifiable facts/claims from those sources
        4. Builds increasingly hard questions from the real knowledge
        5. Runs the model through the gauntlet, checking EVERY claim against sources
        6. Difficulty ESCALATES on failure (fewer hints, stricter checking, harder questions)
        7. Memory persists so it doesn't forget what it learned
        8. Lying gets punished DOUBLE, curiosity rewarded

    Example:
        research_arena base topic "cancer biology" sources "pubmed" rounds 5
    """
    target: str  # Model alias
    topic: str  # Research topic (any field)
    sources: str = "web"  # Where to pull knowledge: "web", "pubmed", "arxiv", or filepath
    rounds: int = 5  # RL rounds (difficulty increases each round)
    episodes: int = 30  # Questions per round
    steps: int = 64  # Training steps per round
    curiosity: float = 0.3  # Curiosity bonus weight
    difficulty_scale: float = 0.25  # How much harder each round gets (0.25 = 25% harder)
    output: Optional[str] = None  # Save research log


# ============================================================================
# BLOCKS (gates, budget, contracts, etc.)
# ============================================================================

@dataclass
class GateBlock:
    """Validation gates that must pass before commit.

    Example:
        gate {
            must_pass = [canary, perplexity, thinking_mode]
        }
    """
    must_pass: list[str] = field(default_factory=list)


@dataclass
class BudgetBlock:
    """Resource budget — compiler refuses plans that exceed limits.

    Example:
        budget {
            max_gpu_hours = 8
            max_cost = 50.00
        }
    """
    max_gpu_hours: Optional[float] = None
    max_cost: Optional[float] = None
    max_tokens: Optional[int] = None
    max_experiments: Optional[int] = None


@dataclass
class DataContractBlock:
    """Schema enforcement on training data. (Phase 4, ForgeSpec 2.0)

    Example:
        data_contract {
            required_fields = [prompt, response]
            min_samples = 100
            max_perplexity = 50.0
        }

    Compiler checks training data at synth/train time.
    """
    required_fields: list[str] = field(default_factory=list)
    min_samples: Optional[int] = None
    max_perplexity: Optional[float] = None


@dataclass
class RewardContractBlock:
    """Verified reward definitions — what counts as "correct". (Phase 4, ForgeSpec 2.0)

    Example:
        reward_contract {
            verifiers = [code_compiles, math_correct, no_hallucination]
            min_reward = 0.3
        }

    Used by train (GRPO) to enforce reward quality.
    No learned reward model — verified rewards only (test_16).
    """
    verifiers: list[str] = field(default_factory=list)
    min_reward: Optional[float] = None


# ============================================================================
# TOP-LEVEL PROGRAM
# ============================================================================

@dataclass
class TDProgram:
    """A complete parsed .td file — commands in order plus global blocks."""
    commands: list[Any] = field(default_factory=list)
    gates: Optional[GateBlock] = None
    budget: Optional[BudgetBlock] = None
    data_contract: Optional[DataContractBlock] = None
    reward_contract: Optional[RewardContractBlock] = None
    setup: Optional[SetupBlock] = None
    on_error: Optional[OnErrorBlock] = None
    log: Optional[LogBlock] = None
    source_file: Optional[str] = None


# Public API: every node class defined above, and nothing else.
__all__ = [
    "LoadCmd", "MergeCmd", "HealCmd", "EvalCmd", "CommitCmd",
    "SynthCmd", "TrainCmd", "DebateCmd", "DiagnoseCmd",
    "ForkCmd", "ResetCmd", "PruneCmd", "EditCmd",
    "RepeatBlock", "IfBlock",
    "FuseCmd", "AbsorbCmd",
    "SnapshotCmd", "ReportCmd",
    "NotifyCmd", "SaveCmd", "SetupBlock", "OnErrorBlock",
    "GateBlock", "BudgetBlock", "DataContractBlock", "RewardContractBlock",
    "ScheduleCmd",
    "DownloadCmd", "LogBlock", "CompareCmd", "VerifyCmd",
    "VoteCmd", "PromptBlock", "DistillCmd", "RollbackCmd",
    "CurriculumCmd", "StarCmd", "BestOfCmd", "ExploitCmd",
    "ArenaCmd", "ResearchArenaCmd",
    "TDProgram",
]