# td-toolkit / td_lang / ast_nodes.py
# td-builder's picture
# Fixed code: vocab mismatch fix for cross-arch merging (Llama/Falcon)
# 5d61448 verified
"""
TD Lang AST Nodes — Dataclass containers for each parsed command.
Each .td command becomes one of these nodes after parsing.
Phase 1 nodes are compiled into runnable Python; Phase 2 nodes are stubs so
the compiler can reject them with a clear error until they are implemented.
"""
from dataclasses import dataclass, field
from typing import Any, List, Optional
# ============================================================================
# PHASE 1 COMMANDS
# ============================================================================
@dataclass
class LoadCmd:
    """AST node for ``load``: bind a model reference to a script-local alias.

    Example:
        load "Qwen/Qwen3-VL-8B-Instruct" as base
    """

    model_ref: str  # HuggingFace path or local path of the model
    alias: str  # Name used for this model in the rest of the script
@dataclass
class MergeCmd:
    """AST node for ``merge``: merge a source model into a target with a method.

    Example:
        merge "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B" into base using transport strength 0.5
    """

    source: str  # Model path or alias to merge from
    target: str  # Alias to merge into (must be loaded first)
    method: str  # "transport", "slerp", "ties", "dare"
    strength: float = 0.5  # 0.0 = keep target, 1.0 = keep source
@dataclass
class HealCmd:
    """AST node for ``heal``: run a QLoRA healing fine-tune on a model.

    Example:
        heal base lora_r 32 epochs 2
    """

    target: str  # Alias of the model to heal
    lora_r: int = 32  # LoRA rank (higher = more capacity)
    epochs: int = 2  # Number of training epochs
@dataclass
class EvalCmd:
    """AST node for ``eval``: run validation/evaluation on a model.

    Example:
        eval base on "pile_sample" -> report.json
    """

    target: str  # Alias of the model to evaluate
    dataset: Optional[str] = None  # Dataset name/path; None when omitted
    output: Optional[str] = None  # Output file path; None when omitted
@dataclass
class CommitCmd:
    """AST node for ``commit``: save a model checkpoint, optionally gated.

    Example:
        commit base if [canary, perplexity, thinking_mode]
    """

    target: str  # Alias of the model to commit
    gates: Optional[list[str]] = None  # Gate names that must pass; None when no gates are given
# ============================================================================
# PHASE 2 COMMANDS (placeholders — structure ready, not wired up yet),
# followed by the PHASE 3 commands (fork, reset, prune, edit)
# ============================================================================
@dataclass
class SynthCmd:
    """AST node for ``synth``: generate synthetic training data from a model. (Phase 2)"""

    target: str  # NOTE(review): presumably the model alias the data is for — confirm against the parser
    source: str  # NOTE(review): presumably what follows ``from`` (e.g. ``synth base from base``) — confirm
    filter_method: Optional[str] = None  # Optional filter name; None when omitted
    output: Optional[str] = None  # Optional output path; None when omitted
@dataclass
class TrainCmd:
    """AST node for ``train``: train a model on a dataset. (Phase 2)"""

    target: str  # Model to train
    dataset: str  # Dataset to train on
    method: str = "grpo"  # "grpo", "sft", "dpo"
    steps: Optional[int] = None  # Training steps; None when not specified
    learning_rate: Optional[float] = None  # Learning rate; None when not specified
@dataclass
class DebateCmd:
    """AST node for ``debate``: multi-answer debate for preference pairs. (Phase 2)"""

    target: str  # Model taking part in the debate
    rounds: int = 3  # Number of rounds
    candidates: int = 8  # Number of candidates
    output: Optional[str] = None  # Optional output path; None when omitted
@dataclass
class DiagnoseCmd:
    """AST node for ``diagnose``: ask a model what it is bad at (self-diagnosis). (Phase 2)"""

    target: str  # Model to diagnose
    output: Optional[str] = None  # Optional output path; None when omitted
@dataclass
class ForkCmd:
    """AST node for ``fork``: branch model weights for parallel experiments. (Phase 3)

    Example:
        fork base as experiment_v2

    Cheap fork: copies manifest + adapters, shares base weights (default).
    """

    source: str  # Alias of the model to fork from
    alias: str  # Name for the new branch
@dataclass
class ResetCmd:
    """AST node for ``reset``: revert a model to a previous checkpoint. (Phase 3)

    Example:
        reset base to "checkpoint_042"

    Intended semantics: delete the current model, clear the CUDA cache, and
    reload from disk. Optimizer state must be reset as well.
    """

    target: str  # Alias of the model to reset
    checkpoint: str  # Checkpoint name/path to revert to
@dataclass
class PruneCmd:
    """AST node for ``prune``: structurally remove low-utility neurons/heads. (Phase 3)

    Example:
        prune base using wanda aggressiveness 0.2

    Safe zone: ~20% max (LLM-Pruner paper). Language backbone only.
    """

    target: str  # Model to prune
    method: str = "wanda"  # "wanda", "magnitude", "taylor"
    aggressiveness: float = 0.2  # Fraction to remove (0.0-1.0)
@dataclass
class EditCmd:
    """AST node for ``edit``: surgical LoRA/DoRA editing on specific layers. (Phase 3)

    Example:
        edit base layers 16-28 using lora lr 1e-4

    "Try before buy": eval with the adapter enabled vs disabled before merging.
    """

    target: str  # Model to edit
    layers: str = "all"  # "all", a range such as "16-28", or a single number
    method: str = "lora"  # "lora" or "dora"
    learning_rate: Optional[float] = None  # Learning rate; None when not specified
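# ============================================================================
# PHASE 4 COMMANDS — Contracts, Lineage, Economics (ForgeSpec 2.0, test_17)
# NOTE: no nodes are defined directly under this header. The Phase 4 nodes
# live further down: SnapshotCmd and ReportCmd (after the Phase 6 merge
# commands) and DataContractBlock / RewardContractBlock (in the BLOCKS section).
# ============================================================================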
# ============================================================================
# PHASE 7 — LOOP CONTROL (repeat, if/else)
# ============================================================================
@dataclass
class RepeatBlock:
    """AST node for ``repeat``: run a block of commands N times. (Phase 7 — Loop Control)

    Example:
        repeat 5 {
            diagnose base
            synth base from base
            train base on "data.jsonl" using grpo steps 64
            eval base
        }
    """

    count: int  # Number of iterations
    # Commands inside the block; default_factory gives each node its own list.
    body: list[Any] = field(default_factory=list)
@dataclass
class IfBlock:
    """AST node for ``if``/``else``: conditional execution. (Phase 7 — Loop Control)

    Example:
        if eval_passed {
            commit base
        } else {
            reset base to "last_good"
        }

    The condition checks the most recent eval result for the target.
    """

    condition: str  # "eval_passed", "gate_passed", etc.
    target: Optional[str] = None  # Which model's eval to check; None when not given
    # Commands for each branch; default_factory gives each node its own lists.
    then_body: list[Any] = field(default_factory=list)
    else_body: list[Any] = field(default_factory=list)
@dataclass
class FuseCmd:
    """AST node for ``fuse``: fuse several models into a target in one shot. (Phase 6 — Easy Merge)

    Example:
        fuse [deepseek-r1, mimo-7b, llama-3.1] into base

    Auto-picks Transport and Merge and auto-sets per-model strength.
    Meant to handle cross-architecture merging, where the source models
    have different architectures.
    """

    sources: list[str]  # Model names/paths to fuse in
    target: str  # Alias to merge into (must be loaded)
    method: str = "transport"  # Default: transport and merge (cross-arch)
    strategy: str = "equal"  # "equal" (same strength each), "weighted", "sequential"
@dataclass
class AbsorbCmd:
    """AST node for ``absorb``: simplified single-model merge. (Phase 6 — Easy Merge)

    Example:
        absorb "deepseek-ai/DeepSeek-R1" into base strength 0.5

    One-liner for the common case of merging one model in.
    """

    source: str  # Model path or HF ID
    target: str  # Alias to merge into
    strength: float = 0.5  # 0.0 = keep target, 1.0 = keep source; default is balanced
@dataclass
class SnapshotCmd:
    """AST node for ``snapshot``: content-hashed snapshot for lineage tracking. (Phase 4)

    Example:
        snapshot base -> snapshots/

    Creates a content-addressed directory: snapshots/<sha256_prefix>/
    Contains: model state, adapter state, prune spec, eval report, manifest.
    """

    target: str  # Model to snapshot
    output: Optional[str] = None  # Output directory (default: td_lang_outputs/snapshots/)
@dataclass
class ReportCmd:
    """AST node for ``report``: generate an economics report for this run. (Phase 4)

    Example:
        report -> economics.json

    Tracks: GPU hours, cost estimate, tokens processed, experiments run,
    time per command, cost breakdown by phase.
    """

    output: Optional[str] = None  # Output file path; None when omitted
# ============================================================================
# PHASE 8 — AUTOPILOT (setup, notify, save, on_error, resume)
# ============================================================================
@dataclass
class NotifyCmd:
    """AST node for ``notify``: send a notification via ntfy.sh. (Phase 8 — Autopilot)

    Example:
        notify "Training complete!"

    Uses curl to POST to the configured ntfy topic.
    """

    message: str  # Text of the notification
@dataclass
class SaveCmd:
    """AST node for ``save``: upload a model to cloud storage via rclone. (Phase 8 — Autopilot)

    Example:
        save base to "gdrive:TD/models/v1"

    Uses rclone to copy the model checkpoint to Google Drive (or any rclone remote).
    """

    target: str  # Alias of the model to save
    destination: str  # rclone destination path
@dataclass
class SetupBlock:
    """AST node for ``setup``: install dependencies and configure the environment. (Phase 8 — Autopilot)

    Example:
        setup {
            pip = [torch, transformers, peft, bitsandbytes, trl]
            hf_token = env
            notify = "ntfy.sh/my_ai"
        }
    """

    # Packages listed under ``pip``; default_factory gives each node its own list.
    pip_packages: list[str] = field(default_factory=list)
    hf_token: Optional[str] = None  # "env" = read HF_TOKEN from env
    notify_url: Optional[str] = None  # ntfy.sh topic URL
@dataclass
class OnErrorBlock:
    """AST node for ``on_error``: crash-recovery behavior. (Phase 8 — Autopilot)

    Example:
        on_error {
            retry = 3
            fallback = reduce_batch
            notify = true
        }
    """

    retry: int = 3  # Number of retries per failed step
    fallback: str = "reduce_batch"  # "reduce_batch", "skip", "snapshot_and_stop"
    notify: bool = True  # Send an ntfy notification on error
# ============================================================================
# PHASE 9 — SCHEDULE (time-based execution)
# ============================================================================
@dataclass
class ScheduleCmd:
    """AST node for ``schedule``: run a block of commands at a time or interval. (Phase 9)

    Examples:
        schedule "every 6h" { diagnose base; train base ... }
        schedule "at 02:00" { train base on "data.jsonl" using grpo }
        schedule "after 30m" { eval base -> results.json }

    Patterns:
        "every Nh/Nm" — repeat every N hours/minutes
        "at HH:MM"    — run once at that time
        "after Nh/Nm" — delay, then run once
    """

    timing: str  # "every 6h", "at 02:00", "after 30m"
    # Commands inside the block; default_factory gives each node its own list.
    body: list[Any] = field(default_factory=list)
# ============================================================================
# PHASE 10 - TOOLBOX (download, log, compare, verify)
# ============================================================================
@dataclass
class DownloadCmd:
    """AST node for ``download``: fetch a dataset from HuggingFace. (Phase 10)

    Example:
        download "gsm8k" as math_data

    Pulls a dataset from HuggingFace and stores it for training/eval.
    """

    dataset: str  # HuggingFace dataset path
    alias: str  # Name used to reference the dataset later
    split: str = "train"  # Which split to download
@dataclass
class LogBlock:
    """AST node for ``log``: save all pipeline output to a log file. (Phase 10)

    Example:
        log "training_log.txt"

    Everything printed to the console also goes to this file.
    """

    filepath: str  # Path of the log file
@dataclass
class CompareCmd:
    """AST node for ``compare``: knowledge-retention test, source vs merged model. (Phase 10)

    Example:
        compare base vs "deepseek-ai/DeepSeek-R1" questions 50

    Tests both models on the same questions and shows what percentage the
    merged model retained from the source.
    """

    target: str  # The merged model alias
    source: str  # Source model to compare against (HF path)
    questions: int = 50  # Number of test questions
    output: Optional[str] = None  # Optional output file; None when omitted
@dataclass
class VerifyCmd:
    """AST node for ``verify``: check model answers against known-correct ones. (Phase 10)

    Example:
        verify base on "gsm8k" questions 100 -> verify_results.json

    Runs the model on questions with known correct answers and checks whether
    it got them right. Returns an accuracy percentage.
    """

    target: str  # Model alias to test
    dataset: str  # Dataset with known answers
    questions: int = 100  # Number of questions to test
    output: Optional[str] = None  # Optional output file; None when omitted
# ============================================================================
# PHASE 11 - INTELLIGENCE (vote, prompt, distill, rollback)
# ============================================================================
@dataclass
class VoteCmd:
    """AST node for ``vote``: majority voting over N generated answers. (Phase 11)

    Example:
        vote base "What is 15 * 23?" samples 5

    Generates N answers to the same question, then picks the most common one.
    Per the original notes, this boosts accuracy 10-20% with zero training.
    """

    target: str  # Model alias
    question: str  # Question to vote on
    samples: int = 5  # Number of answers to generate
    output: Optional[str] = None  # Optional output file; None when omitted
@dataclass
class PromptBlock:
    """AST node for ``prompt``: attach a system prompt or chain-of-thought template. (Phase 11)

    Example:
        prompt base "Think step by step before answering."

    Makes the model use this system prompt for all future generations.
    """

    target: str  # Model alias to attach the prompt to
    text: str  # The system prompt text
@dataclass
class DistillCmd:
    """AST node for ``distill``: distill a big model's knowledge into a smaller one. (Phase 11)

    Example:
        distill base into "Qwen/Qwen3-1.7B" steps 200 -> student_model/

    Takes the big model's best answers and trains the small model on them,
    giving a fast model for easy questions and the full model for hard ones.
    """

    teacher: str  # The big model alias (source of knowledge)
    student: str  # The small model HF path
    steps: int = 200  # Training steps
    output: Optional[str] = None  # Where to save the student model; None when omitted
@dataclass
class RollbackCmd:
    """AST node for ``rollback``: undo the last training step. (Phase 11)

    Example:
        rollback base

    Reverts to the most recent snapshot, so one command restores the model
    if training made things worse.
    """

    target: str  # Model alias to roll back
# ============================================================================
# PHASE 12 - RL & FINE-TUNING (curriculum, star, best_of, exploit)
# ============================================================================
@dataclass
class CurriculumCmd:
    """AST node for ``curriculum``: progressive-difficulty training. (Phase 12)

    Example:
        curriculum base on "gsm8k" using grpo levels 3 steps 64

    Splits the dataset by difficulty and trains on easy first, then medium,
    then hard. Each level only starts once the model passes the previous one.
    """

    target: str  # Model alias
    dataset: str  # Dataset to train on
    method: str = "grpo"  # Training method
    levels: int = 3  # Number of difficulty levels
    steps: int = 64  # Steps per level
@dataclass
class StarCmd:
    """AST node for ``star``: Self-Taught Reasoner training. (Phase 12)

    Example:
        star base on "gsm8k" rounds 3 samples 8

    Generate N solutions per problem, keep the ones with correct answers,
    train on those reasoning chains, and repeat — the model learns from its
    own successes.
    """

    target: str  # Model alias
    dataset: str  # Dataset with known answers
    rounds: int = 3  # Number of STaR iterations
    samples: int = 8  # Solutions to generate per problem
@dataclass
class BestOfCmd:
    """AST node for ``best_of``: generate N answers, score all, train on the best. (Phase 12)

    Example:
        best_of base on "gsm8k" n 8 steps 32

    For each training problem: generate N answers, score them all, keep only
    the best one, and train on that — like ``vote`` but for training.
    Per the original notes: 80-90% of RLHF gains at 5-30% of the cost (test_16).
    """

    target: str  # Model alias
    dataset: str  # Dataset to train on
    n: int = 8  # How many answers to generate per problem
    steps: int = 32  # Training steps on the filtered data
@dataclass
class ExploitCmd:
    """AST node for ``exploit``: controlled reward hacking. (Phase 12)

    Example:
        exploit base on "gsm8k" samples 16 -> exploit_data.jsonl

    Generate many diverse solutions (high temperature) and filter only on
    whether the answer is correct. Ugly solutions, shortcuts, and odd
    reasoning are kept as long as the answer is right, and the model is
    trained on that diverse set so it learns multiple paths to correct
    answers. The "hacks" often turn out to be genuinely clever shortcuts.
    """

    target: str  # Model alias
    dataset: str  # Dataset with verifiable answers
    samples: int = 16  # Solutions per problem (higher = more diversity)
    steps: int = 32  # Training steps on the exploited data
    output: Optional[str] = None  # Where to save the exploit data for inspection
@dataclass
class ArenaCmd:
    """AST node for ``arena``: RL with environment, memory, curiosity, and anti-lying. (Phase 13)

    The model enters an arena of challenges. For each challenge:
        1. It tries to solve it (exploration)
        2. Gets immediate reward/punishment (+1 correct, -1 wrong, -2 lying)
        3. Remembers what worked and didn't (memory bank persists across episodes)
        4. Gets a curiosity bonus for trying NEW approaches
        5. Creative solutions get cross-checked against standard approaches

    Example:
        arena base on "gsm8k" rounds 5 episodes 50 steps 64 curiosity 0.3
    """

    target: str  # Model alias
    dataset: str  # Dataset with verifiable answers
    rounds: int = 5  # RL rounds (re-train after each)
    episodes: int = 50  # Challenges per round
    steps: int = 64  # Training steps per round
    curiosity: float = 0.3  # Curiosity bonus weight
    output: Optional[str] = None  # Where to save the arena log; None when omitted
@dataclass
class ResearchArenaCmd:
    """AST node for ``research_arena``: RL on any topic using real-world knowledge. (Phase 13)

    Unlike ``arena`` (which uses a pre-made dataset), ``research_arena``:
        1. Takes a TOPIC string ("cancer biology", "number theory", anything)
        2. Pulls real papers/sources about that topic (web, arxiv, pubmed, local files)
        3. Extracts verifiable facts/claims from those sources
        4. Builds increasingly hard questions from the real knowledge
        5. Runs the model through the gauntlet, checking EVERY claim against sources
        6. ESCALATES difficulty on failure (fewer hints, stricter checking, harder questions)
        7. Persists memory so the model doesn't forget what it learned
        8. Punishes lying DOUBLE and rewards curiosity

    Example:
        research_arena base topic "cancer biology" sources "pubmed" rounds 5
    """

    target: str  # Model alias
    topic: str  # Research topic (any field)
    sources: str = "web"  # Where to pull knowledge: "web", "pubmed", "arxiv", or a filepath
    rounds: int = 5  # RL rounds (difficulty increases each round)
    episodes: int = 30  # Questions per round
    steps: int = 64  # Training steps per round
    curiosity: float = 0.3  # Curiosity bonus weight
    difficulty_scale: float = 0.25  # How much harder each round gets (0.25 = 25% harder)
    output: Optional[str] = None  # Where to save the research log; None when omitted
# ============================================================================
# BLOCKS (gates, budget, contracts, etc.)
# ============================================================================
@dataclass
class GateBlock:
    """AST node for ``gate``: validation gates that must pass before commit.

    Example:
        gate {
            must_pass = [canary, perplexity, thinking_mode]
        }
    """

    # Gate names; default_factory gives each node its own list.
    must_pass: list[str] = field(default_factory=list)
@dataclass
class BudgetBlock:
    """AST node for ``budget``: resource limits; the compiler refuses plans that exceed them.

    Example:
        budget {
            max_gpu_hours = 8
            max_cost = 50.00
        }

    Every limit defaults to None, i.e. the limit was not specified.
    """

    max_gpu_hours: Optional[float] = None
    max_cost: Optional[float] = None
    max_tokens: Optional[int] = None
    max_experiments: Optional[int] = None
@dataclass
class DataContractBlock:
    """AST node for ``data_contract``: schema enforcement on training data. (Phase 4, ForgeSpec 2.0)

    Example:
        data_contract {
            required_fields = [prompt, response]
            min_samples = 100
            max_perplexity = 50.0
        }

    The compiler checks training data at synth/train time.
    """

    # Field names each sample must have; default_factory gives each node its own list.
    required_fields: list[str] = field(default_factory=list)
    min_samples: Optional[int] = None  # None when not specified
    max_perplexity: Optional[float] = None  # None when not specified
@dataclass
class RewardContractBlock:
    """AST node for ``reward_contract``: verified definitions of what counts as "correct". (Phase 4, ForgeSpec 2.0)

    Example:
        reward_contract {
            verifiers = [code_compiles, math_correct, no_hallucination]
            min_reward = 0.3
        }

    Used by train (GRPO) to enforce reward quality.
    No learned reward model — verified rewards only (test_16).
    """

    # Verifier names; default_factory gives each node its own list.
    verifiers: list[str] = field(default_factory=list)
    min_reward: Optional[float] = None  # None when not specified
# ============================================================================
# TOP-LEVEL PROGRAM
# ============================================================================
@dataclass
class TDProgram:
    """A complete parsed .td file — commands in order plus global blocks.

    Each global block attribute is None when the script does not contain
    that block.
    """

    # Command nodes in script order; default_factory gives each program its own list.
    commands: list[Any] = field(default_factory=list)
    gates: Optional[GateBlock] = None
    budget: Optional[BudgetBlock] = None
    data_contract: Optional[DataContractBlock] = None
    reward_contract: Optional[RewardContractBlock] = None
    setup: Optional[SetupBlock] = None
    on_error: Optional[OnErrorBlock] = None
    log: Optional[LogBlock] = None
    source_file: Optional[str] = None  # NOTE(review): presumably the path of the parsed .td file — confirm
# Public API of this module. The order matches the original export list;
# the grouping comments follow the phase labels in each class docstring.
__all__ = [
    # Phase 1 — core commands
    "LoadCmd", "MergeCmd", "HealCmd", "EvalCmd", "CommitCmd",
    # Phase 2 — placeholders
    "SynthCmd", "TrainCmd", "DebateCmd", "DiagnoseCmd",
    # Phase 3 — fork / reset / prune / edit
    "ForkCmd", "ResetCmd", "PruneCmd", "EditCmd",
    # Phase 7 — loop control
    "RepeatBlock", "IfBlock",
    # Phase 6 — easy merge
    "FuseCmd", "AbsorbCmd",
    # Phase 4 — lineage and economics
    "SnapshotCmd", "ReportCmd",
    # Phase 8 — autopilot
    "NotifyCmd", "SaveCmd", "SetupBlock", "OnErrorBlock",
    # Global blocks
    "GateBlock", "BudgetBlock", "DataContractBlock", "RewardContractBlock",
    # Phase 9 — schedule
    "ScheduleCmd",
    # Phase 10 — toolbox
    "DownloadCmd", "LogBlock", "CompareCmd", "VerifyCmd",
    # Phase 11 — intelligence
    "VoteCmd", "PromptBlock", "DistillCmd", "RollbackCmd",
    # Phase 12 — RL and fine-tuning
    "CurriculumCmd", "StarCmd", "BestOfCmd", "ExploitCmd",
    # Phase 13 — arenas
    "ArenaCmd", "ResearchArenaCmd",
    # Top-level program
    "TDProgram",
]