Fixed code: vocab mismatch fix for cross-arch merging (Llama/Falcon)

5d61448 verified 3 months ago

23.3 kB

	"""
	TD Lang AST Nodes — Dataclass containers for each parsed command.

	Each .td command becomes one of these nodes after parsing.
	Phase 1 nodes are compiled into runnable Python; Phase 2 nodes are stubs so
	the compiler can reject them with a clear error until they are implemented.
	"""

	from dataclasses import dataclass, field
	from typing import Any, List, Optional


	# ============================================================================
	# PHASE 1 COMMANDS
	# ============================================================================

	@dataclass
	class LoadCmd:
	    """Load a model and bind it to an alias.

	    Example: load "Qwen/Qwen3-VL-8B-Instruct" as base

	    Plain data container: neither field is validated here.
	    """

	    model_ref: str  # HuggingFace path or local path
	    alias: str  # Name to use in the rest of the script


	@dataclass
	class MergeCmd:
	    """Merge a source model into a target using a named method.

	    Example: merge "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B" into base using transport strength 0.5

	    Plain data container: the method name and the strength range are not
	    checked here.
	    """

	    source: str  # Model path or alias to merge from
	    target: str  # Alias to merge into (must be loaded first)
	    method: str  # "transport", "slerp", "ties", "dare"
	    strength: float = 0.5  # 0.0 = keep target, 1.0 = keep source


	@dataclass
	class HealCmd:
	    """Run a QLoRA healing fine-tune on a model.

	    Example: heal base lora_r 32 epochs 2
	    """

	    target: str  # Alias of model to heal
	    lora_r: int = 32  # LoRA rank (higher = more capacity)
	    epochs: int = 2  # Training epochs


	@dataclass
	class EvalCmd:
	    """Run validation/evaluation on a model.

	    Example: eval base on "pile_sample" -> report.json

	    Both the dataset and the output path are optional and default to None.
	    """

	    target: str  # Alias of model to evaluate
	    dataset: Optional[str] = None  # Optional dataset name/path
	    output: Optional[str] = None  # Optional output file path


	@dataclass
	class CommitCmd:
	    """Save a model checkpoint, optionally requiring gates to pass.

	    Example: commit base if [canary, perplexity, thinking_mode]

	    ``gates`` is None when the command carries no ``if [...]`` clause.
	    """

	    target: str  # Alias of model to commit
	    gates: Optional[list[str]] = None  # Gate names that must pass


	# ============================================================================
	# PHASE 2 COMMANDS (placeholders — structure ready, not wired up yet)
	# ============================================================================

	@dataclass
	class SynthCmd:
	    """Generate synthetic training data from a model. (Phase 2)

	    ``filter_method`` and ``output`` are optional and default to None.
	    """

	    target: str
	    source: str
	    filter_method: Optional[str] = None
	    output: Optional[str] = None


	@dataclass
	class TrainCmd:
	    """Train a model on a dataset. (Phase 2)

	    ``steps`` and ``learning_rate`` default to None when not specified.
	    """

	    target: str
	    dataset: str
	    method: str = "grpo"  # "grpo", "sft", "dpo"
	    steps: Optional[int] = None
	    learning_rate: Optional[float] = None


	@dataclass
	class DebateCmd:
	    """Generate a multi-answer debate for preference pairs. (Phase 2)"""

	    target: str
	    rounds: int = 3
	    candidates: int = 8
	    output: Optional[str] = None


	@dataclass
	class DiagnoseCmd:
	    """Ask the model what it is bad at — self-diagnosis. (Phase 2)"""

	    target: str
	    output: Optional[str] = None


	@dataclass
	class ForkCmd:
	    """Branch current model weights for parallel experiments. (Phase 3)

	    Example: fork base as experiment_v2

	    Cheap fork: copies manifest + adapters, shares base weights (default).
	    """

	    source: str  # Alias of model to fork from
	    alias: str  # Name for the new branch


	@dataclass
	class ResetCmd:
	    """Revert a model to a previous checkpoint. (Phase 3)

	    Example: reset base to "checkpoint_042"

	    Deletes current model, clears CUDA cache, reloads from disk.
	    Must also reset optimizer state.
	    """

	    target: str  # Alias of model to reset
	    checkpoint: str  # Checkpoint name/path to revert to


	@dataclass
	class PruneCmd:
	    """Structural pruning — remove low-utility neurons/heads. (Phase 3)

	    Example: prune base using wanda aggressiveness 0.2

	    Safe zone: ~20% max (LLM-Pruner paper). Language backbone only.
	    The aggressiveness range is not enforced by this container.
	    """

	    target: str
	    method: str = "wanda"  # "wanda", "magnitude", "taylor"
	    aggressiveness: float = 0.2  # Fraction to remove (0.0-1.0)


	@dataclass
	class EditCmd:
	    """Surgical LoRA/DoRA editing on specific layers. (Phase 3)

	    Example: edit base layers 16-28 using lora lr 1e-4

	    "Try before buy": eval with adapter enabled vs disabled before merging.
	    The layer spec is stored as the raw string; it is not parsed here.
	    """

	    target: str
	    layers: str = "all"  # "all", "16-28", single number
	    method: str = "lora"  # "lora" or "dora"
	    learning_rate: Optional[float] = None


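	# ============================================================================
	# PHASE 4 COMMANDS — Contracts, Lineage, Economics (ForgeSpec 2.0, test_17)
	# NOTE: no definitions follow this header directly. The Phase 4 nodes
	# (SnapshotCmd, ReportCmd, DataContractBlock, RewardContractBlock) are
	# defined further down in this file.
	# ============================================================================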

	# ============================================================================
	# PHASE 7 — LOOP CONTROL (repeat, if/else)
	# ============================================================================

	@dataclass
	class RepeatBlock:
	    """Repeat a block of commands N times. (Phase 7 — Loop Control)

	    Example:
	        repeat 5 {
	            diagnose base
	            synth base from base
	            train base on "data.jsonl" using grpo steps 64
	            eval base
	        }

	    ``body`` defaults to a fresh empty list for each instance.
	    """

	    count: int  # Number of iterations
	    body: List[Any] = field(default_factory=list)  # Commands inside the block


	@dataclass
	class IfBlock:
	    """Conditional execution based on last eval result. (Phase 7 — Loop Control)

	    Example:
	        if eval_passed {
	            commit base
	        } else {
	            reset base to "last_good"
	        }

	    Condition checks the most recent eval result for the target.
	    Both branch bodies default to fresh empty lists.
	    """

	    condition: str  # "eval_passed", "gate_passed", etc.
	    target: Optional[str] = None  # Which model's eval to check
	    then_body: List[Any] = field(default_factory=list)
	    else_body: List[Any] = field(default_factory=list)


	@dataclass
	class FuseCmd:
	    """Fuse multiple models into a target in one shot. (Phase 6 — Easy Merge)

	    Example: fuse [deepseek-r1, mimo-7b, llama-3.1] into base

	    Auto-picks Transport and Merge, auto-sets per-model strength.
	    Intended for cross-architecture merging, where the source models may
	    each have a different architecture.
	    """

	    sources: list[str]  # List of model names/paths to fuse in
	    target: str  # Alias to merge into (must be loaded)
	    method: str = "transport"  # Default: transport and merge (cross-arch)
	    strategy: str = "equal"  # "equal" (same strength each), "weighted", "sequential"


	@dataclass
	class AbsorbCmd:
	    """Absorb a single model into target — simplified merge. (Phase 6 — Easy Merge)

	    Example: absorb "deepseek-ai/DeepSeek-R1" into base strength 0.5

	    One-liner for the common case of merging one model in.
	    """

	    source: str  # Model path or HF ID
	    target: str  # Alias to merge into
	    strength: float = 0.5  # 0.0=keep target, 1.0=keep source, default balanced


	@dataclass
	class SnapshotCmd:
	    """Save a content-hashed snapshot of model state for lineage tracking. (Phase 4)

	    Example: snapshot base -> snapshots/

	    Creates a content-addressed directory: snapshots/<sha256_prefix>/
	    Contains: model state, adapter state, prune spec, eval report, manifest.
	    """

	    target: str
	    output: Optional[str] = None  # Output directory (default: td_lang_outputs/snapshots/)


	@dataclass
	class ReportCmd:
	    """Generate an economics report for this run. (Phase 4)

	    Example: report -> economics.json

	    Tracks: GPU hours, cost estimate, tokens processed, experiments run,
	    time per command, cost breakdown by phase.
	    """

	    output: Optional[str] = None  # Output file path


	# ============================================================================
	# PHASE 8 — AUTOPILOT (setup, notify, save, on_error, resume)
	# ============================================================================

	@dataclass
	class NotifyCmd:
	    """Send a notification via ntfy.sh. (Phase 8 — Autopilot)

	    Example: notify "Training complete!"

	    Uses curl to POST to the configured ntfy topic.
	    """

	    message: str


	@dataclass
	class SaveCmd:
	    """Save/upload a model to cloud storage via rclone. (Phase 8 — Autopilot)

	    Example: save base to "gdrive:TD/models/v1"

	    Uses rclone to copy model checkpoint to Google Drive (or any rclone remote).
	    """

	    target: str  # Alias of model to save
	    destination: str  # rclone destination path


	@dataclass
	class SetupBlock:
	    """Auto-install dependencies and configure environment. (Phase 8 — Autopilot)

	    Example:
	        setup {
	            pip = [torch, transformers, peft, bitsandbytes, trl]
	            hf_token = env
	            notify = "ntfy.sh/my_ai"
	        }

	    Every field is optional; ``pip_packages`` defaults to a fresh empty list.
	    """

	    pip_packages: list[str] = field(default_factory=list)
	    hf_token: Optional[str] = None  # "env" = read HF_TOKEN from env
	    notify_url: Optional[str] = None  # ntfy.sh topic URL


	@dataclass
	class OnErrorBlock:
	    """Crash recovery behavior. (Phase 8 — Autopilot)

	    Example:
	        on_error {
	            retry = 3
	            fallback = reduce_batch
	            notify = true
	        }
	    """

	    retry: int = 3  # Number of retries per failed step
	    fallback: str = "reduce_batch"  # "reduce_batch", "skip", "snapshot_and_stop"
	    notify: bool = True  # Send ntfy notification on error


	# ============================================================================
	# PHASE 9 — SCHEDULE (time-based execution)
	# ============================================================================

	@dataclass
	class ScheduleCmd:
	    """Schedule a block of commands to run at a specific time or interval. (Phase 9)

	    Examples:
	        schedule "every 6h" { diagnose base; train base ... }
	        schedule "at 02:00" { train base on "data.jsonl" using grpo }
	        schedule "after 30m" { eval base -> results.json }

	    Patterns:
	        "every Nh/Nm" — repeat every N hours/minutes
	        "at HH:MM" — run once at that time
	        "after Nh/Nm" — delay then run once

	    The timing pattern is stored as the raw string; it is not parsed here.
	    """

	    timing: str  # "every 6h", "at 02:00", "after 30m"
	    body: List[Any] = field(default_factory=list)  # Commands inside the block


	# ============================================================================
	# PHASE 10 - TOOLBOX (download, log, compare, verify)
	# ============================================================================

	@dataclass
	class DownloadCmd:
	    """Download a dataset from HuggingFace. (Phase 10)

	    Example: download "gsm8k" as math_data

	    Pulls a dataset from HuggingFace and stores it for training/eval.
	    """

	    dataset: str  # HuggingFace dataset path
	    alias: str  # Name to reference it later
	    split: str = "train"  # Which split to download


	@dataclass
	class LogBlock:
	    """Save all pipeline output to a log file. (Phase 10)

	    Example: log "training_log.txt"

	    Everything printed to console also goes to this file.
	    """

	    filepath: str  # Path to save log


	@dataclass
	class CompareCmd:
	    """Compare source model vs merged model - knowledge retention test. (Phase 10)

	    Example: compare base vs "deepseek-ai/DeepSeek-R1" questions 50

	    Tests both models on the same questions and shows what % the merged
	    model retained from the source. Proves the merge actually worked.
	    """

	    target: str  # The merged model alias
	    source: str  # Source model to compare against (HF path)
	    questions: int = 50  # Number of test questions
	    output: Optional[str] = None  # Optional output file


	@dataclass
	class VerifyCmd:
	    """Verify model answers are actually correct. (Phase 10)

	    Example: verify base on "gsm8k" questions 100 -> verify_results.json

	    Runs the model on questions with KNOWN correct answers and checks
	    if the model got them right. Returns accuracy percentage.
	    """

	    target: str  # Model alias to test
	    dataset: str  # Dataset with known answers
	    questions: int = 100  # Number of questions to test
	    output: Optional[str] = None  # Optional output file


	# ============================================================================
	# PHASE 11 - INTELLIGENCE (vote, prompt, distill, rollback)
	# ============================================================================

	@dataclass
	class VoteCmd:
	    """Majority voting - generate N answers, pick the one most agree on. (Phase 11)

	    Example: vote base "What is 15 * 23?" samples 5

	    Generates N answers to the same question, then picks the most common one.
	    Proven to boost accuracy 10-20% with zero training.
	    """

	    target: str  # Model alias
	    question: str  # Question to vote on
	    samples: int = 5  # Number of answers to generate
	    output: Optional[str] = None  # Optional output file


	@dataclass
	class PromptBlock:
	    """Attach a system prompt or chain-of-thought template to a model. (Phase 11)

	    Example:
	        prompt base "Think step by step before answering."

	    Makes the model use this system prompt for all future generations.
	    """

	    target: str  # Model alias to attach prompt to
	    text: str  # The system prompt text


	@dataclass
	class DistillCmd:
	    """Distill a big model's knowledge into a smaller one. (Phase 11)

	    Example: distill base into "Qwen/Qwen3-1.7B" steps 200 -> student_model/

	    Takes the big model's best answers and trains the small model on them.
	    You get a fast model for easy questions, full model for hard ones.
	    """

	    teacher: str  # The big model alias (source of knowledge)
	    student: str  # The small model HF path
	    steps: int = 200  # Training steps
	    output: Optional[str] = None  # Where to save the student model


	@dataclass
	class RollbackCmd:
	    """Undo the last training step. (Phase 11)

	    Example: rollback base

	    Reverts to the most recent snapshot. If training made things worse,
	    one command brings it back.
	    """

	    target: str  # Model alias to rollback


	# ============================================================================
	# PHASE 12 - RL & FINE-TUNING (curriculum, star, best_of, exploit)
	# ============================================================================

	@dataclass
	class CurriculumCmd:
	    """Progressive difficulty training - start easy, get harder. (Phase 12)

	    Example: curriculum base on "gsm8k" using grpo levels 3 steps 64

	    Splits dataset by difficulty, trains on easy first, then medium, then hard.
	    Each level only starts when the model passes the previous one.
	    """

	    target: str  # Model alias
	    dataset: str  # Dataset to train on
	    method: str = "grpo"  # Training method
	    levels: int = 3  # Number of difficulty levels
	    steps: int = 64  # Steps per level


	@dataclass
	class StarCmd:
	    """Self-Taught Reasoner - train on own correct reasoning chains. (Phase 12)

	    Example: star base on "gsm8k" rounds 3 samples 8

	    Generate N solutions per problem. Keep the ones with correct answers.
	    Train on the correct reasoning chains. Repeat.
	    The model literally learns from its own successes.
	    """

	    target: str  # Model alias
	    dataset: str  # Dataset with known answers
	    rounds: int = 3  # Number of STaR iterations
	    samples: int = 8  # Solutions to generate per problem


	@dataclass
	class BestOfCmd:
	    """Generate N answers, score all, train on the best. (Phase 12)

	    Example: best_of base on "gsm8k" n 8 steps 32

	    For each training problem: generate N answers, score them all,
	    keep only the best one, train on that. Like vote but for training.
	    80-90% of RLHF gains at 5-30% of the cost (test_16).
	    """

	    target: str  # Model alias
	    dataset: str  # Dataset to train on
	    n: int = 8  # How many answers to generate per problem
	    steps: int = 32  # Training steps on the filtered data


	@dataclass
	class ExploitCmd:
	    """Controlled reward hacking - keep ALL correct solutions regardless of method. (Phase 12)

	    Example: exploit base on "gsm8k" samples 16 -> exploit_data.jsonl

	    Generate many diverse solutions (high temp). Only filter: is the answer correct?
	    Keep ugly solutions, shortcuts, weird reasoning - as long as the answer is right.
	    Train on the diverse set so the model learns multiple paths to correct answers.
	    The "hacks" often turn out to be genuinely clever shortcuts.
	    """

	    target: str  # Model alias
	    dataset: str  # Dataset with verifiable answers
	    samples: int = 16  # Solutions per problem (higher = more diversity)
	    steps: int = 32  # Training steps on the exploited data
	    output: Optional[str] = None  # Save the exploit data for inspection


	@dataclass
	class ArenaCmd:
	    """Real RL with environment, memory, curiosity, and anti-lying. (Phase 13)

	    The model enters an arena of challenges. For each challenge:
	        1. It tries to solve it (exploration)
	        2. Gets immediate reward/punishment (+1 correct, -1 wrong, -2 lying)
	        3. Remembers what worked and didn't (memory bank persists across episodes)
	        4. Gets curiosity bonus for trying NEW approaches
	        5. Creative solutions get cross-checked against standard approaches

	    Example: arena base on "gsm8k" rounds 5 episodes 50 steps 64 curiosity 0.3
	    """

	    target: str  # Model alias
	    dataset: str  # Dataset with verifiable answers
	    rounds: int = 5  # RL rounds (re-train after each)
	    episodes: int = 50  # Challenges per round
	    steps: int = 64  # Training steps per round
	    curiosity: float = 0.3  # Curiosity bonus weight
	    output: Optional[str] = None  # Save arena log


	@dataclass
	class ResearchArenaCmd:
	    """Research arena — RL on ANY topic using real-world knowledge. (Phase 13)

	    Unlike arena (which uses a pre-made dataset), research_arena:
	        1. Takes a TOPIC string ("cancer biology", "number theory", anything)
	        2. Pulls real papers/sources about that topic (web, arxiv, pubmed, local files)
	        3. Extracts verifiable facts/claims from those sources
	        4. Builds increasingly hard questions from the real knowledge
	        5. Runs the model through the gauntlet, checking EVERY claim against sources
	        6. Difficulty ESCALATES on failure (fewer hints, stricter checking, harder questions)
	        7. Memory persists so it doesn't forget what it learned
	        8. Lying gets punished DOUBLE, curiosity rewarded

	    Example: research_arena base topic "cancer biology" sources "pubmed" rounds 5
	    """

	    target: str  # Model alias
	    topic: str  # Research topic (any field)
	    sources: str = "web"  # Where to pull knowledge: "web", "pubmed", "arxiv", or filepath
	    rounds: int = 5  # RL rounds (difficulty increases each round)
	    episodes: int = 30  # Questions per round
	    steps: int = 64  # Training steps per round
	    curiosity: float = 0.3  # Curiosity bonus weight
	    difficulty_scale: float = 0.25  # How much harder each round gets (0.25 = 25% harder)
	    output: Optional[str] = None  # Save research log


	# ============================================================================
	# BLOCKS (gates, budget, contracts, etc.)
	# ============================================================================

	@dataclass
	class GateBlock:
	    """Validation gates that must pass before commit.

	    Example:
	        gate {
	            must_pass = [canary, perplexity, thinking_mode]
	        }

	    ``must_pass`` defaults to a fresh empty list for each instance.
	    """

	    must_pass: list[str] = field(default_factory=list)


	@dataclass
	class BudgetBlock:
	    """Resource budget — compiler refuses plans that exceed limits.

	    Example:
	        budget {
	            max_gpu_hours = 8
	            max_cost = 50.00
	        }

	    Every limit is optional and defaults to None.
	    """

	    max_gpu_hours: Optional[float] = None
	    max_cost: Optional[float] = None
	    max_tokens: Optional[int] = None
	    max_experiments: Optional[int] = None


	@dataclass
	class DataContractBlock:
	    """Schema enforcement on training data. (Phase 4, ForgeSpec 2.0)

	    Example:
	        data_contract {
	            required_fields = [prompt, response]
	            min_samples = 100
	            max_perplexity = 50.0
	        }

	    Compiler checks training data at synth/train time.
	    """

	    required_fields: list[str] = field(default_factory=list)
	    min_samples: Optional[int] = None
	    max_perplexity: Optional[float] = None


	@dataclass
	class RewardContractBlock:
	    """Verified reward definitions — what counts as "correct". (Phase 4, ForgeSpec 2.0)

	    Example:
	        reward_contract {
	            verifiers = [code_compiles, math_correct, no_hallucination]
	            min_reward = 0.3
	        }

	    Used by train (GRPO) to enforce reward quality.
	    No learned reward model — verified rewards only (test_16).
	    """

	    verifiers: list[str] = field(default_factory=list)
	    min_reward: Optional[float] = None


	# ============================================================================
	# TOP-LEVEL PROGRAM
	# ============================================================================

	@dataclass
	class TDProgram:
	    """A complete parsed .td file — commands in order plus global blocks.

	    ``commands`` defaults to a fresh empty list; every global block and
	    ``source_file`` default to None.
	    """

	    commands: List[Any] = field(default_factory=list)
	    gates: Optional[GateBlock] = None
	    budget: Optional[BudgetBlock] = None
	    data_contract: Optional[DataContractBlock] = None
	    reward_contract: Optional[RewardContractBlock] = None
	    setup: Optional[SetupBlock] = None
	    on_error: Optional[OnErrorBlock] = None
	    log: Optional[LogBlock] = None
	    source_file: Optional[str] = None


	# Public API of this module: every AST node class, in the original export order.
	__all__ = [
	    # Phase 1 — core commands
	    "LoadCmd",
	    "MergeCmd",
	    "HealCmd",
	    "EvalCmd",
	    "CommitCmd",
	    # Phase 2
	    "SynthCmd",
	    "TrainCmd",
	    "DebateCmd",
	    "DiagnoseCmd",
	    # Phase 3
	    "ForkCmd",
	    "ResetCmd",
	    "PruneCmd",
	    "EditCmd",
	    # Phase 7 — loop control
	    "RepeatBlock",
	    "IfBlock",
	    # Phase 6 — easy merge
	    "FuseCmd",
	    "AbsorbCmd",
	    # Phase 4 — lineage and economics
	    "SnapshotCmd",
	    "ReportCmd",
	    # Phase 8 — autopilot
	    "NotifyCmd",
	    "SaveCmd",
	    "SetupBlock",
	    "OnErrorBlock",
	    # Global blocks
	    "GateBlock",
	    "BudgetBlock",
	    "DataContractBlock",
	    "RewardContractBlock",
	    # Phase 9 — schedule
	    "ScheduleCmd",
	    # Phase 10 — toolbox
	    "DownloadCmd",
	    "LogBlock",
	    "CompareCmd",
	    "VerifyCmd",
	    # Phase 11 — intelligence
	    "VoteCmd",
	    "PromptBlock",
	    "DistillCmd",
	    "RollbackCmd",
	    # Phase 12 — RL and fine-tuning
	    "CurriculumCmd",
	    "StarCmd",
	    "BestOfCmd",
	    "ExploitCmd",
	    # Phase 13 — arenas
	    "ArenaCmd",
	    "ResearchArenaCmd",
	    # Top-level program
	    "TDProgram",
	]