"""
TD Lang AST Nodes — Dataclass containers for each parsed command.

Each .td command becomes one of these nodes after parsing.
Phase 1 nodes are compiled into runnable Python; Phase 2 nodes are stubs so
the compiler can reject them with a clear error until they are implemented.
"""

from dataclasses import dataclass, field
from typing import Any, List, Optional


# ============================================================================
# PHASE 1 COMMANDS
# ============================================================================

@dataclass
class LoadCmd:
    """AST node for the ``load`` command: bind a model reference to an alias.

    Example: load "Qwen/Qwen3-VL-8B-Instruct" as base
    """
    # Where the model comes from: a HuggingFace path or a local path.
    model_ref: str
    # Name the rest of the script uses to refer to this model.
    alias: str


@dataclass
class MergeCmd:
    """AST node for the ``merge`` command: blend a source model into a target.

    Example: merge "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B" into base using transport strength 0.5
    """
    # Model path or alias to merge from.
    source: str
    # Alias receiving the merge; it must already be loaded.
    target: str
    # Merge method name: "transport", "slerp", "ties", or "dare".
    method: str
    # Blend weight: 0.0 keeps the target, 1.0 keeps the source.
    strength: float = 0.5


@dataclass
class HealCmd:
    """AST node for the ``heal`` command: a QLoRA healing fine-tune of one model.

    Example: heal base lora_r 32 epochs 2
    """
    # Alias of the model to heal.
    target: str
    # LoRA rank; a higher rank means more adapter capacity.
    lora_r: int = 32
    # Number of training epochs.
    epochs: int = 2


@dataclass
class EvalCmd:
    """AST node for the ``eval`` command: validate/evaluate a model.

    Example: eval base on "pile_sample" -> report.json
    """
    # Alias of the model under evaluation.
    target: str
    # Dataset name or path; defaults to None (no dataset given).
    dataset: Optional[str] = None
    # Output file path; defaults to None (no output given).
    output: Optional[str] = None


@dataclass
class CommitCmd:
    """AST node for the ``commit`` command: save a checkpoint, optionally gated.

    Example: commit base if [canary, perplexity, thinking_mode]
    """
    # Alias of the model to commit.
    target: str
    # Names of the gates that must pass; defaults to None (no gate list).
    gates: Optional[list[str]] = None


# ============================================================================
# PHASE 2 COMMANDS (placeholders — structure ready, not wired up yet)
# Also holds the PHASE 3 nodes (fork, reset, prune, edit).
# ============================================================================

@dataclass
class SynthCmd:
    """AST node for ``synth``: generate synthetic training data from a model. (Phase 2)

    The per-field notes below are inferred from the field names only; the
    exact roles are not spelled out in this module — TODO confirm with the parser.
    """
    # Model alias the command is applied to (presumably the generator).
    target: str
    # Presumably where the generated data is drawn from; verify against the parser.
    source: str
    # Optional filtering method name; defaults to None.
    filter_method: Optional[str] = None
    # Optional output path; defaults to None.
    output: Optional[str] = None


@dataclass
class TrainCmd:
    """AST node for ``train``: train a model on a dataset. (Phase 2)"""
    # Alias of the model to train.
    target: str
    # Dataset to train on.
    dataset: str
    # Training method name: "grpo", "sft", or "dpo".
    method: str = "grpo"
    # Optional step count; defaults to None (not specified).
    steps: Optional[int] = None
    # Optional learning rate; defaults to None (not specified).
    learning_rate: Optional[float] = None


@dataclass
class DebateCmd:
    """AST node for ``debate``: multi-answer debate yielding preference pairs. (Phase 2)"""
    # Alias of the model taking part in the debate.
    target: str
    # Number of debate rounds.
    rounds: int = 3
    # Number of candidate answers (presumably per round — TODO confirm).
    candidates: int = 8
    # Optional output path; defaults to None.
    output: Optional[str] = None


@dataclass
class DiagnoseCmd:
    """AST node for ``diagnose``: have the model report its own weak spots. (Phase 2)"""
    # Alias of the model asked to self-diagnose.
    target: str
    # Optional output path; defaults to None.
    output: Optional[str] = None


@dataclass
class ForkCmd:
    """AST node for ``fork``: branch a model's weights for parallel experiments. (Phase 3)

    Example: fork base as experiment_v2

    Intended default (carried out elsewhere, not by this node): a cheap fork
    that copies the manifest and adapters while sharing the base weights.
    """
    # Alias of the model being forked.
    source: str
    # Name given to the new branch.
    alias: str


@dataclass
class ResetCmd:
    """AST node for ``reset``: revert a model to an earlier checkpoint. (Phase 3)

    Example: reset base to "checkpoint_042"

    Intended semantics (carried out elsewhere, not by this node): delete the
    current model, clear the CUDA cache, reload from disk, and also reset the
    optimizer state.
    """
    # Alias of the model to reset.
    target: str
    # Name or path of the checkpoint to revert to.
    checkpoint: str


@dataclass
class PruneCmd:
    """AST node for ``prune``: structural removal of low-utility neurons/heads. (Phase 3)

    Example: prune base using wanda aggressiveness 0.2

    Design note: roughly 20% is treated as the safe maximum (LLM-Pruner
    paper), and pruning is meant for the language backbone only.
    """
    # Alias of the model to prune.
    target: str
    # Pruning method name: "wanda", "magnitude", or "taylor".
    method: str = "wanda"
    # Fraction to remove, expressed in the range 0.0-1.0.
    aggressiveness: float = 0.2


@dataclass
class EditCmd:
    """AST node for ``edit``: surgical LoRA/DoRA editing of chosen layers. (Phase 3)

    Example: edit base layers 16-28 using lora lr 1e-4

    Design note ("try before buy"): evaluate with the adapter enabled versus
    disabled before merging it.
    """
    # Alias of the model to edit.
    target: str
    # Layer selector kept as text: "all", a range such as "16-28", or a single number.
    layers: str = "all"
    # Adapter method: "lora" or "dora".
    method: str = "lora"
    # Optional learning rate; defaults to None (not specified).
    learning_rate: Optional[float] = None


# ============================================================================
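# PHASE 4 COMMANDS — Contracts, Lineage, Economics (ForgeSpec 2.0, test_17)
# (No nodes sit directly under this banner: snapshot/report follow the Phase 6
# nodes below, and the contract blocks live in the BLOCKS section.)
# ============================================================================

# ============================================================================
# PHASE 7 — LOOP CONTROL (repeat, if/else)
# Also holds PHASE 6 — EASY MERGE (fuse, absorb) and PHASE 4 (snapshot, report).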
# ============================================================================

@dataclass
class RepeatBlock:
    """Repeat a block of commands N times. (Phase 7 — Loop Control)

    Example:
        repeat 5 {
            diagnose base
            synth base from base
            train base on "data.jsonl" using grpo steps 64
            eval base
        }
    """
    # Number of iterations.
    count: int
    # Command nodes inside the block, in script order. Each instance gets its
    # own fresh list (default_factory), so bodies are never shared.
    # Annotated with the builtin generic ``list[...]`` to match the other
    # list-typed fields in this module.
    body: list[Any] = field(default_factory=list)


@dataclass
class IfBlock:
    """Conditional execution based on last eval result. (Phase 7 — Loop Control)

    Example:
        if eval_passed {
            commit base
        } else {
            reset base to "last_good"
        }

    Condition checks the most recent eval result for the target.
    """
    # Condition name, e.g. "eval_passed" or "gate_passed".
    condition: str
    # Which model's eval to check; defaults to None (no explicit target).
    target: Optional[str] = None
    # Command nodes for the ``if`` branch and the ``else`` branch. Both default
    # to separate fresh lists; an empty else_body means no else branch content.
    # Annotated with the builtin generic ``list[...]`` to match the other
    # list-typed fields in this module.
    then_body: list[Any] = field(default_factory=list)
    else_body: list[Any] = field(default_factory=list)


@dataclass
class FuseCmd:
    """AST node for ``fuse``: merge several models into one target at once. (Phase 6 — Easy Merge)

    Example: fuse [deepseek-r1, mimo-7b, llama-3.1] into base

    Intended semantics (carried out elsewhere, not by this node): the
    transport-and-merge method is picked automatically, per-model strength is
    set automatically, and sources with differing architectures are supported.
    """
    # Model names/paths to fuse in.
    sources: list[str]
    # Alias receiving the fusion; it must already be loaded.
    target: str
    # Merge method; the default "transport" is the cross-architecture one.
    method: str = "transport"
    # How strength is spread over sources: "equal" (same for each),
    # "weighted", or "sequential".
    strategy: str = "equal"


@dataclass
class AbsorbCmd:
    """AST node for ``absorb``: simplified one-model merge into a target. (Phase 6 — Easy Merge)

    Example: absorb "deepseek-ai/DeepSeek-R1" into base strength 0.5

    A one-liner for the common case of merging a single model in.
    """
    # Model path or HF ID to absorb.
    source: str
    # Alias receiving the merge.
    target: str
    # Blend weight: 0.0 keeps the target, 1.0 keeps the source; 0.5 is balanced.
    strength: float = 0.5


@dataclass
class SnapshotCmd:
    """AST node for ``snapshot``: content-hashed model snapshot for lineage tracking. (Phase 4)

    Example: snapshot base -> snapshots/

    Intended result (produced elsewhere, not by this node): a
    content-addressed directory ``snapshots/<sha256_prefix>/`` holding model
    state, adapter state, prune spec, eval report, and manifest.
    """
    # Alias of the model to snapshot.
    target: str
    # Output directory; None stands for the default td_lang_outputs/snapshots/.
    output: Optional[str] = None


@dataclass
class ReportCmd:
    """AST node for ``report``: economics report for the current run. (Phase 4)

    Example: report -> economics.json

    The report is meant to track GPU hours, a cost estimate, tokens
    processed, experiments run, time per command, and cost per phase.
    """
    # Output file path; defaults to None (not specified).
    output: Optional[str] = None


# ============================================================================
# PHASE 8 — AUTOPILOT (setup, notify, save, on_error, resume)
# ============================================================================

@dataclass
class NotifyCmd:
    """AST node for ``notify``: push a notification through ntfy.sh. (Phase 8 — Autopilot)

    Example: notify "Training complete!"

    Intended delivery (done elsewhere, not by this node): a curl POST to the
    configured ntfy topic.
    """
    # Text of the notification.
    message: str


@dataclass
class SaveCmd:
    """AST node for ``save``: upload a model to cloud storage via rclone. (Phase 8 — Autopilot)

    Example: save base to "gdrive:TD/models/v1"

    Intended transfer (done elsewhere, not by this node): rclone copies the
    model checkpoint to Google Drive or any other rclone remote.
    """
    # Alias of the model to save.
    target: str
    # rclone destination path.
    destination: str


@dataclass
class SetupBlock:
    """AST node for ``setup``: dependency install and environment config. (Phase 8 — Autopilot)

    Example:
        setup {
            pip = [torch, transformers, peft, bitsandbytes, trl]
            hf_token = env
            notify = "ntfy.sh/my_ai"
        }
    """
    # Package names from the ``pip = [...]`` entry; a fresh empty list by default.
    pip_packages: list[str] = field(default_factory=list)
    # HuggingFace token setting; the value "env" means read HF_TOKEN from the environment.
    hf_token: Optional[str] = None
    # ntfy.sh topic URL; defaults to None.
    notify_url: Optional[str] = None


@dataclass
class OnErrorBlock:
    """AST node for ``on_error``: crash-recovery settings. (Phase 8 — Autopilot)

    Example:
        on_error {
            retry = 3
            fallback = reduce_batch
            notify = true
        }
    """
    # How many times a failed step is retried.
    retry: int = 3
    # Fallback strategy: "reduce_batch", "skip", or "snapshot_and_stop".
    fallback: str = "reduce_batch"
    # Whether an ntfy notification is sent on error.
    notify: bool = True


# ============================================================================
# PHASE 9 — SCHEDULE (time-based execution)
# ============================================================================

@dataclass
class ScheduleCmd:
    """Schedule a block of commands to run at a specific time or interval. (Phase 9)

    Examples:
        schedule "every 6h" { diagnose base; train base ... }
        schedule "at 02:00" { train base on "data.jsonl" using grpo }
        schedule "after 30m" { eval base -> results.json }

    Patterns:
        "every Nh/Nm" — repeat every N hours/minutes
        "at HH:MM"    — run once at that time
        "after Nh/Nm" — delay then run once
    """
    # Raw timing expression as written, e.g. "every 6h", "at 02:00", "after 30m".
    # It is stored unparsed; this node does not interpret it.
    timing: str
    # Command nodes inside the block. Each instance gets its own fresh list.
    # Annotated with the builtin generic ``list[...]`` to match the other
    # list-typed fields in this module.
    body: list[Any] = field(default_factory=list)


# ============================================================================
# PHASE 10 - TOOLBOX (download, log, compare, verify)
# ============================================================================

@dataclass
class DownloadCmd:
    """AST node for ``download``: fetch a dataset from HuggingFace. (Phase 10)

    Example: download "gsm8k" as math_data

    The dataset is meant to be pulled and kept available for later
    training/eval commands under the given alias.
    """
    # HuggingFace dataset path.
    dataset: str
    # Name used to refer to the dataset later in the script.
    alias: str
    # Which split to download.
    split: str = "train"


@dataclass
class LogBlock:
    """AST node for ``log``: mirror all pipeline output into a file. (Phase 10)

    Example: log "training_log.txt"

    Intended effect: whatever is printed to the console is also written to
    this file.
    """
    # Path of the log file.
    filepath: str


@dataclass
class CompareCmd:
    """AST node for ``compare``: knowledge-retention test, source vs merged model. (Phase 10)

    Example: compare base vs "deepseek-ai/DeepSeek-R1" questions 50

    Intended check: both models answer the same questions, and the result
    shows what percentage the merged model retained from the source.
    """
    # Alias of the merged model.
    target: str
    # Source model to compare against (HF path).
    source: str
    # Number of test questions.
    questions: int = 50
    # Optional output file; defaults to None.
    output: Optional[str] = None


@dataclass
class VerifyCmd:
    """AST node for ``verify``: check model answers against known-correct ones. (Phase 10)

    Example: verify base on "gsm8k" questions 100 -> verify_results.json

    Intended check: the model is run on questions whose correct answers are
    known, and the outcome is an accuracy percentage.
    """
    # Alias of the model to test.
    target: str
    # Dataset that carries the known answers.
    dataset: str
    # Number of questions to test.
    questions: int = 100
    # Optional output file; defaults to None.
    output: Optional[str] = None


# ============================================================================
# PHASE 11 - INTELLIGENCE (vote, prompt, distill, rollback)
# ============================================================================

@dataclass
class VoteCmd:
    """AST node for ``vote``: majority voting over N sampled answers. (Phase 11)

    Example: vote base "What is 15 * 23?" samples 5

    Intended procedure: generate N answers to the same question and keep the
    most common one. The original notes credit this with a 10-20% accuracy
    boost without any training.
    """
    # Alias of the model that answers.
    target: str
    # The question being voted on.
    question: str
    # How many answers to generate.
    samples: int = 5
    # Optional output file; defaults to None.
    output: Optional[str] = None


@dataclass
class PromptBlock:
    """AST node for ``prompt``: attach a system prompt / chain-of-thought template. (Phase 11)

    Example:
        prompt base "Think step by step before answering."

    Intended effect: the model uses this system prompt for every later
    generation.
    """
    # Alias of the model the prompt is attached to.
    target: str
    # The system prompt text itself.
    text: str


@dataclass
class DistillCmd:
    """AST node for ``distill``: transfer a big model's knowledge to a smaller one. (Phase 11)

    Example: distill base into "Qwen/Qwen3-1.7B" steps 200 -> student_model/

    Intended procedure: the big model's best answers become training data for
    the small model, giving a fast model for easy questions while the full
    model stays available for hard ones.
    """
    # Alias of the big model that supplies the knowledge.
    teacher: str
    # HF path of the small model being trained.
    student: str
    # Number of training steps.
    steps: int = 200
    # Where the student model is saved; defaults to None.
    output: Optional[str] = None


@dataclass
class RollbackCmd:
    """AST node for ``rollback``: undo the last training step. (Phase 11)

    Example: rollback base

    Intended effect: the model returns to its most recent snapshot, so a
    single command recovers from training that made things worse.
    """
    # Alias of the model to roll back.
    target: str


# ============================================================================
# PHASE 12 - RL & FINE-TUNING (curriculum, star, best_of, exploit)
# Also holds PHASE 13 - ARENA (arena, research_arena).
# ============================================================================

@dataclass
class CurriculumCmd:
    """AST node for ``curriculum``: progressive-difficulty training. (Phase 12)

    Example: curriculum base on "gsm8k" using grpo levels 3 steps 64

    Intended procedure: split the dataset by difficulty and train on the
    easy part first, then medium, then hard; a level begins only once the
    model has passed the one before it.
    """
    # Alias of the model to train.
    target: str
    # Dataset to train on.
    dataset: str
    # Training method name.
    method: str = "grpo"
    # Number of difficulty levels.
    levels: int = 3
    # Training steps per level.
    steps: int = 64


@dataclass
class StarCmd:
    """AST node for ``star``: Self-Taught Reasoner training loop. (Phase 12)

    Example: star base on "gsm8k" rounds 3 samples 8

    Intended procedure: generate N solutions per problem, keep those whose
    answer is correct, train on the kept reasoning chains, and repeat — the
    model learns from its own successes.
    """
    # Alias of the model to train.
    target: str
    # Dataset that carries the known answers.
    dataset: str
    # Number of STaR iterations.
    rounds: int = 3
    # Solutions generated per problem.
    samples: int = 8


@dataclass
class BestOfCmd:
    """AST node for ``best_of``: sample N answers, score them, train on the best. (Phase 12)

    Example: best_of base on "gsm8k" n 8 steps 32

    Intended procedure: for every training problem generate N answers, score
    all of them, and train only on the top one — the training-time analogue
    of ``vote``. The original notes cite 80-90% of RLHF gains at 5-30% of the
    cost (test_16).
    """
    # Alias of the model to train.
    target: str
    # Dataset to train on.
    dataset: str
    # Answers generated per problem.
    n: int = 8
    # Training steps on the filtered data.
    steps: int = 32


@dataclass
class ExploitCmd:
    """AST node for ``exploit``: controlled reward hacking. (Phase 12)

    Example: exploit base on "gsm8k" samples 16 -> exploit_data.jsonl

    Intended procedure: sample many diverse solutions (high temperature) and
    filter on one criterion only — whether the answer is correct. Ugly
    solutions, shortcuts, and odd reasoning are all kept, and the model is
    trained on that diverse set so it learns several routes to a correct
    answer. Per the original notes, such "hacks" often prove to be genuinely
    clever shortcuts.
    """
    # Alias of the model to train.
    target: str
    # Dataset whose answers can be verified.
    dataset: str
    # Solutions per problem; a higher count means more diversity.
    samples: int = 16
    # Training steps on the exploited data.
    steps: int = 32
    # Where the exploit data is saved for inspection; defaults to None.
    output: Optional[str] = None


@dataclass
class ArenaCmd:
    """AST node for ``arena``: RL with environment, memory, curiosity, anti-lying. (Phase 13)

    Intended flow for each challenge the model faces in the arena:
    1. Exploration — the model attempts a solution.
    2. Immediate feedback: +1 when correct, -1 when wrong, -2 for lying.
    3. A memory bank that persists across episodes records what worked and
       what did not.
    4. Trying NEW approaches earns a curiosity bonus.
    5. Creative solutions are cross-checked against standard approaches.

    Example: arena base on "gsm8k" rounds 5 episodes 50 steps 64 curiosity 0.3
    """
    # Alias of the model entering the arena.
    target: str
    # Dataset whose answers can be verified.
    dataset: str
    # Number of RL rounds; the model is re-trained after each.
    rounds: int = 5
    # Challenges per round.
    episodes: int = 50
    # Training steps per round.
    steps: int = 64
    # Weight of the curiosity bonus.
    curiosity: float = 0.3
    # Where the arena log is saved; defaults to None.
    output: Optional[str] = None


@dataclass
class ResearchArenaCmd:
    """AST node for ``research_arena``: RL on ANY topic from real-world knowledge. (Phase 13)

    Where ``arena`` works from a pre-made dataset, the intended flow here is:
    1. Accept a TOPIC string ("cancer biology", "number theory", anything).
    2. Pull real papers/sources on the topic (web, arxiv, pubmed, local files).
    3. Extract verifiable facts/claims from those sources.
    4. Build increasingly hard questions out of that real knowledge.
    5. Run the model through the gauntlet, checking EVERY claim against sources.
    6. ESCALATE difficulty on failure (fewer hints, stricter checking,
       harder questions).
    7. Persist memory so what was learned is not forgotten.
    8. Punish lying DOUBLE and reward curiosity.

    Example: research_arena base topic "cancer biology" sources "pubmed" rounds 5
    """
    # Alias of the model entering the arena.
    target: str
    # Research topic; any field is allowed.
    topic: str
    # Knowledge source: "web", "pubmed", "arxiv", or a filepath.
    sources: str = "web"
    # Number of RL rounds; difficulty rises every round.
    rounds: int = 5
    # Questions per round.
    episodes: int = 30
    # Training steps per round.
    steps: int = 64
    # Weight of the curiosity bonus.
    curiosity: float = 0.3
    # Per-round difficulty increase (0.25 means 25% harder each round).
    difficulty_scale: float = 0.25
    # Where the research log is saved; defaults to None.
    output: Optional[str] = None


# ============================================================================
# BLOCKS (gates, budget, contracts, etc.)
# ============================================================================

@dataclass
class GateBlock:
    """AST node for ``gate``: validation gates required before a commit.

    Example:
        gate {
            must_pass = [canary, perplexity, thinking_mode]
        }
    """
    # Gate names from the ``must_pass = [...]`` entry; a fresh empty list by default.
    must_pass: list[str] = field(default_factory=list)


@dataclass
class BudgetBlock:
    """AST node for ``budget``: resource limits the compiler enforces on a plan.

    Plans that exceed these limits are meant to be refused by the compiler.

    Example:
        budget {
            max_gpu_hours = 8
            max_cost = 50.00
        }
    """
    # Every limit is optional and defaults to None (not set).
    # Ceiling on GPU hours.
    max_gpu_hours: Optional[float] = None
    # Ceiling on cost.
    max_cost: Optional[float] = None
    # Ceiling on tokens.
    max_tokens: Optional[int] = None
    # Ceiling on the number of experiments.
    max_experiments: Optional[int] = None


@dataclass
class DataContractBlock:
    """AST node for ``data_contract``: schema rules for training data. (Phase 4, ForgeSpec 2.0)

    Example:
        data_contract {
            required_fields = [prompt, response]
            min_samples = 100
            max_perplexity = 50.0
        }

    The compiler is meant to check training data against this contract at
    synth/train time.
    """
    # Field names every sample must have; a fresh empty list by default.
    required_fields: list[str] = field(default_factory=list)
    # Minimum number of samples; defaults to None (not set).
    min_samples: Optional[int] = None
    # Maximum allowed perplexity; defaults to None (not set).
    max_perplexity: Optional[float] = None


@dataclass
class RewardContractBlock:
    """AST node for ``reward_contract``: what counts as "correct". (Phase 4, ForgeSpec 2.0)

    Example:
        reward_contract {
            verifiers = [code_compiles, math_correct, no_hallucination]
            min_reward = 0.3
        }

    Meant for ``train`` (GRPO) to enforce reward quality. Rewards are
    verified only — there is no learned reward model (test_16).
    """
    # Verifier names; a fresh empty list by default.
    verifiers: list[str] = field(default_factory=list)
    # Minimum reward; defaults to None (not set).
    min_reward: Optional[float] = None
    min_reward: Optional[float] = None


# ============================================================================
# TOP-LEVEL PROGRAM
# ============================================================================

@dataclass
class TDProgram:
    """A complete parsed .td file — commands in order plus global blocks.

    Every field has a default, so ``TDProgram()`` is an empty program with no
    commands and no global blocks set.
    """

    # Command nodes in script order. Each program gets its own fresh list.
    # Annotated with the builtin generic ``list[...]`` to match the other
    # list-typed fields in this module.
    commands: list[Any] = field(default_factory=list)
    # Global blocks: at most one of each kind is stored; None means that
    # block is not set on this program.
    gates: Optional[GateBlock] = None
    budget: Optional[BudgetBlock] = None
    data_contract: Optional[DataContractBlock] = None
    reward_contract: Optional[RewardContractBlock] = None
    setup: Optional[SetupBlock] = None
    on_error: Optional[OnErrorBlock] = None
    log: Optional[LogBlock] = None
    # Presumably the path of the .td file this program came from — TODO confirm.
    source_file: Optional[str] = None


# Public names exported by ``from <module> import *``. The order is exactly
# the original one; names are grouped several per line purely for readability.
__all__ = [
    # Phase 1 commands
    "LoadCmd", "MergeCmd", "HealCmd", "EvalCmd", "CommitCmd",
    # Phase 2 commands
    "SynthCmd", "TrainCmd", "DebateCmd", "DiagnoseCmd",
    # Phase 3 commands
    "ForkCmd", "ResetCmd", "PruneCmd", "EditCmd",
    # Loop control
    "RepeatBlock", "IfBlock",
    # Easy merge, snapshot, report
    "FuseCmd", "AbsorbCmd", "SnapshotCmd", "ReportCmd",
    # Autopilot
    "NotifyCmd", "SaveCmd", "SetupBlock", "OnErrorBlock",
    # Global blocks
    "GateBlock", "BudgetBlock", "DataContractBlock", "RewardContractBlock",
    # Schedule
    "ScheduleCmd",
    # Toolbox
    "DownloadCmd", "LogBlock", "CompareCmd", "VerifyCmd",
    # Intelligence
    "VoteCmd", "PromptBlock", "DistillCmd", "RollbackCmd",
    # RL and fine-tuning
    "CurriculumCmd", "StarCmd", "BestOfCmd", "ExploitCmd",
    # Arena
    "ArenaCmd", "ResearchArenaCmd",
    # Top-level program
    "TDProgram",
]