#!/usr/bin/env python3
"""
WebICoder v3 — Quick Start Example

Generate HTML websites from natural language prompts using MLX on Apple Silicon.

⚠️ MANDATORY: This script implements all 5 required rules for correct output.
See README.md for full documentation.

Usage:
    python example.py "Create a landing page for a coffee shop"
    python example.py --interactive
"""

import re
import sys

# ─── Configuration ──────────────────────────────────────────────────────────

MODEL_PATH = "."  # Current directory (the model repo)

# RULE 1: System prompt + Alpaca format (### Instruction / ### Response)
SYSTEM_PROMPT = (
    "You are WebICoder, an expert frontend web developer specializing in premium, "
    "Apple-inspired design. You create stunning websites using only HTML, CSS, and "
    "vanilla JavaScript. Your designs feature: minimalist layouts, elegant typography, "
    "smooth animations, glassmorphism effects, generous whitespace, and a refined "
    "color palette. You always produce complete, production-ready code."
)

# RULE 2: Stop sequences — MANDATORY to prevent infinite loops.
# "</html>" ends generation at the close of the document; the other entries
# cut off leaked training artifacts (a new instruction turn or system prompt).
STOP_SEQUENCES = ["</html>", "### Instruction:", "You are Deepcoder", "You are WebICoder"]

# RULE 4: Low temperature — MANDATORY for coherent HTML
DEFAULT_TEMP = 0.4
DEFAULT_MAX_TOKENS = 4096


# ─── RULE 1: Prompt Formatting (MANDATORY) ──────────────────────────────────

def format_prompt(user_input: str) -> str:
    """
    MANDATORY: Format user input into the model's training prompt format.

    The model was trained with Alpaca-style prompts. Sending raw text without
    this formatting will produce garbage output.
    """
    return f"{SYSTEM_PROMPT}\n\n### Instruction:\n{user_input}\n\n### Response:\n"


# ─── RULE 5: Post-Processing (MANDATORY) ────────────────────────────────────

def clean_html(text: str) -> str:
    """
    MANDATORY: Extract clean HTML from model output.

    The model may leak training artifacts (system prompt, instruction markers).
    This function strips them and returns only valid HTML.
    """
    # Remove system prompt leaks. IGNORECASE is required: the literal prompt
    # says "WebICoder" (capital C), which a case-sensitive match would miss.
    for pattern in [
        r"You are (?:Deep|Web[iI])coder.*?production-ready code\.\n*",
        r"### Instruction:.*",
        r"### Response:\s*",
    ]:
        text = re.sub(pattern, "", text, flags=re.DOTALL | re.IGNORECASE)

    # Extract a complete HTML document (non-greedy: stop at the first </html>)
    html_match = re.search(
        r"(<(?:!DOCTYPE\s+html|html)[\s\S]*?</html>)", text, re.IGNORECASE
    )
    if html_match:
        return html_match.group(1).strip()

    # Fallback: find any HTML content and wrap it into a full document
    html_start = re.search(r"<(?:!DOCTYPE|html|head|body|link)", text, re.IGNORECASE)
    if html_start:
        html = text[html_start.start():].strip()
        if not html.lower().startswith("<!doctype"):
            html = "<!DOCTYPE html>\n\n" + html
        if "</html>" not in html.lower():
            html += "\n</html>"
        return html

    # No HTML found at all: return the (cleaned) text as-is.
    return text.strip()


# ─── Generation ─────────────────────────────────────────────────────────────

def generate_html(prompt: str,
                  temperature: float = DEFAULT_TEMP,
                  max_tokens: int = DEFAULT_MAX_TOKENS) -> str:
    """
    Generate HTML from a natural language prompt.

    Implements all 5 mandatory rules:
      1. Prompt formatting (### Instruction / ### Response)
      2. Stop at </html>
      3. Repetition penalty (1.2, context=256)
      4. Low temperature (0.4)
      5. Post-processing (clean_html)

    Args:
        prompt: Natural-language description of the desired website.
        temperature: Sampling temperature; keep low for coherent HTML.
        max_tokens: Hard cap on the number of generated tokens.

    Returns:
        Cleaned HTML extracted from the model output.
    """
    # Imported lazily so the rest of this module (format_prompt, clean_html)
    # can be imported and reused on machines without MLX installed.
    from mlx_lm import load, stream_generate
    from mlx_lm.sample_utils import make_sampler, make_logits_processors

    print(f"[INFO] Loading model from: {MODEL_PATH}")
    model, tokenizer = load(MODEL_PATH)

    # RULE 1: Format the prompt
    formatted_prompt = format_prompt(prompt)

    # RULE 4: Low temperature sampler
    sampler = make_sampler(temp=temperature)

    # RULE 3: Repetition penalty — MANDATORY
    logits_processors = make_logits_processors(
        repetition_penalty=1.2,
        repetition_context_size=256,
    )

    print(f"[INFO] Generating (temp={temperature}, max_tokens={max_tokens}, rep_penalty=1.2)...")
    print("─" * 60)

    full_text = ""
    last_response = None
    for response in stream_generate(
        model,
        tokenizer,
        prompt=formatted_prompt,
        max_tokens=max_tokens,
        sampler=sampler,
        logits_processors=logits_processors,  # RULE 3
    ):
        last_response = response
        token_str = response.text
        full_text += token_str
        print(token_str, end="", flush=True)

        # RULE 2: Stop at </html> — MANDATORY
        should_stop = False
        for stop_seq in STOP_SEQUENCES:
            if stop_seq in full_text:
                idx = full_text.find(stop_seq)
                if stop_seq == "</html>":
                    # The closing tag is part of the document: keep it.
                    full_text = full_text[:idx + len(stop_seq)]
                else:
                    # Leaked artifact: drop it and everything after it.
                    full_text = full_text[:idx]
                should_stop = True
                break
        if should_stop or response.finish_reason is not None:
            break

    print("\n" + "─" * 60)
    if last_response:
        print(f"[INFO] Generated {last_response.generation_tokens} tokens at {last_response.generation_tps:.1f} tok/s")
        print(f"[INFO] Peak memory: {last_response.peak_memory:.2f} GB")

    # RULE 5: Clean the output — MANDATORY
    return clean_html(full_text)


# ─── Main ────────────────────────────────────────────────────────────────────

def _generate_and_save(prompt: str, output_file: str = "output.html") -> None:
    """Generate HTML for *prompt* and write it to *output_file* (UTF-8)."""
    html = generate_html(prompt)
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html)
    print(f"\n[INFO] Saved to {output_file} ({len(html)} chars)")


def main():
    """CLI entry point: single-prompt mode or an interactive REPL."""
    if len(sys.argv) > 1 and sys.argv[1] != "--interactive":
        # Single prompt mode: everything after the script name is the prompt.
        _generate_and_save(" ".join(sys.argv[1:]))
        return

    # Interactive mode
    print("=" * 60)
    print(" ⚡ WebICoder v3 — Interactive Mode")
    print(" Type a website description, press Enter to generate.")
    print(" Type 'quit' to exit.")
    print("=" * 60)
    while True:
        try:
            prompt = input("\n🌐 Describe your website: ").strip()
            if not prompt or prompt.lower() in ("quit", "exit", "q"):
                break
            _generate_and_save(prompt)
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or Ctrl-D: exit cleanly instead of a traceback.
            print("\n[INFO] Bye!")
            break


if __name__ == "__main__":
    main()