"""
CoPA (Contrastive Paraphrase Attack) — training-free evasion prototype.

Based on Fang et al., EMNLP 2025: "Your Language Model Can Secretly
Write Like Humans: Contrastive Paraphrase Attacks on LLM-Generated
Text Detectors."

Contrastive decoding rule, as implemented here in log-probability space:
    P_final = softmax((1+lambda) * log P_human_style - lambda * log P_machine_style)

Only the inference-time contrastive decoding is implemented here.
No training required — runs on Modal T4 (~$0.60/h).
"""

from __future__ import annotations

import argparse
import json
import os
import time
from dataclasses import dataclass, field
from typing import Any, TYPE_CHECKING

if TYPE_CHECKING:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer


# ---------------------------------------------------------------------------
# Data structures
# ---------------------------------------------------------------------------

@dataclass
class CopaConfig:
    """Settings for one CoPA contrastive-decoding run.

    ``copa_rewrite`` reads the decoding and prompt fields; ``load_model``
    reads ``model_name``. Both prompt templates must contain the literal
    placeholder ``{input_text}``, which is substituted with ``str.replace``
    (not ``str.format``), so other braces in the templates are left alone.
    """

    # Hugging Face model id used for both the human-style and machine-style passes.
    model_name: str = "Qwen/Qwen2.5-1.5B-Instruct"

    # Contrastive decoding
    lambda_contrast: float = 0.5       # weight of the machine-style term that is subtracted
    alpha_truncation: float = 1e-5      # keep tokens with P_h >= alpha * max(P_h)

    # Generation
    max_new_tokens: int = 768           # upper bound on sampled tokens per text (raised from 256 to avoid cut-off output)
    temperature: float = 1.0            # divides the logits of both passes before they are contrasted
    top_p: float = 0.92                 # nucleus threshold; filtering is skipped when >= 1.0
    repetition_penalty: float = 1.15    # applied to every token id generated so far (token-level, not n-gram); 1.0 disables
    diversity_bonus_strength: float = 0.5  # subtracted once per occurrence in the last 20 generated tokens; 0 disables

    # Prompts: simple style transfer (post-processing strips any artifacts)
    human_style_prompt: str = (
        "Rewrite this to sound like a natural human wrote it, "
        "with varied sentences and conversational wording:\n\n{input_text}"
    )
    machine_style_prompt: str = (
        "Repeat the following text exactly, word for word, "
        "maintaining the original formal structure:\n\n{input_text}"
    )

    # Device identifier; main() fills this from the --device CLI flag.
    device: str = "cuda"


@dataclass
class CopaResult:
    """Outcome of rewriting a single input text."""
    # The input text exactly as it was passed in.
    original_text: str
    # Decoded and cleaned rewrite; run_copa_batch stores the original text
    # here instead when rewriting raised an exception.
    rewritten_text: str
    # Number of sampled token ids (includes the EOS token if one was sampled);
    # 0 for the batch fallback entry.
    tokens_generated: int
    # Wall-clock seconds spent in copa_rewrite; 0 for the batch fallback entry.
    time_seconds: float
    # The lambda_contrast value that was in effect for this rewrite.
    contrast_strength: float


# ---------------------------------------------------------------------------
# Model loading
# ---------------------------------------------------------------------------

def _lazy_import_torch():
    """Import the ML stack on demand and return it.

    At module level torch/transformers are only imported under
    ``TYPE_CHECKING``, so this helper is how runtime code obtains them.

    Returns:
        The tuple ``(torch, AutoModelForCausalLM, AutoTokenizer)``.
    """
    import torch as torch_module
    import transformers

    return (
        torch_module,
        transformers.AutoModelForCausalLM,
        transformers.AutoTokenizer,
    )


def load_model(config: CopaConfig):
    """Load the tokenizer and causal LM named by ``config.model_name``.

    The same model instance serves both the human-style and the
    machine-style forward passes, so it is loaded once.

    Device handling: ``config.device`` is honoured. The default ``"cuda"``
    (and ``"auto"``) keeps automatic placement via ``device_map="auto"``
    with float16 weights; any other value (e.g. ``"cpu"``, ``"cuda:1"``)
    places the whole model on that device. CPU loads use float32 because
    half precision is poorly supported for CPU inference.

    Args:
        config: Run configuration; ``model_name`` and ``device`` are read.

    Returns:
        ``(model, tokenizer)`` with the model switched to eval mode.
    """
    torch, AutoModelForCausalLM, AutoTokenizer = _lazy_import_torch()

    # NOTE(security): trust_remote_code=True executes Python shipped in the
    # model repository. Only point --model at repositories you trust.
    tokenizer = AutoTokenizer.from_pretrained(
        config.model_name, trust_remote_code=True
    )
    if tokenizer.pad_token is None:
        # Reuse EOS as the padding token when the tokenizer defines none.
        tokenizer.pad_token = tokenizer.eos_token

    device = str(config.device)
    on_cpu = device.startswith("cpu")
    # Plain "cuda" keeps the previous automatic placement; explicit devices
    # are passed through so the --device flag actually takes effect.
    device_map = "auto" if device in ("cuda", "auto") else device

    model = AutoModelForCausalLM.from_pretrained(
        config.model_name,
        torch_dtype=torch.float32 if on_cpu else torch.float16,
        device_map=device_map,
        trust_remote_code=True,
    )
    model.eval()
    return model, tokenizer


# ---------------------------------------------------------------------------
# Output cleaning
# ---------------------------------------------------------------------------

def _clean_output(text: str, original: str) -> str:
    """Remove template artifacts, repeated prompts, and truncated sentences.

    Common artifacts from CoPA/LLM generation:
    - Repeated few-shot templates ("Text: ...", "Human version:")
    - Instruction echoes ("### Informal Natural Language Rewritten:")
    - Trailing mid-word cutoffs
    """
    import re

    # Cut at common template repetition patterns
    cut_patterns = [
        r"\n\s*Text:\s",           # Few-shot template repetition
        r"\n\s*Human version:",    # Few-shot output label
        r"\n\s*Formal academic",   # Machine-style prompt leak
        r"\n\s*Formal explanation",
        r"###\s",                  # Markdown headings (meta-artifacts)
        r"\n\s*You are an AI",    # System prompt leak
        r"\n\s*Here is a more",    # Prompt repetition
        r"\n\s*Rewrite the",       # Instruction echo
    ]
    for pattern in cut_patterns:
        m = re.search(pattern, text)
        if m:
            text = text[: m.start()].strip()
            break

    # Remove trailing incomplete sentence (no ending punctuation)
    text = text.rstrip()
    if text and text[-1] not in '.!?":' "'" ')' ']':
        # Find last complete sentence
        last_period = max(text.rfind('.'), text.rfind('!'), text.rfind('?'))
        if last_period > len(text) * 0.6:  # Only if we have enough content
            text = text[: last_period + 1]

    return text.strip()


# ---------------------------------------------------------------------------
# CoPA contrastive decoding
# ---------------------------------------------------------------------------

def copa_rewrite(
    text: str,
    model,
    tokenizer,
    config: CopaConfig,
) -> CopaResult:
    """Rewrite ``text`` token by token using contrastive decoding.

    Two prompts are built from ``config.human_style_prompt`` and
    ``config.machine_style_prompt`` by substituting ``{input_text}``. At
    every step the same model scores the next token under both prompts
    (each followed by the tokens generated so far), and one token is
    sampled as follows:

      1. ``score = (1+lambda) * log P_h - lambda * log P_m`` where both
         distributions use logits divided by ``config.temperature``.
      2. Repetition penalty on every token id already generated.
      3. Adaptive truncation: drop tokens with ``P_h < alpha * max(P_h)``.
      4. Top-p (nucleus) filtering on ``softmax(score)``.
      5. Diversity penalty: subtract ``diversity_bonus_strength`` once per
         occurrence of a token in the last 20 generated tokens.
      6. Sample from ``softmax(score)``.

    Generation stops after ``config.max_new_tokens`` tokens or when the
    tokenizer's EOS token is sampled. Sampling is stochastic.

    NOTE: each step re-runs the model over the full prompt plus everything
    generated so far (no key/value cache is reused), so the total number of
    tokens processed grows quadratically with the output length.

    Args:
        text: The text to rewrite.
        model: Causal LM as returned by ``load_model``.
        tokenizer: Matching tokenizer.
        config: Decoding configuration.

    Returns:
        A ``CopaResult``. ``tokens_generated`` counts every sampled token,
        including the EOS token if one was sampled; ``time_seconds`` covers
        prompt encoding, decoding and output cleaning.

    Raises:
        ValueError: If ``config.temperature`` is not positive.
    """
    torch, _, _ = _lazy_import_torch()
    start_time = time.time()

    lambda_ = config.lambda_contrast
    alpha = config.alpha_truncation
    temp = config.temperature
    top_p = config.top_p
    rep_penalty = config.repetition_penalty
    div_strength = config.diversity_bonus_strength

    if temp <= 0:
        # Dividing logits by zero would only surface later as an opaque
        # NaN failure inside torch.multinomial.
        raise ValueError(f"temperature must be positive, got {temp!r}")

    # str.replace (not str.format) so braces inside `text` are harmless.
    human_prompt = config.human_style_prompt.replace("{input_text}", text)
    machine_prompt = config.machine_style_prompt.replace("{input_text}", text)

    h_inputs = tokenizer(human_prompt, return_tensors="pt").to(model.device)
    m_inputs = tokenizer(machine_prompt, return_tensors="pt").to(model.device)

    def next_token_logits(inputs):
        """Temperature-scaled logits for the position after `inputs`."""
        # Upcast to float32: the model may run in float16, where the
        # alpha-scaled truncation threshold and the contrastive arithmetic
        # lose precision or underflow.
        return model(**inputs).logits[0, -1, :].float() / temp

    generated_ids: list[int] = []
    neg_inf = float("-inf")

    # Inference only: without no_grad every forward pass would record
    # autograd state (model.eval() does not disable gradient tracking).
    with torch.no_grad():
        for _ in range(config.max_new_tokens):
            h_log_probs = torch.log_softmax(next_token_logits(h_inputs), dim=-1)
            m_log_probs = torch.log_softmax(next_token_logits(m_inputs), dim=-1)

            # --- Contrastive combination ---
            scores = (1 + lambda_) * h_log_probs - lambda_ * m_log_probs

            # --- Repetition penalty (vectorised over all seen token ids) ---
            if rep_penalty != 1.0 and generated_ids:
                seen = torch.tensor(
                    list(set(generated_ids)), dtype=torch.long, device=scores.device
                )
                seen_scores = scores[seen]
                # Push the score towards "less likely" whatever its sign.
                scores[seen] = torch.where(
                    seen_scores > 0,
                    seen_scores / rep_penalty,
                    seen_scores * rep_penalty,
                )

            # --- Adaptive truncation (keep tokens with P_h >= alpha * max(P_h)) ---
            h_probs = h_log_probs.exp()
            plausible = h_probs >= alpha * h_probs.max()
            scores[~plausible] = neg_inf

            # --- Top-p (nucleus) filtering ---
            if top_p < 1.0:
                sorted_scores, sorted_indices = torch.sort(scores, descending=True)
                cumulative = torch.cumsum(torch.softmax(sorted_scores, dim=-1), dim=-1)
                exceeds = cumulative > top_p
                # Shift by one so the token that crosses the threshold is
                # kept: the nucleus is the smallest set whose mass reaches
                # top_p. Position 0 stays False, so one token always survives.
                drop = torch.zeros_like(exceeds)
                drop[1:] = exceeds[:-1]
                scores[sorted_indices[drop]] = neg_inf

            # --- Diversity penalty on tokens used in the last 20 positions ---
            if div_strength > 0 and len(generated_ids) >= 3:
                recent = torch.tensor(
                    generated_ids[-20:], dtype=torch.long, device=scores.device
                )
                recent_ids, recent_counts = torch.unique(recent, return_counts=True)
                scores[recent_ids] -= div_strength * recent_counts.to(scores.dtype)

            # --- Sample ---
            probs = torch.softmax(scores, dim=-1)
            next_token_id = int(torch.multinomial(probs, num_samples=1).item())
            generated_ids.append(next_token_id)

            # --- Stop condition ---
            if next_token_id == tokenizer.eos_token_id:
                break

            # --- Append the sampled token to both contexts ---
            next_column = torch.tensor([[next_token_id]], device=model.device)
            for inputs in (h_inputs, m_inputs):
                inputs["input_ids"] = torch.cat(
                    [inputs["input_ids"], next_column], dim=1
                )
                # Single unpadded sequence, so every position is attended to.
                inputs["attention_mask"] = torch.ones_like(inputs["input_ids"])

    rewritten = tokenizer.decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )

    # Post-processing: strip repeated template artifacts
    rewritten = _clean_output(rewritten, text)

    elapsed = time.time() - start_time
    return CopaResult(
        original_text=text,
        rewritten_text=rewritten,
        tokens_generated=len(generated_ids),
        time_seconds=elapsed,
        contrast_strength=lambda_,
    )


# ---------------------------------------------------------------------------
# Batch processing
# ---------------------------------------------------------------------------

@dataclass
class CopaBatchResult:
    """Aggregate outcome of ``run_copa_batch`` over a list of texts."""
    # One entry per input text, in input order (fallback entries included).
    results: list[CopaResult] = field(default_factory=list)
    # Wall-clock seconds for the whole batch.
    total_time: float = 0.0
    # Sum of tokens_generated over the rewrites that succeeded.
    total_tokens: int = 0
    # total_tokens / total_time; stays 0.0 when total_time is not positive.
    avg_tokens_per_second: float = 0.0


def run_copa_batch(
    texts: list[str],
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    config: CopaConfig,
) -> CopaBatchResult:
    """Rewrite every text in ``texts`` sequentially and collect statistics.

    Rewriting is best-effort: if a sample raises, the error is printed and
    a placeholder result carrying the unchanged original text (zero tokens,
    zero time) is recorded, so the output always has one entry per input.

    Returns:
        A ``CopaBatchResult`` with per-sample results, total wall-clock
        time, total generated tokens and the resulting tokens-per-second.
    """
    outcome = CopaBatchResult()
    started_at = time.time()
    n_texts = len(texts)

    for position, sample in enumerate(texts, start=1):
        word_count = len(sample.split())
        print(f"[CoPA] {position}/{n_texts}: rewriting {word_count} words...")
        try:
            rewrite = copa_rewrite(sample, model, tokenizer, config)
            outcome.results.append(rewrite)
            outcome.total_tokens += rewrite.tokens_generated
        except Exception as exc:
            # The error line reports the zero-based sample index.
            print(f"[CoPA] ERROR on sample {position - 1}: {exc}")
            placeholder = CopaResult(
                original_text=sample,
                rewritten_text=sample,  # fall back to the untouched input
                tokens_generated=0,
                time_seconds=0,
                contrast_strength=config.lambda_contrast,
            )
            outcome.results.append(placeholder)

    outcome.total_time = time.time() - started_at
    if outcome.total_time > 0:
        outcome.avg_tokens_per_second = outcome.total_tokens / outcome.total_time
    return outcome


# ---------------------------------------------------------------------------
# Test data generation
# ---------------------------------------------------------------------------

def generate_test_texts(n: int = 50) -> list[str]:
    """Return ``n`` synthetic AI-sounding paragraphs for smoke testing.

    Five fixed paragraphs are cycled in order until ``n`` items have been
    produced; a non-positive ``n`` yields an empty list. For real
    evaluation, substitute genuine AI-generated text (e.g. from HC3).
    """
    seed_paragraphs = (
        "Artificial intelligence has revolutionized the field of natural language processing in recent years. The development of large language models has enabled unprecedented capabilities in text generation, translation, and summarization tasks.",
        "Climate change represents one of the most significant challenges facing humanity in the twenty-first century. Rising global temperatures have led to increasingly severe weather events, sea level rise, and disruptions to ecosystems worldwide.",
        "The history of computer science can be traced back to the early twentieth century, with the foundational work of Alan Turing and others. Their theoretical contributions laid the groundwork for the digital revolution that followed.",
        "Machine learning algorithms have demonstrated remarkable success across a wide range of applications, from image recognition to natural language understanding. These systems learn patterns from large datasets.",
        "The Renaissance period marked a profound transformation in European art, science, and philosophy. This cultural movement began in Italy during the fourteenth century and spread throughout the continent.",
    )
    pool_size = len(seed_paragraphs)
    # Wrap around the pool with modulo indexing.
    return [seed_paragraphs[index % pool_size] for index in range(n)]


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def main():
    """CLI entry point: load the model, rewrite test texts, save JSON results.

    Command-line defaults for the decoding options are taken from
    ``CopaConfig`` so the CLI and the dataclass cannot drift apart (in
    particular ``--max-tokens`` now defaults to the config's value rather
    than a separately hard-coded one).

    The output directory is created before the model is loaded, so an
    unusable ``--output`` path fails immediately instead of after the whole
    batch has been processed.
    """
    defaults = CopaConfig()

    parser = argparse.ArgumentParser(description="CoPA: Contrastive Paraphrase Attack")
    parser.add_argument("--model", default=defaults.model_name)
    parser.add_argument("--lambda", type=float, default=defaults.lambda_contrast, dest="lambda_contrast")
    parser.add_argument("--alpha", type=float, default=defaults.alpha_truncation, dest="alpha_truncation")
    parser.add_argument("--max-tokens", type=int, default=defaults.max_new_tokens)
    parser.add_argument("--temperature", type=float, default=defaults.temperature)
    parser.add_argument("--num-samples", type=int, default=50)
    parser.add_argument("--output", default="output/copa_results.json")
    parser.add_argument("--device", default=defaults.device)
    args = parser.parse_args()

    config = CopaConfig(
        model_name=args.model,
        lambda_contrast=args.lambda_contrast,
        alpha_truncation=args.alpha_truncation,
        max_new_tokens=args.max_tokens,
        temperature=args.temperature,
        device=args.device,
    )

    # A bare filename has an empty dirname, and os.makedirs("") raises.
    output_dir = os.path.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"[CoPA] Loading model: {config.model_name}")
    model, tokenizer = load_model(config)

    print(f"[CoPA] Generating {args.num_samples} test texts...")
    test_texts = generate_test_texts(args.num_samples)

    print("[CoPA] Running contrastive rewriting...")
    batch_result = run_copa_batch(test_texts, model, tokenizer, config)

    # Save results
    output_data = {
        "config": {
            "model": config.model_name,
            "lambda": config.lambda_contrast,
            "alpha": config.alpha_truncation,
            # Recorded as well because both change the generated output.
            "max_new_tokens": config.max_new_tokens,
            "temperature": config.temperature,
        },
        "summary": {
            "num_samples": len(batch_result.results),
            "total_time_s": batch_result.total_time,
            "total_tokens": batch_result.total_tokens,
            "avg_tokens_per_second": batch_result.avg_tokens_per_second,
        },
        "results": [
            {
                "original": r.original_text,
                "rewritten": r.rewritten_text,
                "tokens": r.tokens_generated,
                "time_s": r.time_seconds,
            }
            for r in batch_result.results
        ],
    }

    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2, ensure_ascii=False)

    print(f"[CoPA] Done. {len(batch_result.results)} samples in {batch_result.total_time:.1f}s")
    print(f"[CoPA] Saved to {args.output}")


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()