"""
Prompt Optimizer — reduces token usage in prompts while preserving
semantic content.

V1 strategy:
  - Whitespace normalization
  - Redundant preamble removal
  - Instruction conciseness rewriting
  - Conversation history summarization (stub)
  - System prompt selection

V2: LLMLingua semantic compression (if llmlingua installed)
    Falls back to V1 heuristics if not available.
"""

from __future__ import annotations

import logging
import re
from dataclasses import dataclass
from typing import Optional

logger = logging.getLogger(__name__)


@dataclass
class OptimizedPrompt:
    """Result of one optimization pass: the rewritten query plus savings stats."""

    # Query exactly as supplied by the caller.
    original_query: str
    # Query after compression (equal to the original when nothing applied).
    optimized_query: str
    # System prompt text chosen for the request.
    system_prompt: str
    # Token counts of the original and the optimized query.
    original_tokens: int
    optimized_tokens: int
    # Tokens removed by optimization.
    tokens_saved: int
    # Share of tokens saved, reported as a float.
    compression_ratio: float
    # Names of the techniques that changed the query, in application order.
    techniques_applied: list[str]

    def to_dict(self) -> dict:
        """
        Return the fields as a plain dict.

        The ``techniques_applied`` list is copied so that callers mutating
        the returned dict cannot alter this instance.
        """
        data = self.__dict__.copy()
        data["techniques_applied"] = list(self.techniques_applied)
        return data


# ---------------------------------------------------------------------------
# System prompt templates
# ---------------------------------------------------------------------------

# Most detailed template: asks for thorough, step-by-step answers.
_VERBOSE_SYSTEM_PROMPT = (
    "You are a helpful, accurate, and thorough assistant. "
    "Provide complete, well-structured answers. "
    "Think step by step when solving complex problems."
)

# Middle-ground template.
_CONCISE_SYSTEM_PROMPT = (
    "You are a helpful assistant. Be clear and accurate. "
    "Avoid unnecessary verbosity."
)

# Shortest template.
_MINIMAL_SYSTEM_PROMPT = "Answer concisely and accurately."

# Style name -> system prompt text.
_SYSTEM_PROMPTS = {
    "verbose": _VERBOSE_SYSTEM_PROMPT,
    "concise": _CONCISE_SYSTEM_PROMPT,
    "minimal": _MINIMAL_SYSTEM_PROMPT,
}

# Patterns that add no semantic value. Each one is anchored to the start (^)
# or end ($) of the query and is removed by substituting the empty string.
_FILLER_PATTERNS = [
    r"(?i)^(please\s+)?can\s+you\s+(please\s+)?",
    r"(?i)^i\s+(would\s+like|want|need)\s+(you\s+to\s+)?",
    r"(?i)^could\s+you\s+(please\s+)?",
    # \b after the name keeps "hey ai" from matching inside "hey airplane ...".
    r"(?i)^hey\s+(there\s+)?(claude|assistant|ai|chatgpt)\b[\s,!]*",
    r"(?i)^(hi|hello|hey)[,!.\s]+",
    r"(?i)\s+(please|thank\s+you|thanks)[.!]?\s*$",
    r"(?i)^(i\s+)?(was\s+)?wondering\s+(if\s+)?(you\s+)?(could|can)\s+",
]

# Verbose instruction phrases → concise alternatives, as (pattern, replacement)
# pairs. Every phrase is bounded with \b so it only matches whole words;
# without it "in simple terms" would match inside "explain simple terms".
_INSTRUCTION_REWRITES = [
    (r"(?i)\bprovide\s+a\s+detailed\s+explanation\s+of\b", "explain"),
    (r"(?i)\bgive\s+me\s+a\s+comprehensive\s+overview\s+of\b", "overview:"),
    (r"(?i)\bi\s+need\s+you\s+to\s+write\s+a\s+", "write a "),
    (r"(?i)\bcould\s+you\s+please\s+explain\b", "explain"),
    (r"(?i)\bwhat\s+is\s+the\s+best\s+way\s+to\b", "best way to"),
    (r"(?i)\bin\s+simple\s+terms,?\s+", ""),
    (r"(?i)\bfor\s+a\s+(complete\s+)?beginner[\s,]+", ""),
]


class PromptOptimizer:
    """
    Optimizes prompts to minimize token usage.

    V2: Uses LLMLingua for semantic compression when installed.
    Falls back to V1 heuristic compression (whitespace normalization,
    filler removal, instruction rewrites) if not.
    """

    # LLMLingua is only attempted on queries longer than this many words.
    _LLMLINGUA_MIN_WORDS = 15
    # Passed as ``rate`` to ``compress_prompt`` (0.6 = keep ~60% of tokens).
    _LLMLINGUA_RATE = 0.6
    # History truncation limits: number of trailing turns kept, and the
    # maximum number of characters kept from each turn's content.
    _HISTORY_MAX_TURNS = 3
    _HISTORY_MAX_CHARS = 200
    # Cached tiktoken encoder shared by all instances.
    # None = not looked up yet, False = lookup failed (use the estimate).
    _encoder = None

    def __init__(self):
        """Load the LLMLingua compressor if possible; otherwise keep ``None``."""
        self._llmlingua = None
        try:
            from llmlingua import PromptCompressor  # type: ignore
            logger.info("PromptOptimizer: Loading LLMLingua compressor...")
            self._llmlingua = PromptCompressor(
                model_name="microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank",
                use_llmlingua2=True,
                device_map="cpu",
            )
            logger.info("PromptOptimizer: LLMLingua ready!")
        except ImportError:
            logger.info("PromptOptimizer: llmlingua not installed. Using V1 heuristic compression.")
        except Exception as e:
            # Best-effort: any load failure just disables the V2 path.
            logger.warning("PromptOptimizer: Failed to load LLMLingua: %s. Using V1 heuristics.", e)

    def optimize(
        self,
        query: str,
        system_prompt_style: str = "concise",
        compression_enabled: bool = True,
        conversation_history: Optional[list[dict]] = None,
    ) -> OptimizedPrompt:
        """
        Returns an OptimizedPrompt with the reduced query and chosen system prompt.

        Args:
            query: The user query to optimize.
            system_prompt_style: Key into the system prompt templates; an
                unknown key falls back to the "concise" template.
            compression_enabled: When False the query is passed through as is.
            conversation_history: Optional chat messages; when non-empty a
                truncated transcript is appended to the system prompt.

        Returns:
            The optimized query, the system prompt, and token statistics.
            Token counts cover the query only, not the system prompt.
        """
        techniques: list[str] = []
        optimized = query

        if compression_enabled:
            optimized, applied = self._compress(optimized)
            techniques.extend(applied)

        system_prompt = _SYSTEM_PROMPTS.get(system_prompt_style, _SYSTEM_PROMPTS["concise"])

        # Summarize history if provided (stub — V2 will use LLM summarization)
        if conversation_history:
            system_prompt += self._summarize_history(conversation_history)
            techniques.append("history_summarization_stub")

        orig_tokens = self._count_tokens(query)
        opt_tokens = self._count_tokens(optimized)
        # Clamp at zero: a rewrite may occasionally tokenize longer.
        saved = max(0, orig_tokens - opt_tokens)

        return OptimizedPrompt(
            original_query=query,
            optimized_query=optimized,
            system_prompt=system_prompt,
            original_tokens=orig_tokens,
            optimized_tokens=opt_tokens,
            tokens_saved=saved,
            # max(..., 1) avoids division by zero for an empty query.
            compression_ratio=round(saved / max(orig_tokens, 1), 3),
            techniques_applied=techniques,
        )

    # ------------------------------------------------------------------
    # Compression pipeline
    # ------------------------------------------------------------------

    def _compress(self, text: str) -> tuple[str, list[str]]:
        """
        Compress ``text`` and report which techniques changed it.

        Tries LLMLingua first (when loaded and the query is long enough);
        if that is unavailable, fails, or does not shorten the text, the
        V1 heuristics run instead.

        Returns:
            ``(compressed_text, techniques)`` where ``techniques`` lists the
            names of the steps that actually modified the text.
        """
        techniques: list[str] = []

        # --- V2: LLMLingua Semantic Compression ---
        if self._llmlingua and len(text.split()) > self._LLMLINGUA_MIN_WORDS:
            try:
                result = self._llmlingua.compress_prompt(
                    [text],
                    rate=self._LLMLINGUA_RATE,
                    force_tokens=["?"],  # Always keep question marks
                )
                compressed = result["compressed_prompt"].strip()
                # Only use if it actually saved words and isn't empty
                if compressed and len(compressed.split()) < len(text.split()):
                    techniques.append("llmlingua_semantic_compression")
                    return compressed, techniques
            except Exception as e:
                logger.warning("LLMLingua compression failed: %s. Falling back to V1.", e)

        # --- V1: Heuristic Compression ---

        # 1. Whitespace normalization
        normalized = re.sub(r"\s+", " ", text).strip()
        if normalized != text:
            techniques.append("whitespace_normalization")

        # 2. Remove filler preambles / sign-offs
        cleaned = self._strip_fillers(normalized)
        if not cleaned:
            # The query was nothing but filler (e.g. "Hello!"); keep it
            # rather than hand an empty prompt to the model.
            return normalized, techniques
        if cleaned != normalized:
            techniques.append("filler_removal")
            # Re-capitalize only when a leading filler was cut (the result is
            # then no longer a prefix of the input). Untouched starts such as
            # "print(x)" or "iPhone" must keep their original case.
            if not normalized.startswith(cleaned) and cleaned[0].islower():
                cleaned = cleaned[0].upper() + cleaned[1:]

        # 3. Instruction conciseness rewrites
        for pattern, replacement in _INSTRUCTION_REWRITES:
            rewritten = re.sub(pattern, replacement, cleaned)
            if rewritten != cleaned:
                cleaned = rewritten
                if "instruction_rewrite" not in techniques:
                    techniques.append("instruction_rewrite")

        # 4. Deduplicate consecutive whitespace again after rewrites
        cleaned = re.sub(r"\s+", " ", cleaned).strip()

        return cleaned, techniques

    @staticmethod
    def _strip_fillers(text: str) -> str:
        """
        Remove filler phrases from ``text`` until no pattern matches.

        A single ordered pass misses stacked fillers ("Hi, can you ..."),
        because the greeting is only stripped after the request pattern has
        already been tried. Every effective substitution shortens the text,
        so the loop terminates.
        """
        changed = True
        while changed:
            changed = False
            for pattern in _FILLER_PATTERNS:
                candidate = re.sub(pattern, "", text).strip()
                if candidate != text:
                    text = candidate
                    changed = True
        return text

    # ------------------------------------------------------------------
    # History summarization (stub)
    # ------------------------------------------------------------------

    def _summarize_history(self, history: list[dict]) -> str:
        """
        V1: truncate to last 3 turns.
        V2: call a cheap LLM to produce a compressed memory string.

        Each kept turn is rendered as ``ROLE: content`` with the content cut
        to 200 characters. A missing role is shown as ``UNKNOWN``; missing or
        ``None`` content is rendered as empty text, and non-string content is
        converted with ``str()`` before truncation.
        """
        recent = history[-self._HISTORY_MAX_TURNS:]
        omitted = len(history) - len(recent)

        lines: list[str] = []
        if omitted > 0:
            lines.append(f"[{omitted} earlier turns omitted]")

        for message in recent:
            role = str(message.get("role") or "unknown").upper()
            content = message.get("content")
            if content is None:
                content = ""
            elif not isinstance(content, str):
                content = str(content)
            lines.append(f"{role}: {content[:self._HISTORY_MAX_CHARS]}")

        return "\n\nConversation context:\n" + "\n".join(lines)

    # ------------------------------------------------------------------
    # Token counting
    # ------------------------------------------------------------------

    @staticmethod
    def _count_tokens(text: str) -> int:
        """
        Approximate token count.

        Uses tiktoken's ``cl100k_base`` encoding when it can be loaded,
        otherwise a word-based estimate (words ÷ 0.75). The encoder lookup is
        attempted once per process and its outcome cached, so a missing
        package or unreachable vocabulary download is not retried per call.
        """
        encoder = PromptOptimizer._encoder
        if encoder is None:
            try:
                import tiktoken  # type: ignore
                encoder = tiktoken.get_encoding("cl100k_base")
            except Exception:
                # tiktoken is missing or its vocabulary could not be loaded.
                encoder = False
            PromptOptimizer._encoder = encoder

        if encoder is not False:
            try:
                return len(encoder.encode(text))
            except Exception:
                # Encoding this particular text failed; estimate instead.
                pass

        # Fallback: GPT tokenizers average ~0.75 words per token
        return int(len(text.split()) / 0.75)