"""
src/models/qa_model.py
Fixes:
  #2 — Sliding-window to overcome 512-token BERT limit
  #4 — Multilingual: auto-detects language, uses xlm-roberta for non-English
"""

import logging
import re
from typing import Optional

from transformers import pipeline

try:
    from langdetect import detect as _detect_lang
    LANGDETECT_AVAILABLE = True
except ImportError:
    LANGDETECT_AVAILABLE = False

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Model identifiers (passed as both `model` and `tokenizer` to the HF pipeline)
EN_MODEL   = "deepset/roberta-base-squad2"             # Upgrade: RoBERTa > BERT on SQuAD2 (~10% better F1)
MULTI_MODEL = "deepset/xlm-roberta-base-squad2"        # 100+ languages  — Fix #4

# Maps half-open score ranges [lo, hi) to a human-readable confidence label.
# The top range ends at 1.01 rather than 1.0 so that a score of exactly 1.0
# still satisfies the `lo <= score < hi` test in _confidence_label ("High").
CONFIDENCE_LABELS = {
    (0.70, 1.01): "High",
    (0.40, 0.70): "Medium",
    (0.00, 0.40): "Low",
}


def _confidence_label(score: float) -> str:
    """
    Translate a numeric confidence score into its label from CONFIDENCE_LABELS.

    Each key of CONFIDENCE_LABELS is a half-open ``[lower, upper)`` range; the
    label of the first range containing *score* is returned.  A score that
    falls outside every range yields "Low".
    """
    matching_labels = (
        name
        for (lower, upper), name in CONFIDENCE_LABELS.items()
        if lower <= score < upper
    )
    return next(matching_labels, "Low")


class QAModel:
    """
    Wraps HuggingFace extractive QA with:
    - Automatic language detection → English RoBERTa vs multilingual XLM-RoBERTa
    - Sliding-window chunking to handle contexts longer than 512 tokens

    Both pipelines are created lazily on first use, so constructing a QAModel
    does not load any model.
    """

    # Characters treated as sentence boundaries when expanding an answer span.
    _SENTENCE_BOUNDARIES = ".!?\n"

    def __init__(self):
        self._en_pipe    = None   # lazy
        self._multi_pipe = None   # lazy

    # ── Lazy loaders ─────────────────────────────────────────────────────────
    @staticmethod
    def _build_pipeline(model_name: str):
        """Create a question-answering pipeline using *model_name* as model and tokenizer."""
        return pipeline(
            "question-answering",
            model=model_name,
            tokenizer=model_name,
            handle_impossible_answer=True,
        )

    def _get_en_pipe(self):
        """Return the English pipeline, loading it on first call."""
        if self._en_pipe is None:
            logger.info("Loading English RoBERTa QA model (%s)…", EN_MODEL)
            self._en_pipe = self._build_pipeline(EN_MODEL)
        return self._en_pipe

    def _get_multi_pipe(self):
        """Return the multilingual pipeline, loading it on first call."""
        if self._multi_pipe is None:
            logger.info("Loading multilingual XLM-RoBERTa QA model (%s)…", MULTI_MODEL)
            self._multi_pipe = self._build_pipeline(MULTI_MODEL)
        return self._multi_pipe

    # ── Language detection ────────────────────────────────────────────────────
    @staticmethod
    def _detect_language(text: str) -> str:
        """
        Returns the language code reported by langdetect for the first 500
        characters of *text*.  Defaults to 'en' when langdetect is not
        installed or detection fails (best-effort: never raises).
        """
        if not LANGDETECT_AVAILABLE:
            return "en"
        try:
            return _detect_lang(text[:500])
        except Exception:
            # Deliberately best-effort; keep a trace for debugging only.
            logger.debug("Language detection failed; defaulting to 'en'", exc_info=True)
            return "en"

    # ── Sliding-window QA  ────────────────────────────────────────────────────
    def _sliding_window_answer(self, pipe, question: str, context: str,
                                chunk_size: int = 380, overlap: int = 60):
        """
        Fix #2: Splits context into overlapping chunks, runs QA on each,
        returns the best span.

        chunk_size / overlap are measured in whitespace-delimited words (fast
        proxy for tokens).  Chunks are sliced out of the original context, so
        the whitespace inside a chunk is untouched and the returned answer is
        a verbatim substring of *context*.

        Selection: a chunk with a non-empty answer always beats a chunk with
        an empty ("no answer") result; ties within each group are broken by
        score.  This matters because, with handle_impossible_answer=True, an
        empty answer can carry a high score of its own and would otherwise
        hide a real answer found in another chunk.

        Returns the pipeline's result dict.  On the windowed path the dict
        also has "_chunk_start" (word index of the winning chunk) and, for a
        non-empty answer, "start"/"end" are character offsets into the full
        *context* rather than into the chunk.  If every chunk fails, an empty
        answer with score 0.0 is returned.

        Raises ValueError when windowing is needed but chunk_size < 1 or
        overlap is not in [0, chunk_size).
        """
        # Character span of every whitespace-delimited word in the context.
        word_spans = [m.span() for m in re.finditer(r"\S+", context)]
        total_words = len(word_spans)
        if total_words <= chunk_size:
            # Short enough — single pass
            return pipe(question=question, context=context)

        if chunk_size < 1 or not 0 <= overlap < chunk_size:
            # A non-positive step would make range() fail or yield no chunks.
            raise ValueError(
                "chunk_size must be >= 1 and 0 <= overlap < chunk_size "
                f"(got chunk_size={chunk_size}, overlap={overlap})"
            )

        step = chunk_size - overlap
        best: Optional[dict] = None
        best_rank = (False, float("-inf"))

        for first in range(0, total_words, step):
            last = min(first + chunk_size, total_words) - 1
            char_start = word_spans[first][0]
            char_end = word_spans[last][1]
            try:
                result = pipe(question=question, context=context[char_start:char_end])
                has_answer = bool(str(result.get("answer") or "").strip())
                rank = (has_answer, result["score"])
                if best is None or rank > best_rank:
                    candidate = dict(result)  # do not mutate the pipeline's own dict
                    if has_answer:
                        # Translate chunk-relative offsets to full-context offsets.
                        candidate["start"] = result.get("start", 0) + char_start
                        candidate["end"] = result.get("end", 0) + char_start
                    candidate["_chunk_start"] = first
                    best, best_rank = candidate, rank
            except Exception as e:
                # Best-effort: one failing chunk must not abort the whole query.
                logger.warning("Chunk [%d:%d] failed: %s", first, first + chunk_size, e)
            if first + chunk_size >= total_words:
                break

        return best or {"answer": "", "score": 0.0, "start": 0, "end": 0}

    # ── Context expansion (Upgrade #5) ───────────────────────────────────────
    @staticmethod
    def _expand_with_context(answer_span: str, full_context: str, max_chars: int = 400) -> str:
        """
        Find the answer span in the context and return the full sentence(s)
        surrounding it.  Makes answers feel descriptive rather than fragmentary.

        The first occurrence of *answer_span* is used.  The scan walks at most
        max_chars // 2 characters in each direction and stops at a sentence
        boundary (., !, ? or newline).  The span is returned unchanged when it
        is empty, longer than 200 characters, not present in the context, or
        lost by the final trimming.
        """
        if not answer_span or len(answer_span) > 200:
            return answer_span

        idx = full_context.find(answer_span)
        if idx == -1:
            return answer_span

        boundaries = QAModel._SENTENCE_BOUNDARIES
        reach = max_chars // 2

        # Walk left until the character just before `start` is a boundary.
        # A boundary directly in front of the answer stops the walk at once,
        # so an answer that opens a sentence/line does not drag in the
        # previous one.
        start = idx
        for _ in range(reach):
            if start <= 0 or full_context[start - 1] in boundaries:
                break
            start -= 1

        # Walk right until the boundary character itself has been included.
        # The answer's own final character is not treated as a boundary.
        answer_end = idx + len(answer_span)
        end = answer_end
        for _ in range(reach):
            if end >= len(full_context):
                break
            if end > answer_end and full_context[end - 1] in boundaries:
                break
            end += 1

        expanded = full_context[start:end].strip(" .,;:\n")
        # Ensure the answer span is included; if trimming removed part of it, fall back
        if answer_span not in expanded:
            return answer_span
        # Add trailing punctuation
        if expanded and expanded[-1] not in ".!?":
            expanded += "."
        return expanded

    # ── Public interface ──────────────────────────────────────────────────────
    def answer(self, question: str, context: str) -> dict:
        """
        Answer *question* from *context*.

        Returns a dict with:
            answer, answer_span, confidence_score, confidence_label,
            start, end, language, model_used

        The same keys are present on every path.  For a missing/blank context
        no model is loaded: language and model_used are None and answer /
        answer_span both hold the "No context provided." message (mirroring
        the not-found case, where both hold the not-found message).
        """
        if not context or not context.strip():
            message = "No context provided."
            return {
                "answer":           message,
                "answer_span":      message,
                "confidence_score": 0.0,
                "confidence_label": "Low",
                "start":            0,
                "end":              0,
                "language":         None,
                "model_used":       None,
            }

        lang = self._detect_language(context)
        is_english = lang == "en"

        pipe = self._get_en_pipe() if is_english else self._get_multi_pipe()
        model_used = EN_MODEL if is_english else MULTI_MODEL

        raw = self._sliding_window_answer(pipe, question, context)

        score   = float(raw.get("score", 0.0))
        answer  = (raw.get("answer") or "").strip()

        # HuggingFace returns "" for unanswerable (SQuAD 2.0 style)
        if not answer or answer.lower() == "[cls]":
            answer = "The answer could not be found in the provided text."
            score  = 0.0
            expanded_answer = answer
        else:
            # Upgrade #5: Expand the answer with surrounding sentence(s)
            expanded_answer = self._expand_with_context(answer, context)

        return {
            "answer":           expanded_answer,
            "answer_span":      answer,         # original verbatim span (for highlighting)
            "confidence_score": round(score, 4),
            "confidence_label": _confidence_label(score),
            "start":            raw.get("start", 0),
            "end":              raw.get("end", 0),
            "language":         lang,
            "model_used":       model_used,
        }