from __future__ import annotations

import re
from typing import Protocol

# Matches a self-closing <doc-ref id="..."/> tag. At least one whitespace
# character is required before ``id``; the non-empty id value (anything except
# a double quote) is captured in the named group ``id``.
DOC_REF_RE = re.compile(r'<doc-ref\s+id="(?P<id>[^"]+)"\s*/>')
# Matches a non-empty span enclosed in double quotes, captured as ``value``.
# Straight (") and curly (U+201C / U+201D) quotes are accepted on either side,
# so the opening and closing quote styles do not have to agree.
QUOTE_RE = re.compile('[\u201c\u201d"](?P<value>[^\u201c\u201d"]+)[\u201c\u201d"]')
# Matches runs of characters that are neither ASCII alphanumerics nor letters
# in the ranges À-Ö, Ø-ö, ø-ÿ. The trailing ÄÖÅäöå are already covered by those
# ranges and are listed explicitly only for readability.
NON_ALNUM_RE = re.compile(r"[^0-9A-Za-zÀ-ÖØ-öø-ÿÄÖÅäöå]+")


class DocumentLike(Protocol):
    """Structural type for the retrieved documents this module inspects.

    Any object exposing these attributes satisfies the protocol; no
    inheritance is required.
    """

    # Document title; has_sufficient_context compares it (after
    # normalization) against a title quoted in the user's question.
    title: str
    # NOTE(review): score, vector_score and overlap_count are not read by the
    # code visible in this file -- presumably retrieval ranking signals;
    # confirm their meaning and ranges with the producer.
    score: float
    vector_score: float
    overlap_count: int
    # When true, has_sufficient_context accepts the document only if the
    # question quotes its title. Presumably marks a hit matched on its title
    # alone -- confirm with the producer.
    is_title_only: bool


def extract_quoted_title(text: str) -> str | None:
    """Return the longest non-empty double-quoted phrase found in *text*.

    Quoted spans are located with ``QUOTE_RE`` and each captured value is
    stripped of surrounding whitespace. Values that are empty after stripping
    (for example ``" "``) are discarded, so an empty string is never reported
    as a title.

    Args:
        text: Free-form text that may contain quoted phrases.

    Returns:
        The longest stripped phrase (the earliest one wins a length tie), or
        ``None`` when *text* contains no usable quoted phrase.
    """
    stripped_values = (
        match.group("value").strip() for match in QUOTE_RE.finditer(text)
    )
    # max() returns the first maximal element, so ties keep document order;
    # ``default`` covers the case where nothing usable was quoted.
    return max((value for value in stripped_values if value), key=len, default=None)


def normalize_match_text(text: str) -> str:
    """Return *text* reduced to lower-case words separated by single spaces.

    The text is lower-cased, every run matched by ``NON_ALNUM_RE`` is replaced
    with a space, and the remaining words are re-joined so that there is no
    leading, trailing, or repeated whitespace.
    """
    spaced = NON_ALNUM_RE.sub(" ", text.lower())
    words = spaced.split()
    return " ".join(words)


def normalize_text(text: str) -> str:
    """Lower-case *text* and collapse each whitespace run to one space.

    Leading and trailing whitespace is removed; punctuation is left intact.
    """
    words = text.lower().split()
    return " ".join(words)


def has_sufficient_context(question: str, documents: list[DocumentLike]) -> bool:
    """Decide whether the retrieved documents are enough to answer *question*.

    Only the first document is inspected:

    * no documents at all -> ``False``;
    * first document not flagged ``is_title_only`` -> ``True``;
    * first document flagged ``is_title_only`` -> ``True`` only when the
      question contains a quoted title equal to the document's title after
      ``normalize_match_text`` is applied to both.

    Args:
        question: The user's question; searched for a quoted title only when
            the first document is title-only.
        documents: Retrieved documents. NOTE(review): the first entry is
            treated as the top hit, so the list is assumed to be ordered
            best-first -- confirm with the caller.

    Returns:
        ``True`` when the context is considered sufficient, else ``False``.
    """
    if not documents:
        return False

    top_hit = documents[0]
    if not top_hit.is_title_only:
        return True

    # A title-only hit counts only when the user explicitly quoted that title,
    # so the extraction and normalization are needed on this path alone.
    quoted_title = extract_quoted_title(question)
    if not quoted_title:
        return False
    return normalize_match_text(quoted_title) == normalize_match_text(top_hit.title)


def insufficiency_fallback() -> str:
    """Return the canned reply for questions the published material cannot answer."""
    message = "I cannot answer that confidently based on Blink Helsinki's published material alone."
    return message


def system_error_fallback() -> str:
    """Return the canned reply asking the user to retry after a failed answer."""
    message = "I could not finish a grounded answer right now. Please try again in a moment."
    return message


def input_guardrail_fallback() -> str:
    """Return the canned refusal that also states which topics are supported."""
    message = "I can help with Blink Helsinki, branding, marketing, and related implementation discussions. I cannot help with that request."
    return message


def output_guardrail_fallback() -> str:
    """Return the canned reply used when a brand-safe answer could not be completed."""
    message = "I could not complete a brand-safe answer for that request. Please ask again in a more direct way."
    return message


def redact_personal_info(text: str) -> str:
    """Return *text* unchanged.

    NOTE(review): despite the name, no redaction is performed here; the input
    is passed straight through. Confirm whether this is an intentional
    placeholder before relying on it to remove personal data.
    """
    return text