from __future__ import annotations

import re
from typing import Protocol

# NOTE(review): the original pattern text was truncated (only the tail
# `[^"]+)"\s*/>` survived, which is not a valid regex). The tag name,
# attribute name and group name below are a reconstruction of a
# self-closing reference tag -- confirm them against the code that emits
# and consumes these references before relying on this pattern.
DOC_REF_RE = re.compile(r'<doc_ref\s+id="(?P<id>[^"]+)"\s*/>')

# Text enclosed in straight or curly double quotes; the enclosed text is
# exposed as the named group "value" (read by extract_quoted_title).
QUOTE_RE = re.compile('[\u201c\u201d"](?P<value>[^\u201c\u201d"]+)[\u201c\u201d"]')

# One or more characters outside the listed digit/letter ranges.
NON_ALNUM_RE = re.compile(r"[^0-9A-Za-zÀ-ÖØ-öø-ÿÄÖÅäöå]+")


class DocumentLike(Protocol):
    """Structural type for the documents passed to this module.

    Only ``title`` and ``is_title_only`` are read by the functions in this
    module; the remaining attributes are part of the declared shape.
    """

    title: str
    score: float
    vector_score: float
    overlap_count: int
    is_title_only: bool


def extract_quoted_title(text: str) -> str | None:
    """Return the longest quoted span in *text*, or ``None`` if there is none.

    Quoted spans are found with ``QUOTE_RE``; each candidate is stripped of
    surrounding whitespace before lengths are compared. On ties the earliest
    candidate wins (``max`` keeps the first maximal item).
    """
    matches = [match.group("value").strip() for match in QUOTE_RE.finditer(text)]
    if not matches:
        return None
    return max(matches, key=len)


def normalize_match_text(text: str) -> str:
    """Lowercase *text*, replace runs matched by ``NON_ALNUM_RE`` with a
    single space, and collapse/trim whitespace."""
    lowered = NON_ALNUM_RE.sub(" ", text.lower())
    return " ".join(lowered.split())


def normalize_text(text: str) -> str:
    """Lowercase *text* and collapse all whitespace runs to single spaces."""
    return " ".join(text.lower().split())


def has_sufficient_context(question: str, documents: list[DocumentLike]) -> bool:
    """Decide whether the retrieved *documents* are enough to answer *question*.

    Returns ``False`` when *documents* is empty. Otherwise only the first
    document is inspected: if it is not a title-only hit the context is
    considered sufficient; if it is title-only, the context is sufficient
    only when the question contains a quoted title that equals the
    document's title after ``normalize_match_text`` normalization.
    """
    if not documents:
        return False

    top_hit = documents[0]
    if not top_hit.is_title_only:
        return True

    quoted_title = extract_quoted_title(question)
    # An absent or empty quoted title can never confirm a title-only hit.
    if not quoted_title:
        return False
    return normalize_match_text(quoted_title) == normalize_match_text(top_hit.title)


def insufficiency_fallback() -> str:
    """Return the canned reply used when context is insufficient."""
    return "I cannot answer that confidently based on Blink Helsinki's published material alone."


def system_error_fallback() -> str:
    """Return the canned reply used when answering failed with an error."""
    return "I could not finish a grounded answer right now. Please try again in a moment."


def input_guardrail_fallback() -> str:
    """Return the canned reply used when the input guardrail rejects a request."""
    return "I can help with Blink Helsinki, branding, marketing, and related implementation discussions. I cannot help with that request."


def output_guardrail_fallback() -> str:
    """Return the canned reply used when the output guardrail rejects an answer."""
    return "I could not complete a brand-safe answer for that request. Please ask again in a more direct way."


def redact_personal_info(text: str) -> str:
    """Return *text* unchanged.

    No redaction is currently performed; this is a pass-through.
    """
    return text