# chatbot-rag-fi/src/utils/agent_utils.py
# ABAO77's picture
# Upload 147 files
# 0df80b4 verified
from __future__ import annotations
import re
from typing import Protocol
# Matches a self-closing ``<doc-ref id="..."/>`` tag and captures the id value
# in the named group ``id``. At least one whitespace character is required
# before ``id=``; whitespace before ``/>`` is optional.
DOC_REF_RE = re.compile(r'<doc-ref\s+id="(?P<id>[^"]+)"\s*/>')
# Matches a non-empty span enclosed in double quotes and captures it in the
# named group ``value``. Straight (") and curly (U+201C / U+201D) quotes are
# accepted interchangeably, so the opening and closing marks need not pair up.
QUOTE_RE = re.compile('[\u201c\u201d"](?P<value>[^\u201c\u201d"]+)[\u201c\u201d"]')
# Matches runs of characters that are neither ASCII alphanumerics nor letters
# in the Latin-1 ranges À-Ö, Ø-ö and ø-ÿ. The trailing ÄÖÅäöå are already
# covered by those ranges.
NON_ALNUM_RE = re.compile(r"[^0-9A-Za-zÀ-ÖØ-öø-ÿÄÖÅäöå]+")
class DocumentLike(Protocol):
    """Structural type describing the document objects this module accepts.

    Any object exposing the attributes below satisfies the protocol; no
    inheritance is required.
    """

    # Document title; has_sufficient_context compares it with a title quoted
    # in the user's question.
    title: str
    # NOTE(review): presumably an overall relevance score assigned by the
    # retriever -- not read anywhere in this module; confirm with the producer.
    score: float
    # NOTE(review): presumably the vector-similarity part of the score --
    # not read anywhere in this module; confirm with the producer.
    vector_score: float
    # NOTE(review): presumably a count of overlapping terms between query and
    # document -- not read anywhere in this module; confirm with the producer.
    overlap_count: int
    # Flag read by has_sufficient_context: when true for the top hit, a
    # matching quoted title in the question is required.
    is_title_only: bool
def extract_quoted_title(text: str) -> str | None:
    """Return the longest non-blank quoted fragment found in *text*.

    Every span matched by ``QUOTE_RE`` is stripped of surrounding whitespace.
    Fragments that are empty after stripping (for example ``" "``) are
    ignored, so a question containing only blank quotes yields ``None``
    rather than an empty string.

    Args:
        text: Free-form text that may contain one or more quoted spans.

    Returns:
        The longest stripped fragment (the first one wins on a tie), or
        ``None`` when *text* contains no usable quoted fragment.
    """
    stripped_values = (
        match.group("value").strip() for match in QUOTE_RE.finditer(text)
    )
    # Drop whitespace-only quotes so the "no title" case is always None.
    candidates = [value for value in stripped_values if value]
    if not candidates:
        return None
    return max(candidates, key=len)
def normalize_match_text(text: str) -> str:
    """Return *text* in a canonical form suitable for equality matching.

    The text is lower-cased, every run matched by ``NON_ALNUM_RE`` is replaced
    with a single space, and the remaining words are joined with single
    spaces (leading and trailing whitespace is dropped).
    """
    lowercase = text.lower()
    separated = NON_ALNUM_RE.sub(" ", lowercase)
    words = separated.split()
    return " ".join(words)
def normalize_text(text: str) -> str:
    """Return *text* lower-cased with all whitespace runs collapsed.

    Leading and trailing whitespace is removed and every internal run of
    whitespace becomes a single space. Punctuation is left untouched.
    """
    words = text.lower().split()
    collapsed = " ".join(words)
    return collapsed
def has_sufficient_context(question: str, documents: list[DocumentLike]) -> bool:
    """Decide whether the retrieved documents are enough to answer *question*.

    Only the first document (the top hit) is inspected:

    * no documents at all -> ``False``;
    * top hit is flagged ``is_title_only`` -> ``True`` only when the question
      contains a quoted title that equals the top hit's title after
      ``normalize_match_text`` is applied to both;
    * any other top hit -> ``True``.

    Args:
        question: The user's question; may contain a quoted document title.
        documents: Retrieved documents, with the top hit first.

    Returns:
        ``True`` when the context is considered sufficient, else ``False``.
    """
    if not documents:
        return False

    best = documents[0]
    requested_title = extract_quoted_title(question)
    if requested_title:
        title_matches = (
            normalize_match_text(requested_title) == normalize_match_text(best.title)
        )
    else:
        title_matches = False

    # A title-only hit counts as context only if the user asked for that title.
    return title_matches if best.is_title_only else True
def insufficiency_fallback() -> str:
    """Return the fixed reply used when the answer cannot be given confidently."""
    message = "I cannot answer that confidently based on Blink Helsinki's published material alone."
    return message
def system_error_fallback() -> str:
    """Return the fixed reply used when a grounded answer could not be finished."""
    message = "I could not finish a grounded answer right now. Please try again in a moment."
    return message
def input_guardrail_fallback() -> str:
    """Return the fixed reply used when a request is declined on input."""
    message = (
        "I can help with Blink Helsinki, branding, marketing, and related "
        "implementation discussions. I cannot help with that request."
    )
    return message
def output_guardrail_fallback() -> str:
    """Return the fixed reply used when a brand-safe answer could not be completed."""
    message = (
        "I could not complete a brand-safe answer for that request. "
        "Please ask again in a more direct way."
    )
    return message
def redact_personal_info(text: str) -> str:
    """Return *text* unchanged.

    This implementation is an identity pass-through: no characters are
    removed or masked.
    """
    # NOTE(review): the name suggests redaction, but none is performed here --
    # confirm whether this is an intentional placeholder.
    passthrough = text
    return passthrough