from __future__ import annotations import re import uuid from src.schemas import MemoryExtractionResult, MemoryFact PATTERNS: list[tuple[str, str]] = [ ("profile", r"\bmy name is\s+[A-ZÄÖÜ][A-Za-zÄÖÜäöüß -]{1,60}"), ("profile", r"\bI am called\s+[A-ZÄÖÜ][A-Za-zÄÖÜäöüß -]{1,60}"), ("profile", r"\bI am\s+[A-ZÄÖÜ][A-Za-zÄÖÜäöüß-]{1,60}\b"), ("preference", r"\bI (?:like|prefer|love|enjoy|want)\b[^.!?]*"), ("avoid", r"\bI (?:dislike|hate|avoid|do not want|don't want)\b[^.!?]*"), ("constraint", r"\b(?:I do not have|I don't have|no local GPU|without a local GPU|my constraint is)\b[^.!?]*"), ("skill", r"\bI (?:can|know|am comfortable with|work with)\b[^.!?]*"), ("goal", r"\b(?:my goal is|I want to build|I want the internet|I need to)\b[^.!?]*"), ("value", r"\b(?:matters to me|I care about|free again|open internet|local-first)\b[^.!?]*"), ] def _clean(text: str) -> str: return re.sub(r"\s+", " ", text).strip(" .") def _normalize_fact_text(kind: str, text: str) -> str: if kind == "profile": name_match = re.search(r"\b(?:my name is|I am called|I am)\s+(.+)$", text, flags=re.IGNORECASE) if name_match: return f"User name: {name_match.group(1).strip()}" return text def extract_memory_candidates(message: str) -> MemoryExtractionResult: candidates: list[MemoryFact] = [] seen: set[str] = set() bare_name = re.fullmatch(r"\s*([A-ZÄÖÜ][A-Za-zÄÖÜäöüß-]{1,60})\s*", message) if bare_name and bare_name.group(1).lower() not in {"hello", "hi", "hey", "yes", "no", "ok", "okay"}: candidates.append( MemoryFact( id=f"mem_{uuid.uuid4().hex[:10]}", kind="profile", text=f"User name: {bare_name.group(1)}", evidence=message[:240], confidence=0.68, ) ) seen.add(candidates[-1].text.lower()) for kind, pattern in PATTERNS: for match in re.finditer(pattern, message, flags=re.IGNORECASE): text = _clean(match.group(0)) if len(text) < 8: continue text = _normalize_fact_text(kind, text) normalized = text.lower() if normalized in seen: continue seen.add(normalized) candidates.append( MemoryFact( id=f"mem_{uuid.uuid4().hex[:10]}", kind=kind, text=text, evidence=message[:240], confidence=0.72, ) ) return MemoryExtractionResult(candidates)