Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import re | |
| import uuid | |
| from src.schemas import MemoryExtractionResult, MemoryFact | |
# Ordered (kind, regex) rules used to pull candidate memory facts out of a
# user message.  extract_memory_candidates applies every regex with
# re.IGNORECASE, so character classes such as [A-ZÄÖÜ] do NOT enforce
# capitalisation on their own.
PATTERNS: list[tuple[str, str]] = [
    # Explicit self-introductions: the lead-in phrase itself signals a name,
    # so a lower-case name ("my name is john") is still accepted.
    ("profile", r"\bmy name is\s+[A-ZÄÖÜ][A-Za-zÄÖÜäöüß -]{1,60}"),
    ("profile", r"\bI am called\s+[A-ZÄÖÜ][A-Za-zÄÖÜäöüß -]{1,60}"),
    # "I am <Word>" is ambiguous ("I am tired"), so the first letter of the
    # name is pinned to upper case with a scoped (?-i:...) group; without it
    # the IGNORECASE flag would turn any adjective into a user name.
    ("profile", r"\bI am\s+(?-i:[A-ZÄÖÜ])[A-Za-zÄÖÜäöüß-]{1,60}\b"),
    # The remaining rules capture from the trigger phrase up to (not
    # including) the next '.', '!' or '?'.
    ("preference", r"\bI (?:like|prefer|love|enjoy|want)\b[^.!?]*"),
    ("avoid", r"\bI (?:dislike|hate|avoid|do not want|don't want)\b[^.!?]*"),
    ("constraint", r"\b(?:I do not have|I don't have|no local GPU|without a local GPU|my constraint is)\b[^.!?]*"),
    ("skill", r"\bI (?:can|know|am comfortable with|work with)\b[^.!?]*"),
    ("goal", r"\b(?:my goal is|I want to build|I want the internet|I need to)\b[^.!?]*"),
    ("value", r"\b(?:matters to me|I care about|free again|open internet|local-first)\b[^.!?]*"),
]
def _clean(text: str) -> str:
    """Return *text* with whitespace runs collapsed and the ends trimmed.

    Every run of whitespace becomes a single space, after which leading and
    trailing spaces and periods are stripped.
    """
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip(" .")
def _normalize_fact_text(kind: str, text: str) -> str:
    """Put a matched fact into its stored form.

    For the ``"profile"`` kind, a self-introduction phrase ("my name is",
    "I am called", "I am", matched case-insensitively) is rewritten as
    ``"User name: <rest of text>"``.  Any other kind, or a profile text
    without such a phrase, is returned unchanged.
    """
    if kind != "profile":
        return text

    intro = re.search(
        r"\b(?:my name is|I am called|I am)\s+(.+)$",
        text,
        flags=re.IGNORECASE,
    )
    if intro is None:
        return text

    name = intro.group(1).strip()
    return f"User name: {name}"
def extract_memory_candidates(message: str) -> MemoryExtractionResult:
    """Collect candidate memory facts found in a single user message.

    Two sources feed the result:

    * A message consisting of one capitalised word (optionally padded with
      whitespace) is taken as the user's name, unless it is a common
      greeting or acknowledgement.  This fact gets confidence 0.68.
    * Every match of every rule in ``PATTERNS`` (searched case-insensitively)
      is cleaned, normalised and recorded with confidence 0.72.  Matches
      shorter than 8 characters after cleaning are ignored.

    Facts are de-duplicated on their lower-cased text, across all kinds.
    Each fact carries the first 240 characters of the message as evidence
    and a fresh ``mem_``-prefixed id built from 10 hex digits of a UUID4.

    Args:
        message: Raw user message to scan.

    Returns:
        A ``MemoryExtractionResult`` built from the list of candidate facts,
        in discovery order.
    """
    facts: list[MemoryFact] = []
    seen_texts: set[str] = set()

    lone_word = re.fullmatch(r"\s*([A-ZÄÖÜ][A-Za-zÄÖÜäöüß-]{1,60})\s*", message)
    # Same slice is attached to every fact, so take it once.
    snippet = message[:240]

    if lone_word is not None:
        word = lone_word.group(1)
        # Short replies like "Hi" or "Ok" look like names but are not.
        if word.lower() not in {"hello", "hi", "hey", "yes", "no", "ok", "okay"}:
            name_fact = MemoryFact(
                id=f"mem_{uuid.uuid4().hex[:10]}",
                kind="profile",
                text=f"User name: {word}",
                evidence=snippet,
                confidence=0.68,
            )
            facts.append(name_fact)
            seen_texts.add(name_fact.text.lower())

    for kind, pattern in PATTERNS:
        for hit in re.finditer(pattern, message, flags=re.IGNORECASE):
            fragment = _clean(hit.group(0))
            if len(fragment) >= 8:
                fact_text = _normalize_fact_text(kind, fragment)
                key = fact_text.lower()
                if key not in seen_texts:
                    seen_texts.add(key)
                    facts.append(
                        MemoryFact(
                            id=f"mem_{uuid.uuid4().hex[:10]}",
                            kind=kind,
                            text=fact_text,
                            evidence=snippet,
                            confidence=0.72,
                        )
                    )

    return MemoryExtractionResult(facts)