"""Free-text vibe -> category affinity via small-model sentence embeddings.

This is where the nuance lives (spec §9.1): a small CPU text encoder maps the
user's vibe to affinities over the *finite* OSM category vocabulary by cosine
similarity to each category's human-readable gloss. The output is interpretable
weights — the scoring path downstream stays a transparent weighted sum.

Backends (same bge-small model either way):
  1. **fastembed / ONNXRuntime** (preferred) — no torch import. Torch is ~1 GB
     of shared libraries; loading it lazily mid-request froze the app for
     minutes on a memory-pressured laptop and would slow Space cold-starts too.
  2. **sentence-transformers** (fallback) — used only if fastembed cannot be
     imported or fails to initialise.

Everything loads lazily and is cached; gloss embeddings are computed once.
"""

from __future__ import annotations

import functools
import logging
from collections.abc import Callable

import numpy as np

from discoverroute import config
from discoverroute.data import taxonomy

logger = logging.getLogger(__name__)


@functools.lru_cache(maxsize=1)
def _encoder() -> tuple[str, Callable[[list[str]], np.ndarray]]:
    """Return ``(name, encode_fn)`` for the first embedding backend that loads.

    ``encode_fn(list[str])`` returns an ndarray with one L2-normalized row per
    input text. The result is memoized, so the model is built at most once per
    process (a call that raises is not cached and will be retried).

    Raises:
        Whatever the sentence-transformers import/constructor raises when the
        fallback backend cannot be loaded either.
    """
    try:
        from fastembed import TextEmbedding

        onnx_model = TextEmbedding(model_name=config.EMBED_MODEL)

        def encode(texts: list[str]) -> np.ndarray:
            # ``embed`` yields one vector per text; stack them and re-normalize
            # so that a plain dot product downstream is a cosine similarity.
            vecs = np.stack(list(onnx_model.embed(texts)))
            return vecs / np.linalg.norm(vecs, axis=1, keepdims=True)

        return "fastembed", encode
    except Exception as exc:  # noqa: BLE001 - fall back to the torch stack
        # Deliberately best-effort, but say why: the fallback pulls in torch,
        # which is exactly the slow path the preferred backend exists to avoid.
        logger.warning(
            "fastembed backend unavailable (%r); "
            "falling back to sentence-transformers",
            exc,
        )
        from sentence_transformers import SentenceTransformer

        st_model = SentenceTransformer(config.EMBED_MODEL)

        def encode(texts: list[str]) -> np.ndarray:
            return st_model.encode(texts, normalize_embeddings=True)

        return "sentence-transformers", encode


@functools.lru_cache(maxsize=1)
def _gloss_matrix() -> tuple[list[str], np.ndarray]:
    """Return ``(categories, gloss embedding matrix)``, computed once.

    Row ``i`` of the matrix is the normalized embedding of the gloss for
    ``categories[i]``; the order follows ``taxonomy.CATEGORY_GLOSS``.
    """
    cats = list(taxonomy.CATEGORY_GLOSS.keys())
    glosses = [taxonomy.CATEGORY_GLOSS[c] for c in cats]
    _, encode = _encoder()
    return cats, encode(glosses)


def _query_similarities(vibe: str) -> np.ndarray:
    """Return the cosine similarity of *vibe* to every category gloss.

    *vibe* must already be stripped and non-empty. The result is aligned with
    the category list returned by ``_gloss_matrix``. Not cached here: the
    public entry points own the per-vibe caches.
    """
    _, gloss_emb = _gloss_matrix()
    _, encode = _encoder()
    query = encode([config.EMBED_QUERY_INSTRUCTION + vibe])[0]
    return gloss_emb @ query  # cosine (both sides normalized)


@functools.lru_cache(maxsize=256)
def vibe_to_affinity(vibe: str) -> dict[str, float]:
    """Map a free-text vibe to a ``{category: affinity}`` dict.

    Cosine similarities are min-max rescaled across categories so the best
    match is 1.0 and the weakest is the configured floor — guaranteeing
    measurable contrast between different vibes while keeping a little
    exploration room. Only the ``config.TOP_AFFINITY_CATEGORIES`` best
    categories keep their rescaled value (in ``[floor, 1]``); every other
    category is set to 0.0.

    An empty/blank vibe, or one whose similarities are too flat to be
    meaningful, yields the neutral answer: 1.0 for every category.

    Cached per vibe text (repeated demo prompts don't re-encode). Because of
    that cache, repeated calls return the *same* dict object — treat the
    result as read-only.
    """
    vibe = (vibe or "").strip()
    cats, _ = _gloss_matrix()
    if not vibe:
        return dict.fromkeys(cats, 1.0)  # neutral: equal interest

    sims = _query_similarities(vibe)
    lo, hi = float(sims.min()), float(sims.max())
    span = hi - lo
    # If the vibe is off-domain (e.g. "I'm hungry on a Tuesday"), the similarities
    # are nearly flat across categories. Don't manufacture confident preferences
    # from noise — treat it as neutral (equal interest) instead.
    # ``span <= 0`` is checked explicitly so a zero span can never reach the
    # division below, even when the configured threshold is 0.
    if span <= 0.0 or span < config.MIN_AFFINITY_SPAN:
        return dict.fromkeys(cats, 1.0)

    floor = config.AFFINITY_FLOOR
    aff = {
        c: floor + (1.0 - floor) * (float(s) - lo) / span
        for c, s in zip(cats, sims)
    }
    # Keep only the top-N categories; zero the long tail so off-vibe categories
    # can't backfill route slots once on-vibe candidates run out.
    ranked = sorted(aff, key=aff.get, reverse=True)
    keep = set(ranked[: config.TOP_AFFINITY_CATEGORIES])
    return {c: (v if c in keep else 0.0) for c, v in aff.items()}


@functools.lru_cache(maxsize=256)
def raw_top_similarity(vibe: str) -> float:
    """Return the best raw cosine of the vibe to any category gloss.

    This is a match-confidence signal, independent of the min-max rescale in
    ``vibe_to_affinity`` (which always forces a 1.0). An empty/blank vibe
    returns 0.0 without touching the encoder.
    """
    vibe = (vibe or "").strip()
    if not vibe:
        return 0.0
    return float(_query_similarities(vibe).max())