"""Multilingual embeddings via fastembed (ONNX-based, no torch dependency). We use `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`: • 120MB ONNX model — small enough for the free HF Space • 384-dim output, well-supported in fastembed • Covers ~50 languages including French/Spanish/Arabic → close enough to Mauritian Kreol for retrieval to work (Creole shares heavy French-derived vocabulary) • Comparable retrieval quality to e5-small at similar size To see fastembed's full supported-model list: from fastembed import TextEmbedding TextEmbedding.list_supported_models() """ from __future__ import annotations from functools import lru_cache from typing import Iterable import numpy as np DEFAULT_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" EMBED_DIM = 384 @lru_cache(maxsize=2) def _model(model_name: str): from fastembed import TextEmbedding print(f"[knowledge] Loading embedder: {model_name}") return TextEmbedding(model_name=model_name) def embed_texts( texts: Iterable[str], model_name: str = DEFAULT_MODEL ) -> np.ndarray: """Embed a list of strings. Returns (N, EMBED_DIM) float32 normalised.""" texts = list(texts) if not texts: return np.zeros((0, EMBED_DIM), dtype=np.float32) model = _model(model_name) embeddings = list(model.embed(texts)) arr = np.array(embeddings, dtype=np.float32) norms = np.linalg.norm(arr, axis=1, keepdims=True) norms[norms == 0] = 1.0 return arr / norms def embed_passages(texts: Iterable[str]) -> np.ndarray: """Embed text chunks for storage. MiniLM has no required prefix.""" return embed_texts(texts) def embed_query(text: str) -> np.ndarray: """Embed a search query. MiniLM uses the same encoding as passages.""" return embed_texts([text])[0]