"""Pluggable embedding interface. Provides simple char-histogram fallback and an optional sentence-transformers adapter if available. """ from typing import List import math try: from sentence_transformers import SentenceTransformer SBER_AVAILABLE = True except Exception: SBER_AVAILABLE = False class EmbeddingBackend: def embed(self, texts: List[str]) -> List[List[float]]: raise NotImplementedError() class CharHistogramEmbedding(EmbeddingBackend): def __init__(self, dim: int = 32): self.dim = dim def embed(self, texts: List[str]) -> List[List[float]]: def _embed(text: str): vec = [0.0] * self.dim for ch in text[:4096]: vec[ord(ch) % self.dim] += 1.0 norm = math.sqrt(sum(v * v for v in vec)) or 1.0 return [v / norm for v in vec] return [_embed(t) for t in texts] class SBERTEmbedding(EmbeddingBackend): def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"): if not SBER_AVAILABLE: raise RuntimeError("sentence-transformers not installed") self.model = SentenceTransformer(model_name) def embed(self, texts: List[str]) -> List[List[float]]: arr = self.model.encode(texts) return [list(map(float, vec)) for vec in arr] def make_default_backend() -> EmbeddingBackend: if SBER_AVAILABLE: try: return SBERTEmbedding() except Exception: pass return CharHistogramEmbedding()