Axiovora-X / backend /embeddings.py
ZAIDX11's picture
Add files using upload-large-folder tool
b657fcc verified
"""Pluggable embedding interface. Provides simple char-histogram fallback and
an optional sentence-transformers adapter if available.
"""
from typing import List
import math
# Probe for the optional sentence-transformers package. Any Exception raised
# while importing it marks the SBERT backend as unavailable instead of
# propagating to the importer of this module.
try:
    from sentence_transformers import SentenceTransformer
except Exception:
    SBER_AVAILABLE = False
else:
    SBER_AVAILABLE = True
class EmbeddingBackend:
    """Base interface shared by the embedding backends in this module.

    Subclasses override :meth:`embed`; the base implementation always raises
    ``NotImplementedError``.
    """

    def embed(self, texts: List[str]) -> List[List[float]]:
        """Return one vector (a list of floats) for each entry of ``texts``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
class CharHistogramEmbedding(EmbeddingBackend):
    """Dependency-free embedding built from a character histogram.

    Every character of a text increments the bucket ``ord(ch) % dim``; the
    resulting count vector is then scaled to unit Euclidean length.
    """

    def __init__(self, dim: int = 32, max_chars: int = 4096):
        """Configure the histogram.

        Args:
            dim: Number of buckets, i.e. the length of every returned vector.
            max_chars: Only the first ``max_chars`` characters of each text
                are counted.

        Raises:
            ValueError: If ``dim`` is not positive or ``max_chars`` is
                negative.
        """
        # Reject bad sizes up front: dim == 0 would otherwise surface later as
        # a ZeroDivisionError from the modulo, and a negative dim as an
        # IndexError on an empty vector.
        if dim <= 0:
            raise ValueError("dim must be a positive integer, got {!r}".format(dim))
        if max_chars < 0:
            raise ValueError(
                "max_chars must be non-negative, got {!r}".format(max_chars)
            )
        self.dim = dim
        self.max_chars = max_chars

    def embed(self, texts: List[str]) -> List[List[float]]:
        """Return one unit-length histogram vector per entry of ``texts``.

        A text with no counted characters yields an all-zero vector.
        """
        return [self._embed_one(text) for text in texts]

    def _embed_one(self, text: str) -> List[float]:
        """Build the normalized character histogram for a single text."""
        dim = self.dim
        vec = [0.0] * dim
        for ch in text[:self.max_chars]:
            vec[ord(ch) % dim] += 1.0
        # ``or 1.0`` keeps the all-zero vector as is instead of dividing by 0.
        norm = math.sqrt(sum(v * v for v in vec)) or 1.0
        return [v / norm for v in vec]
class SBERTEmbedding(EmbeddingBackend):
    """Embedding backend that delegates to a sentence-transformers model."""

    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """Instantiate the ``SentenceTransformer`` identified by ``model_name``.

        Raises:
            RuntimeError: If the module-level ``SBER_AVAILABLE`` flag is false.
        """
        if SBER_AVAILABLE:
            self.model = SentenceTransformer(model_name)
        else:
            raise RuntimeError("sentence-transformers not installed")

    def embed(self, texts: List[str]) -> List[List[float]]:
        """Encode ``texts`` with the model and return plain lists of floats."""
        encoded = self.model.encode(texts)
        # Convert every component to a builtin float so callers get plain
        # Python lists rather than whatever row type ``encode`` produced.
        return [[float(value) for value in row] for row in encoded]
def make_default_backend() -> EmbeddingBackend:
    """Return the preferred embedding backend available in this process.

    Tries ``SBERTEmbedding`` when ``SBER_AVAILABLE`` is true. If constructing
    it raises, the failure is logged and ``CharHistogramEmbedding`` is
    returned instead.

    Returns:
        An ``SBERTEmbedding`` when it can be constructed, otherwise a
        ``CharHistogramEmbedding`` with default settings.
    """
    if SBER_AVAILABLE:
        try:
            return SBERTEmbedding()
        except Exception:
            # Best-effort fallback is intentional, but record why the
            # preferred backend could not be built instead of hiding it.
            import logging

            logging.getLogger(__name__).warning(
                "SBERT backend could not be initialised; "
                "falling back to CharHistogramEmbedding",
                exc_info=True,
            )
    return CharHistogramEmbedding()