# Hadith_Search / hf_model.py
# Author: NightPrince — deployed as part of the vanilla-HTML Hadith engine
# served via FastAPI (repository uses Git LFS); commit fde590b.
"""
Local SentenceTransformer wrapper - drop-in replacement for HF API.
Uses sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 locally.
Includes in-memory LRU embedding cache (cachetools).
"""
import logging
import threading
import numpy as np
from cachetools import TTLCache
logger = logging.getLogger(__name__)
# Hugging Face model id for the local multilingual sentence embedder.
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
# Lazily-populated process-wide SentenceTransformer singleton; see _get_model().
_model = None
# Embedding cache: embeddings are deterministic for a given (text, normalize)
# pair, so entries never expire (infinite TTL); size is bounded at 1024 entries,
# evicted per cachetools' LRU-style policy.
_embedding_cache = TTLCache(maxsize=1024, ttl=float('inf'))
# Guards concurrent reads/writes of _embedding_cache across threads.
_embedding_lock = threading.Lock()
def _get_model():
    """Lazily load and memoize the shared SentenceTransformer instance.

    Uses double-checked locking on the module-level lock so that two
    threads racing on first use do not both load the (large) model —
    the original unguarded ``if _model is None`` allowed exactly that.

    Returns:
        The process-wide SentenceTransformer for EMBEDDING_MODEL_NAME.
    """
    global _model
    if _model is None:  # fast path: already loaded, skip the lock entirely
        with _embedding_lock:
            if _model is None:  # re-check under the lock (another thread may have won)
                logger.info(
                    "Loading local SentenceTransformer model: %s",
                    EMBEDDING_MODEL_NAME,
                )
                # Imported lazily so this module can be imported (and the
                # cache machinery tested) without sentence-transformers
                # installed or the model weights present.
                from sentence_transformers import SentenceTransformer
                _model = SentenceTransformer(EMBEDDING_MODEL_NAME)
    return _model
class HFEmbeddingModel:
    """Model-like object with encode() for use with retrieval.hybrid_search_fixed.

    Wraps the lazily-loaded local SentenceTransformer and adds a
    process-wide embedding cache keyed on (text, normalize flag).
    """

    def encode(self, text, normalize_embeddings=True):
        """Encode text via local SentenceTransformer. Returns a 1-D numpy array.

        Results are memoized in the module-level cache. Cached arrays are
        marked read-only before being stored: the *same* array object is
        returned to every future caller, so a caller mutating a returned
        vector must not be able to corrupt later results.

        Args:
            text: Input string to embed.
            normalize_embeddings: If True, L2-normalize the vector
                (skipped for a zero vector to avoid division by zero).

        Returns:
            1-D numpy.ndarray embedding (read-only once cached).
        """
        cache_key = f"{text}::{normalize_embeddings}"
        # Keep the critical section minimal: only the dict lookup is locked;
        # logging the hit happens after the lock is released.
        with _embedding_lock:
            cached = _embedding_cache.get(cache_key)
        if cached is not None:
            logger.debug("Hadith embedding cache HIT: %s...", text[:50])
            return cached
        model = _get_model()
        emb = model.encode(text, convert_to_numpy=True)
        if emb.ndim == 2:  # some encoder versions return shape (1, dim) for one text
            emb = emb[0]
        if normalize_embeddings:
            norm = np.linalg.norm(emb)
            if norm > 0:  # leave an all-zero vector untouched
                emb = emb / norm
        # Freeze before caching — this exact object is shared with all
        # subsequent cache-hit callers.
        emb.setflags(write=False)
        with _embedding_lock:
            _embedding_cache[cache_key] = emb
        logger.debug("Hadith embedding cache MISS → stored locally: %s...", text[:50])
        return emb