import logging

from FlagEmbedding import FlagReranker

from ..config import get_settings

logger = logging.getLogger(__name__)

# Process-wide cache for the reranker; populated on first use by _get_reranker().
_reranker: FlagReranker | None = None


def _get_reranker() -> FlagReranker:
    """Return the shared FlagReranker, constructing it on the first call.

    The model name is read from ``get_settings().reranker_model``. The
    instance is stored in the module-level ``_reranker`` so later calls
    reuse it instead of constructing a new one.
    """
    global _reranker
    if _reranker is None:
        settings = get_settings()
        # Lazy %-style args: the message is only formatted if INFO is enabled.
        logger.info("[Reranker] Loading model: %s", settings.reranker_model)
        # NOTE(review): fp16 is disabled — presumably because the deployment
        # target is CPU-only (see the batch-size note in rerank); confirm.
        _reranker = FlagReranker(settings.reranker_model, use_fp16=False)
        logger.info(
            "[Reranker] Model loaded successfully: %s", settings.reranker_model
        )
    return _reranker


def rerank(query: str, passages: list[str], *, batch_size: int = 8) -> list[float]:
    """Score each passage against *query* with the shared reranker.

    Args:
        query: The query text paired with every passage.
        passages: Candidate passages to score.
        batch_size: Number of pairs passed to the model per batch. The
            default of 8 limits peak RAM usage, which is critical for cloud
            CPU-only environments.

    Returns:
        One float per passage, in the same order as *passages*. An empty
        *passages* list returns ``[]`` without loading the model.
    """
    if not passages:
        return []

    reranker = _get_reranker()
    pairs = [[query, passage] for passage in passages]
    # normalize=True is forwarded to FlagReranker; per the FlagEmbedding docs
    # it maps raw scores into the 0-1 range.
    scores = reranker.compute_score(pairs, normalize=True, batch_size=batch_size)

    # compute_score may hand back a bare scalar instead of a sequence (the
    # original code guarded against a plain float). Probe for iterability so
    # any scalar type (float, numpy scalar, 0-d array) is wrapped rather than
    # crashing the comprehension below.
    try:
        score_iter = iter(scores)
    except TypeError:
        return [float(scores)]
    return [float(score) for score in score_iter]