from typing import List, Optional

from langchain_core.documents import Document
from flashrank import Ranker, RerankRequest

from app.config import config
from app.utils.logger import logger


class DocumentReranker:
    """Rerank documents against a query using a FlashRank ``Ranker``.

    Settings are read from ``config["models"]["reranker"]``:

    * ``enabled`` -- when falsy, no ranker is built and ``rerank`` returns
      its input untouched.
    * ``top_n`` -- default number of documents ``rerank`` keeps.
    * ``model`` -- model name passed to ``Ranker``; only read when enabled.
    """

    def __init__(self):
        reranker_config = config["models"]["reranker"]

        self.ranker = None
        self.enabled = reranker_config["enabled"]
        self.top_k = reranker_config["top_n"]

        if self.enabled:
            model_name = reranker_config["model"]
            self.ranker = Ranker(model_name=model_name)
            logger.info(f"FlashRank reranker initialized: {model_name}")

    def rerank(
        self,
        query: str,
        documents: List[Document],
        top_k: Optional[int] = None,
    ) -> List[Document]:
        """Return the highest-ranked documents for ``query``.

        Args:
            query: Text the documents are ranked against.
            documents: Candidate documents; their ``page_content`` is what
                gets sent to the ranker.
            top_k: Maximum number of documents to return. Falls back to the
                configured ``self.top_k`` when ``None``.

        Returns:
            The first ``top_k`` documents in the order produced by the
            ranker. The returned objects are the *same* ``Document``
            instances that were passed in, with ``metadata["rerank_score"]``
            set on each. If reranking is disabled, no ranker is available,
            or ``documents`` is empty, ``documents`` is returned unchanged
            (and not truncated to ``top_k``).
        """
        # ``self.ranker`` is only created when ``enabled`` was true at
        # construction time, so guard against ``enabled`` being switched on
        # afterwards instead of failing on ``None.rerank``.
        if not self.enabled or self.ranker is None or not documents:
            return documents

        if top_k is None:
            top_k = self.top_k

        # The passage id is the document's position in ``documents`` so each
        # result can be mapped back to its source document.
        passages = [
            {"id": index, "text": doc.page_content}
            for index, doc in enumerate(documents)
        ]
        rerank_request = RerankRequest(query=query, passages=passages)
        results = self.ranker.rerank(rerank_request)

        reranked_docs = []
        for result in results[:top_k]:
            doc = documents[result["id"]]
            # NOTE(review): FlashRank appears to return NumPy scalar scores;
            # coerce to a built-in float so the metadata stays serializable
            # (e.g. with ``json``). A no-op if the score is already a float.
            doc.metadata["rerank_score"] = float(result["score"])
            reranked_docs.append(doc)

        logger.info(f"Reranked {len(documents)} → {len(reranked_docs)} documents")
        return reranked_docs


# Shared module-level instance; constructed (and, when enabled, the model
# loaded) at import time.
document_reranker = DocumentReranker()