Spaces:
Sleeping
Sleeping
| from typing import List | |
| from langchain_core.documents import Document | |
| from flashrank import Ranker, RerankRequest | |
| from app.config import config | |
| from app.utils.logger import logger | |
class DocumentReranker:
    """Rerank retrieved documents against a query using FlashRank.

    Settings are read from ``config["models"]["reranker"]``: ``enabled``
    switches reranking on or off, ``top_n`` is the default number of
    documents to keep, and ``model`` is the FlashRank model name (only read
    when reranking is enabled).
    """

    def __init__(self):
        """Read the reranker settings and load the model when enabled."""
        settings = config["models"]["reranker"]
        self.ranker = None
        self.enabled = settings["enabled"]
        self.top_k = settings["top_n"]

        if self.enabled:
            model_name = settings["model"]
            self.ranker = Ranker(model_name=model_name)
            logger.info(f"FlashRank reranker initialized: {model_name}")

    def rerank(self, query: str, documents: List[Document], top_k: int = None) -> List[Document]:
        """Return up to ``top_k`` of ``documents`` as ranked against ``query``.

        Args:
            query: Search query the documents are scored against.
            documents: Candidate documents; their ``page_content`` is what
                gets scored.
            top_k: Maximum number of documents to return. ``None`` falls back
                to the configured default (``self.top_k``).

        Returns:
            The input list unchanged when reranking is disabled, no ranker is
            loaded, or ``documents`` is empty. An empty list when the
            effective ``top_k`` is zero or negative. Otherwise up to
            ``top_k`` of the input documents, in the order the ranker
            returned them (FlashRank is expected to return best-first).

        Note:
            The returned objects are the caller's own ``Document`` instances;
            each one gets a ``rerank_score`` metadata entry written in place.
        """
        if not self.enabled or not documents:
            return documents

        if self.ranker is None:
            # ``enabled`` was switched on without a model being loaded;
            # degrade to the unranked input instead of failing on ``None``.
            logger.warning("Reranker is enabled but no model is loaded; returning documents unranked")
            return documents

        limit = self.top_k if top_k is None else top_k

        # A negative slice bound would drop documents from the *end* of the
        # ranking rather than cap the count, and a zero bound would run the
        # model only to discard its output. A limit of ``None`` (unset in
        # config) still means "keep everything".
        if limit is not None and limit <= 0:
            return []

        passages = [
            {"id": idx, "text": doc.page_content}
            for idx, doc in enumerate(documents)
        ]
        results = self.ranker.rerank(RerankRequest(query=query, passages=passages))

        reranked_docs = []
        for result in results[:limit]:
            # The passage id is the document's position in the input list.
            doc = documents[result["id"]]
            # Store a plain float: ranker scores may be NumPy scalars, which
            # are not JSON-serializable when metadata is sent downstream.
            doc.metadata["rerank_score"] = float(result["score"])
            reranked_docs.append(doc)

        logger.info(f"Reranked {len(documents)} → {len(reranked_docs)} documents")
        return reranked_docs
# Shared module-level instance, created once at import time; when reranking
# is enabled in config, constructing it also builds the FlashRank ranker.
document_reranker = DocumentReranker()