Spaces:
Sleeping
Sleeping
| from typing import List | |
| from .models import cross_encoder | |
| import logging | |
| logger = logging.getLogger(__name__) | |
| MAX_RERANK_CHARS = 800 | |
def advanced_rerank(question: str, docs: List, top_k: int = 5) -> List:
    """Rerank candidate documents against a question with the cross-encoder.

    Only the first ``MAX_DOCS_TO_RERANK`` (15) entries of *docs* are scored,
    and each document's ``page_content`` is truncated to ``MAX_RERANK_CHARS``
    characters before being paired with *question*.

    Args:
        question: Query text paired with every candidate document.
        docs: Candidate documents; each must expose a ``page_content``
            attribute (a falsy value such as ``None`` is treated as ``""``).
        top_k: Maximum number of documents to return.

    Returns:
        Up to *top_k* documents ordered by descending cross-encoder score, or
        an empty list when *docs* is empty or *top_k* is not positive.
    """
    # A non-positive top_k can never select anything. Returning early avoids
    # the negative-slice pitfall (``ranked[:-1]`` would drop the *last* item
    # rather than return nothing) and skips a pointless model call.
    if not docs or top_k <= 0:
        return []

    # Only the leading candidates are scored; anything past this cut-off is
    # never considered, regardless of top_k.
    MAX_DOCS_TO_RERANK = 15
    pruned_docs = docs[:MAX_DOCS_TO_RERANK]
    logger.info("Đang rerank %s tài liệu với Cross-Encoder...", len(pruned_docs))

    # Build (question, passage) pairs, truncating each passage.
    pairs = [
        (question, (doc.page_content or "")[:MAX_RERANK_CHARS])
        for doc in pruned_docs
    ]
    scores = cross_encoder.predict(pairs, show_progress_bar=False)

    # Sort on the score alone so the document objects are never compared.
    ranked = sorted(
        zip(scores, pruned_docs),
        key=lambda scored: scored[0],
        reverse=True,
    )
    logger.info("Top 3 điểm: %s", [f"{s:.3f}" for s, _ in ranked[:3]])
    return [doc for _, doc in ranked[:top_k]]