File size: 781 Bytes
d326834
 
95e3d4a
 
 
e9e68a0
d326834
 
 
95e3d4a
 
 
 
 
 
 
 
 
 
d326834
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from typing import List
from .models import cross_encoder
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Maximum number of characters of each document's text that advanced_rerank
# passes to the cross-encoder; longer text is truncated to this length.
MAX_RERANK_CHARS = 800

def advanced_rerank(
    question: str,
    docs: List,
    top_k: int = 5,
    *,
    max_docs: int = 15,
) -> List:
    """Rerank ``docs`` against ``question`` and return the best ``top_k``.

    Only the first ``max_docs`` entries of ``docs`` are scored, so at most
    ``max_docs`` documents can be returned even when ``top_k`` is larger.
    Each document's ``page_content`` is truncated to ``MAX_RERANK_CHARS``
    characters before scoring; a falsy ``page_content`` (e.g. ``None``) is
    scored as an empty string.

    Args:
        question: Query text paired with every candidate document.
        docs: Candidate documents; each must expose a ``page_content``
            attribute. Input order decides which ones survive the
            ``max_docs`` cut.
        top_k: Maximum number of documents to return.
        max_docs: Maximum number of leading documents to score. Defaults to
            15, the cap that was previously hard-coded.

    Returns:
        Up to ``top_k`` documents ordered by descending score. An empty list
        when ``docs`` is empty or when ``top_k`` or ``max_docs`` is not
        positive.
    """
    # Bail out before invoking the model: a non-positive limit can never
    # yield results, and a negative slice bound would silently mean
    # "all but the last N" instead of "none".
    if not docs or top_k <= 0 or max_docs <= 0:
        return []

    candidates = docs[:max_docs]
    logger.info("Đang rerank %s tài liệu với Cross-Encoder...", len(candidates))

    pairs = [
        (question, (doc.page_content or "")[:MAX_RERANK_CHARS])
        for doc in candidates
    ]
    # NOTE(review): presumably a sentence-transformers CrossEncoder returning
    # one score per pair — confirm against .models.
    scores = cross_encoder.predict(pairs, show_progress_bar=False)

    # Sort on the score alone so the documents themselves are never compared;
    # sorted() is stable, so equal scores keep their input order.
    ranked = sorted(
        zip(scores, candidates),
        key=lambda scored: scored[0],
        reverse=True,
    )

    logger.info("Top 3 điểm: %s", [f"{score:.3f}" for score, _ in ranked[:3]])
    return [doc for _, doc in ranked[:top_k]]