Spaces:
Sleeping
Sleeping
File size: 3,575 Bytes
1904012 6a14fa9 1904012 6a14fa9 1904012 6a14fa9 1904012 6a14fa9 1904012 3c1cd35 1904012 69934b0 1904012 69934b0 1904012 6a14fa9 1904012 6a14fa9 3c1cd35 6a14fa9 1904012 6a14fa9 1904012 69934b0 1904012 5d75507 1904012 5d75507 1904012 5d75507 1904012 5d75507 1904012 5d75507 1904012 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
from typing import List, Dict, Any
import math
import logging
import time
from config.settings import get_settings
logger = logging.getLogger(__name__)
def rerank_mentors(
    similar_mentors: List[Dict[str, Any]],
    mentee_data: Dict[str, Any],
    final_count: int = 8
) -> List[Dict[str, Any]]:
    """Re-score candidate mentors and return the top ``final_count``.

    Each mentor's final score is a weighted blend of a semantic/reranker
    score and a rule-based score derived from the mentor's rating:
    ``final = reranker * SEMANTIC_WEIGHT + rating_score * RULE_BASED_WEIGHT``.

    Args:
        similar_mentors: Candidate dicts; each must contain ``mentor_id``
            and may carry ``score``, ``reranker_score`` and ``metadata``.
        mentee_data: Mentee profile, forwarded to the reason generator.
        final_count: Maximum number of mentors to return.

    Returns:
        The top-scored mentors, sorted by final score (descending), each
        annotated with score components and a human-readable reason.
    """
    start_time = time.perf_counter()
    # Lazy %-style args: formatting is skipped when INFO is disabled.
    logger.info(
        "[SCORING] Starting final scoring for %d mentors, final_count=%d",
        len(similar_mentors), final_count,
    )
    settings = get_settings()
    # Hoist loop-invariant weight lookups out of the per-mentor loop.
    semantic_weight = settings.SEMANTIC_WEIGHT
    rule_based_weight = settings.RULE_BASED_WEIGHT

    scored_mentors = []
    scoring_start = time.perf_counter()
    for mentor in similar_mentors:
        metadata = mentor.get("metadata", {})
        # Prefer the reranker score; fall back to the raw retrieval score.
        reranker_score = mentor.get("reranker_score")
        if reranker_score is None:
            reranker_score = mentor.get("score", 0.0)
        semantic_score = reranker_score * semantic_weight
        rating = metadata.get("rating", 0.0)
        rating_score = _calculate_rating_score(rating)
        rule_based_score = rating_score * rule_based_weight
        final_score = semantic_score + rule_based_score
        # Keep the original retrieval score for diagnostics/logging.
        original_score = mentor.get("score", 0.0)
        reason = _generate_reason(
            reranker_score,
            metadata,
            mentee_data,
            rating_score
        )
        scored_mentors.append({
            "mentor_id": mentor["mentor_id"],
            "score": final_score,
            "semantic_similarity": original_score,
            "reranker_score": reranker_score,
            "metadata": metadata,
            "reason": reason
        })
    scoring_time = time.perf_counter() - scoring_start
    logger.info("[SCORING] Scoring completed in %.3fs", scoring_time)

    sort_start = time.perf_counter()
    scored_mentors.sort(key=lambda x: x["score"], reverse=True)
    sort_time = time.perf_counter() - sort_start
    logger.info("[SCORING] Sorting completed in %.3fs", sort_time)

    result = scored_mentors[:final_count]
    for idx, mentor in enumerate(result, 1):
        metadata = mentor.get("metadata", {})
        logger.info(
            "[SCORING] Final rank #%d: mentor_id=%s, rating=%s, "
            "pinecone_score=%.4f, reranker_score=%.4f, final_score=%.4f, "
            "semantic_weight=%s, rule_based_weight=%s",
            idx,
            mentor["mentor_id"],
            metadata.get("rating", 0.0),
            mentor.get("semantic_similarity", 0.0),
            mentor.get("reranker_score", 0.0),
            mentor["score"],
            semantic_weight,
            rule_based_weight,
        )
    total_time = time.perf_counter() - start_time
    logger.info(
        "[SCORING] Final scoring completed in %.3fs: %d mentors returned",
        total_time, len(result),
    )
    return result
def _calculate_rating_score(rating: float) -> float:
if rating <= 0:
return 0.0
return min(rating / 5.0, 1.0)
def _generate_reason(
reranker_score: float,
metadata: Dict[str, Any],
mentee_data: Dict[str, Any],
rating_score: float
) -> str:
reasons = []
if reranker_score >= 0.8:
reasons.append("Chuyên môn rất phù hợp")
elif reranker_score >= 0.6:
reasons.append("Phù hợp với mục tiêu của bạn")
rating = metadata.get("rating", 0.0)
if rating >= 4.5:
reasons.append("Đánh giá xuất sắc")
elif rating >= 4.0:
reasons.append("Đánh giá cao")
if not reasons:
reasons.append("Phù hợp tổng thể")
return "; ".join(reasons[:3])
|