# mentorme/utils/scoring.py
# Author: Doanh Van Vu
# Commit 5d75507: Remove evaluation scripts and sample data for MentorMe recommendation system
from typing import List, Dict, Any
import math
import logging
import time
from config.settings import get_settings
logger = logging.getLogger(__name__)
def rerank_mentors(
    similar_mentors: List[Dict[str, Any]],
    mentee_data: Dict[str, Any],
    final_count: int = 8
) -> List[Dict[str, Any]]:
    """Re-rank candidate mentors by blending semantic and rule-based scores.

    Each mentor's final score is a weighted sum of:
      * the reranker score (falling back to the original retrieval ``score``
        when no ``reranker_score`` is present), weighted by
        ``settings.SEMANTIC_WEIGHT``; and
      * a normalized rating score (rating / 5, capped at 1.0), weighted by
        ``settings.RULE_BASED_WEIGHT``.

    Args:
        similar_mentors: Candidate mentors, each a dict with at least
            ``mentor_id`` and optionally ``score``, ``reranker_score`` and
            ``metadata`` (which may carry a ``rating``).
        mentee_data: Mentee profile; currently only forwarded to the
            reason generator.
        final_count: Maximum number of mentors to return.

    Returns:
        The top ``final_count`` mentors sorted by final score (descending),
        each dict carrying ``mentor_id``, ``score``, ``semantic_similarity``,
        ``reranker_score``, ``metadata`` and a human-readable ``reason``.

    Raises:
        KeyError: If a candidate dict is missing ``mentor_id``.
    """
    start_time = time.perf_counter()
    # NOTE: lazy %-style args so formatting cost is skipped when INFO is disabled.
    logger.info(
        "[SCORING] Starting final scoring for %d mentors, final_count=%d",
        len(similar_mentors), final_count
    )
    settings = get_settings()
    scored_mentors = []
    scoring_start = time.perf_counter()
    for mentor in similar_mentors:
        metadata = mentor.get("metadata", {})
        # Prefer the cross-encoder reranker score; fall back to the raw
        # retrieval score when the reranker did not run for this mentor.
        reranker_score = mentor.get("reranker_score")
        if reranker_score is None:
            reranker_score = mentor.get("score", 0.0)
        semantic_score = reranker_score * settings.SEMANTIC_WEIGHT
        rating = metadata.get("rating", 0.0)
        rating_score = _calculate_rating_score(rating)
        rule_based_score = rating_score * settings.RULE_BASED_WEIGHT
        final_score = semantic_score + rule_based_score
        # Keep the original retrieval score for observability/debugging.
        original_score = mentor.get("score", 0.0)
        reason = _generate_reason(
            reranker_score,
            metadata,
            mentee_data,
            rating_score
        )
        scored_mentors.append({
            "mentor_id": mentor["mentor_id"],
            "score": final_score,
            "semantic_similarity": original_score,
            "reranker_score": reranker_score,
            "metadata": metadata,
            "reason": reason
        })
    scoring_time = time.perf_counter() - scoring_start
    logger.info("[SCORING] Scoring completed in %.3fs", scoring_time)
    sort_start = time.perf_counter()
    scored_mentors.sort(key=lambda x: x["score"], reverse=True)
    sort_time = time.perf_counter() - sort_start
    logger.info("[SCORING] Sorting completed in %.3fs", sort_time)
    result = scored_mentors[:final_count]
    # Log the final ranking with the inputs that produced each score.
    for idx, mentor in enumerate(result, 1):
        metadata = mentor.get("metadata", {})
        logger.info(
            "[SCORING] Final rank #%d: mentor_id=%s, rating=%s, "
            "pinecone_score=%.4f, reranker_score=%.4f, final_score=%.4f, "
            "semantic_weight=%s, rule_based_weight=%s",
            idx,
            mentor["mentor_id"],
            metadata.get("rating", 0.0),
            mentor.get("semantic_similarity", 0.0),
            mentor.get("reranker_score", 0.0),
            mentor["score"],
            settings.SEMANTIC_WEIGHT,
            settings.RULE_BASED_WEIGHT,
        )
    total_time = time.perf_counter() - start_time
    logger.info(
        "[SCORING] Final scoring completed in %.3fs: %d mentors returned",
        total_time, len(result)
    )
    return result
def _calculate_rating_score(rating: float) -> float:
if rating <= 0:
return 0.0
return min(rating / 5.0, 1.0)
def _generate_reason(
reranker_score: float,
metadata: Dict[str, Any],
mentee_data: Dict[str, Any],
rating_score: float
) -> str:
reasons = []
if reranker_score >= 0.8:
reasons.append("Chuyên môn rất phù hợp")
elif reranker_score >= 0.6:
reasons.append("Phù hợp với mục tiêu của bạn")
rating = metadata.get("rating", 0.0)
if rating >= 4.5:
reasons.append("Đánh giá xuất sắc")
elif rating >= 4.0:
reasons.append("Đánh giá cao")
if not reasons:
reasons.append("Phù hợp tổng thể")
return "; ".join(reasons[:3])