Spaces:
Sleeping
Sleeping
File size: 3,575 Bytes
1904012 6a14fa9 1904012 6a14fa9 1904012 6a14fa9 1904012 6a14fa9 1904012 3c1cd35 1904012 69934b0 1904012 69934b0 1904012 6a14fa9 1904012 6a14fa9 3c1cd35 6a14fa9 1904012 6a14fa9 1904012 69934b0 1904012 5d75507 1904012 5d75507 1904012 5d75507 1904012 5d75507 1904012 5d75507 1904012 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
from typing import List, Dict, Any
import math
import logging
import time
from config.settings import get_settings
logger = logging.getLogger(__name__)
def rerank_mentors(
    similar_mentors: List[Dict[str, Any]],
    mentee_data: Dict[str, Any],
    final_count: int = 8
) -> List[Dict[str, Any]]:
    """Re-score candidate mentors and return the top ``final_count``.

    Each mentor's final score is a weighted blend of a semantic/reranker
    score and a rule-based score derived from the mentor's rating:
    ``final = reranker * SEMANTIC_WEIGHT + rating_score * RULE_BASED_WEIGHT``.

    Args:
        similar_mentors: Candidate dicts; each must contain ``mentor_id``
            and may carry ``score``, ``reranker_score`` and ``metadata``.
        mentee_data: Mentee profile, forwarded to the reason generator.
        final_count: Maximum number of mentors to return.

    Returns:
        The top-scored mentors, sorted by final score (descending), each
        annotated with score components and a human-readable reason.
    """
    start_time = time.perf_counter()
    # Lazy %-style args: formatting is skipped when INFO is disabled.
    logger.info(
        "[SCORING] Starting final scoring for %d mentors, final_count=%d",
        len(similar_mentors), final_count,
    )
    settings = get_settings()
    # Hoist loop-invariant weight lookups out of the per-mentor loop.
    semantic_weight = settings.SEMANTIC_WEIGHT
    rule_based_weight = settings.RULE_BASED_WEIGHT

    scored_mentors = []
    scoring_start = time.perf_counter()
    for mentor in similar_mentors:
        metadata = mentor.get("metadata", {})
        # Prefer the reranker score; fall back to the raw retrieval score.
        reranker_score = mentor.get("reranker_score")
        if reranker_score is None:
            reranker_score = mentor.get("score", 0.0)
        semantic_score = reranker_score * semantic_weight
        rating = metadata.get("rating", 0.0)
        rating_score = _calculate_rating_score(rating)
        rule_based_score = rating_score * rule_based_weight
        final_score = semantic_score + rule_based_score
        # Keep the original retrieval score for diagnostics/logging.
        original_score = mentor.get("score", 0.0)
        reason = _generate_reason(
            reranker_score,
            metadata,
            mentee_data,
            rating_score
        )
        scored_mentors.append({
            "mentor_id": mentor["mentor_id"],
            "score": final_score,
            "semantic_similarity": original_score,
            "reranker_score": reranker_score,
            "metadata": metadata,
            "reason": reason
        })
    scoring_time = time.perf_counter() - scoring_start
    logger.info("[SCORING] Scoring completed in %.3fs", scoring_time)

    sort_start = time.perf_counter()
    scored_mentors.sort(key=lambda x: x["score"], reverse=True)
    sort_time = time.perf_counter() - sort_start
    logger.info("[SCORING] Sorting completed in %.3fs", sort_time)

    result = scored_mentors[:final_count]
    for idx, mentor in enumerate(result, 1):
        metadata = mentor.get("metadata", {})
        logger.info(
            "[SCORING] Final rank #%d: mentor_id=%s, rating=%s, "
            "pinecone_score=%.4f, reranker_score=%.4f, final_score=%.4f, "
            "semantic_weight=%s, rule_based_weight=%s",
            idx,
            mentor["mentor_id"],
            metadata.get("rating", 0.0),
            mentor.get("semantic_similarity", 0.0),
            mentor.get("reranker_score", 0.0),
            mentor["score"],
            semantic_weight,
            rule_based_weight,
        )
    total_time = time.perf_counter() - start_time
    logger.info(
        "[SCORING] Final scoring completed in %.3fs: %d mentors returned",
        total_time, len(result),
    )
    return result
def _calculate_rating_score(rating: float) -> float:
if rating <= 0:
return 0.0
return min(rating / 5.0, 1.0)
def _generate_reason(
reranker_score: float,
metadata: Dict[str, Any],
mentee_data: Dict[str, Any],
rating_score: float
) -> str:
reasons = []
if reranker_score >= 0.8:
reasons.append("Chuyên môn rất phù hợp")
elif reranker_score >= 0.6:
reasons.append("Phù hợp với mục tiêu của bạn")
rating = metadata.get("rating", 0.0)
if rating >= 4.5:
reasons.append("Đánh giá xuất sắc")
elif rating >= 4.0:
reasons.append("Đánh giá cao")
if not reasons:
reasons.append("Phù hợp tổng thể")
return "; ".join(reasons[:3])
|