# mentorme/utils/scoring.py
# Author: Doanh Van Vu
# Commit 5d75507: Remove evaluation scripts and sample data for MentorMe recommendation system
from typing import List, Dict, Any
import math
import logging
import time
from config.settings import get_settings
logger = logging.getLogger(__name__)
def rerank_mentors(
    similar_mentors: List[Dict[str, Any]],
    mentee_data: Dict[str, Any],
    final_count: int = 8
) -> List[Dict[str, Any]]:
    """Re-rank candidate mentors by blending semantic and rule-based scores.

    Each mentor's final score is a weighted sum of:
      * the reranker score (falling back to the original retrieval ``score``
        when no ``reranker_score`` is present), weighted by
        ``settings.SEMANTIC_WEIGHT``; and
      * a normalized rating score (rating / 5, capped at 1.0), weighted by
        ``settings.RULE_BASED_WEIGHT``.

    Args:
        similar_mentors: Candidate mentors, each a dict with at least
            ``mentor_id`` and optionally ``score``, ``reranker_score`` and
            ``metadata`` (which may carry a ``rating``).
        mentee_data: Mentee profile; currently only forwarded to the
            reason generator.
        final_count: Maximum number of mentors to return.

    Returns:
        The top ``final_count`` mentors sorted by final score (descending),
        each dict carrying ``mentor_id``, ``score``, ``semantic_similarity``,
        ``reranker_score``, ``metadata`` and a human-readable ``reason``.

    Raises:
        KeyError: If a candidate dict is missing ``mentor_id``.
    """
    start_time = time.perf_counter()
    # NOTE: lazy %-style args so formatting cost is skipped when INFO is disabled.
    logger.info(
        "[SCORING] Starting final scoring for %d mentors, final_count=%d",
        len(similar_mentors), final_count
    )
    settings = get_settings()
    scored_mentors = []
    scoring_start = time.perf_counter()
    for mentor in similar_mentors:
        metadata = mentor.get("metadata", {})
        # Prefer the cross-encoder reranker score; fall back to the raw
        # retrieval score when the reranker did not run for this mentor.
        reranker_score = mentor.get("reranker_score")
        if reranker_score is None:
            reranker_score = mentor.get("score", 0.0)
        semantic_score = reranker_score * settings.SEMANTIC_WEIGHT
        rating = metadata.get("rating", 0.0)
        rating_score = _calculate_rating_score(rating)
        rule_based_score = rating_score * settings.RULE_BASED_WEIGHT
        final_score = semantic_score + rule_based_score
        # Keep the original retrieval score for observability/debugging.
        original_score = mentor.get("score", 0.0)
        reason = _generate_reason(
            reranker_score,
            metadata,
            mentee_data,
            rating_score
        )
        scored_mentors.append({
            "mentor_id": mentor["mentor_id"],
            "score": final_score,
            "semantic_similarity": original_score,
            "reranker_score": reranker_score,
            "metadata": metadata,
            "reason": reason
        })
    scoring_time = time.perf_counter() - scoring_start
    logger.info("[SCORING] Scoring completed in %.3fs", scoring_time)
    sort_start = time.perf_counter()
    scored_mentors.sort(key=lambda x: x["score"], reverse=True)
    sort_time = time.perf_counter() - sort_start
    logger.info("[SCORING] Sorting completed in %.3fs", sort_time)
    result = scored_mentors[:final_count]
    # Log the final ranking with the inputs that produced each score.
    for idx, mentor in enumerate(result, 1):
        metadata = mentor.get("metadata", {})
        logger.info(
            "[SCORING] Final rank #%d: mentor_id=%s, rating=%s, "
            "pinecone_score=%.4f, reranker_score=%.4f, final_score=%.4f, "
            "semantic_weight=%s, rule_based_weight=%s",
            idx,
            mentor["mentor_id"],
            metadata.get("rating", 0.0),
            mentor.get("semantic_similarity", 0.0),
            mentor.get("reranker_score", 0.0),
            mentor["score"],
            settings.SEMANTIC_WEIGHT,
            settings.RULE_BASED_WEIGHT,
        )
    total_time = time.perf_counter() - start_time
    logger.info(
        "[SCORING] Final scoring completed in %.3fs: %d mentors returned",
        total_time, len(result)
    )
    return result
def _calculate_rating_score(rating: float) -> float:
if rating <= 0:
return 0.0
return min(rating / 5.0, 1.0)
def _generate_reason(
reranker_score: float,
metadata: Dict[str, Any],
mentee_data: Dict[str, Any],
rating_score: float
) -> str:
reasons = []
if reranker_score >= 0.8:
reasons.append("Chuyên môn rất phù hợp")
elif reranker_score >= 0.6:
reasons.append("Phù hợp với mục tiêu của bạn")
rating = metadata.get("rating", 0.0)
if rating >= 4.5:
reasons.append("Đánh giá xuất sắc")
elif rating >= 4.0:
reasons.append("Đánh giá cao")
if not reasons:
reasons.append("Phù hợp tổng thể")
return "; ".join(reasons[:3])