Doanh Van Vu committed
Commit 6a14fa9 · Parent: 1904012

Add test for recommend mentors endpoint and enhance logging in services


- Introduced a new test script, `test_api.py`, to validate the recommend mentors API endpoint.
- Enhanced logging in `embedding_service.py`, `pinecone_service.py`, `recommendation_service.py`, `reranker_service.py`, and `scoring.py` to include timing information for key operations, improving traceability and performance monitoring; the shared pattern is sketched after this list.
- Improved error handling and logging messages for clearer debugging.
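
For reference, every timed block in this commit follows the same shape: take a `time.perf_counter()` reading before an operation, compute the elapsed seconds afterwards, and emit a tagged `logger.info` line with a `:.3f` format. A minimal, self-contained sketch of that pattern (the `slow_operation` stand-in and the `[EXAMPLE]` tag are illustrative, not names from the codebase):

import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def slow_operation() -> None:
    time.sleep(0.1)  # stand-in for model loading, encoding, or a vector query

start = time.perf_counter()              # monotonic clock, unaffected by system time changes
slow_operation()
elapsed = time.perf_counter() - start    # elapsed wall time in seconds
logger.info(f"[EXAMPLE] Operation completed in {elapsed:.3f}s")

`time.perf_counter()` is preferred over `time.time()` here because it is monotonic and has the highest available resolution for measuring short intervals.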

services/embedding_service.py CHANGED
@@ -1,6 +1,7 @@
 import torch
 from sentence_transformers import SentenceTransformer
 import logging
+import time
 from typing import List, Union
 from config.settings import get_settings

@@ -22,9 +23,10 @@ class EmbeddingService:
     def _load_model(self):
         settings = get_settings()
         try:
-            logger.info(f"Loading embedding model: {settings.EMBEDDING_MODEL_NAME}")
+            start_time = time.perf_counter()
+            logger.info(f"[EMBEDDING] Starting to load embedding model: {settings.EMBEDDING_MODEL_NAME}")
             device = "cuda" if torch.cuda.is_available() else "cpu"
-            logger.info(f"Using device: {device}")
+            logger.info(f"[EMBEDDING] Using device: {device}")

             EmbeddingService._model = SentenceTransformer(
                 settings.EMBEDDING_MODEL_NAME,
@@ -33,9 +35,10 @@ class EmbeddingService:

             EmbeddingService._model.max_seq_length = 2048

-            logger.info("Embedding model loaded successfully")
+            load_time = time.perf_counter() - start_time
+            logger.info(f"[EMBEDDING] Embedding model loaded successfully in {load_time:.3f}s")
         except Exception as e:
-            logger.error(f"Failed to load embedding model: {str(e)}")
+            logger.error(f"[EMBEDDING] Failed to load embedding model: {str(e)}", exc_info=True)
             raise

     def encode(
@@ -58,6 +61,7 @@ class EmbeddingService:
             raise ValueError("Texts cannot be empty")

         try:
+            encode_start = time.perf_counter()
             embeddings = EmbeddingService._model.encode(
                 texts,
                 batch_size=batch_size,
@@ -65,24 +69,26 @@ class EmbeddingService:
                 convert_to_numpy=True,
                 normalize_embeddings=False
             )
+            encode_time = time.perf_counter() - encode_start
+            logger.info(f"[EMBEDDING] Encoded {len(texts)} text(s) in {encode_time:.3f}s (is_query={is_query})")

             expected_dim = 1024
             if single_text:
                 embedding_list = embeddings[0].tolist()
                 if len(embedding_list) != expected_dim:
-                    logger.warning(f"Embedding dimension mismatch: expected {expected_dim}, got {len(embedding_list)}")
+                    logger.warning(f"[EMBEDDING] Embedding dimension mismatch: expected {expected_dim}, got {len(embedding_list)}")
                 return embedding_list

             result = []
             for emb in embeddings:
                 emb_list = emb.tolist()
                 if len(emb_list) != expected_dim:
-                    logger.warning(f"Embedding dimension mismatch: expected {expected_dim}, got {len(emb_list)}")
+                    logger.warning(f"[EMBEDDING] Embedding dimension mismatch: expected {expected_dim}, got {len(emb_list)}")
                 result.append(emb_list)

             return result
         except Exception as e:
-            logger.error(f"Error encoding texts: {str(e)}")
+            logger.error(f"[EMBEDDING] Error encoding texts: {str(e)}", exc_info=True)
             raise

     def get_model_info(self) -> dict:
services/pinecone_service.py CHANGED
@@ -1,6 +1,7 @@
 from pinecone import Pinecone, ServerlessSpec
 from typing import List, Dict, Optional, Any
 import logging
+import time
 from config.settings import get_settings

 logger = logging.getLogger(__name__)
@@ -99,14 +100,16 @@ class PineconeService:
         include_metadata: bool = True
     ) -> List[Dict[str, Any]]:
         try:
+            start_time = time.perf_counter()
             settings = get_settings()
             expected_dim = settings.PINECONE_DIMENSION

             if len(query_vector) != expected_dim:
                 error_msg = f"Query vector dimension mismatch: expected {expected_dim}, got {len(query_vector)}"
-                logger.error(error_msg)
+                logger.error(f"[PINECONE] {error_msg}")
                 raise ValueError(error_msg)

+            logger.info(f"[PINECONE] Querying similar mentors: top_k={top_k}, filter={filter}")
             query_response = PineconeService._index.query(
                 vector=query_vector,
                 top_k=top_k,
@@ -114,6 +117,8 @@ class PineconeService:
                 include_metadata=include_metadata
             )

+            query_time = time.perf_counter() - start_time
+
             results = []
             for match in query_response.matches:
                 results.append({
@@ -122,9 +127,10 @@ class PineconeService:
                     "metadata": match.metadata if include_metadata else None
                 })

+            logger.info(f"[PINECONE] Query completed in {query_time:.3f}s: found {len(results)} results")
             return results
         except Exception as e:
-            logger.error(f"Failed to query similar mentors: {str(e)}")
+            logger.error(f"[PINECONE] Failed to query similar mentors: {str(e)}", exc_info=True)
             raise

     def delete_mentor(self, mentor_id: str) -> bool:
services/recommendation_service.py CHANGED
@@ -1,5 +1,6 @@
 from typing import List, Dict, Any, Optional
 import logging
+import time
 from services.embedding_service import EmbeddingService
 from services.pinecone_service import PineconeService
 from services.reranker_service import RerankerService
@@ -71,31 +72,53 @@ class RecommendationService:
         top_k: Optional[int] = None,
         final_count: Optional[int] = None
     ) -> List[Dict[str, Any]]:
+        pipeline_start = time.perf_counter()
+        logger.info("[RECOMMEND] Starting recommendation pipeline")
+
         try:
             top_k = top_k or self.settings.RECOMMENDATION_TOP_K
             final_count = final_count or self.settings.RECOMMENDATION_FINAL_COUNT
+            logger.info(f"[RECOMMEND] Configuration: top_k={top_k}, rerank_k={self.settings.RECOMMENDATION_RERANK_K}, final_count={final_count}")

+            query_build_start = time.perf_counter()
             query_text = build_mentee_query_text(mentee_data)
+            query_build_time = time.perf_counter() - query_build_start
+            logger.info(f"[RECOMMEND] Query text built in {query_build_time:.3f}s: {query_text[:100]}...")
+
+            embedding_start = time.perf_counter()
             query_embedding = self.embedding_service.encode(query_text, is_query=True)
+            embedding_time = time.perf_counter() - embedding_start
+            logger.info(f"[RECOMMEND] Query embedding generated in {embedding_time:.3f}s")

+            filter_start = time.perf_counter()
             filter_dict = self._build_filter(mentee_data)
+            filter_time = time.perf_counter() - filter_start
+            logger.info(f"[RECOMMEND] Filter built in {filter_time:.3f}s: {filter_dict}")

+            pinecone_start = time.perf_counter()
             similar_mentors = self.pinecone_service.query_similar(
                 query_vector=query_embedding,
                 top_k=top_k,
                 filter=filter_dict if filter_dict else None,
                 include_metadata=True
             )
+            pinecone_time = time.perf_counter() - pinecone_start
+            logger.info(f"[RECOMMEND] Pinecone query completed in {pinecone_time:.3f}s: found {len(similar_mentors)} candidates")

             if not similar_mentors:
+                logger.warning("[RECOMMEND] No similar mentors found, returning empty list")
                 return []

+            metadata_start = time.perf_counter()
             for mentor in similar_mentors:
                 metadata = mentor.get("metadata", {})
                 mentor["mentor_text"] = metadata.get("mentor_text", "")
                 if not mentor["mentor_text"]:
-                    logger.warning(f"Mentor {mentor.get('mentor_id', 'unknown')} missing mentor_text in metadata")
+                    logger.warning(f"[RECOMMEND] Mentor {mentor.get('mentor_id', 'unknown')} missing mentor_text in metadata")
+            metadata_time = time.perf_counter() - metadata_start
+            logger.info(f"[RECOMMEND] Metadata extraction completed in {metadata_time:.3f}s")

+            reranker_start = time.perf_counter()
             reranker = RerankerService()
             rerank_k = self.settings.RECOMMENDATION_RERANK_K

@@ -105,21 +128,34 @@ class RecommendationService:
                     candidates=similar_mentors,
                     top_k=rerank_k
                 )
+                reranker_time = time.perf_counter() - reranker_start
+                logger.info(f"[RECOMMEND] Reranking completed in {reranker_time:.3f}s: {len(reranked_mentors)} mentors reranked")
             except Exception as e:
-                logger.error(f"Reranker failed: {e}. Falling back to cosine similarity.", exc_info=True)
+                reranker_time = time.perf_counter() - reranker_start
+                logger.error(f"[RECOMMEND] Reranker failed after {reranker_time:.3f}s: {e}. Falling back to cosine similarity.", exc_info=True)
                 for mentor in similar_mentors:
                     mentor["reranker_score"] = mentor.get("score", 0.0)
                 reranked_mentors = sorted(similar_mentors, key=lambda x: x.get("reranker_score", 0.0), reverse=True)[:rerank_k]

+            scoring_start = time.perf_counter()
             reranked = rerank_mentors(
                 reranked_mentors,
                 mentee_data,
                 final_count=final_count
             )
+            scoring_time = time.perf_counter() - scoring_start
+            logger.info(f"[RECOMMEND] Final scoring completed in {scoring_time:.3f}s: {len(reranked)} final recommendations")
+
+            total_time = time.perf_counter() - pipeline_start
+            logger.info(f"[RECOMMEND] Recommendation pipeline completed in {total_time:.3f}s")
+            logger.info(f"[RECOMMEND] Time breakdown - Query: {query_build_time:.3f}s, Embedding: {embedding_time:.3f}s, "
+                        f"Pinecone: {pinecone_time:.3f}s, Metadata: {metadata_time:.3f}s, "
+                        f"Reranker: {reranker_time:.3f}s, Scoring: {scoring_time:.3f}s")

             return reranked
         except Exception as e:
+            total_time = time.perf_counter() - pipeline_start
+            logger.error(f"[RECOMMEND] Failed to recommend mentors after {total_time:.3f}s: {str(e)}", exc_info=True)
             raise

     def _build_filter(self, mentee_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
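
One observation on the breakdown logging above: the per-stage timings live in separate local variables and the summary is a hand-built multi-line f-string. An alternative, not part of this commit, is to accumulate stage durations in a dict and render the summary in one place, which keeps the breakdown line in sync as stages are added or removed. A sketch under that assumption (the `run_timed` helper and the stage names here are hypothetical):

import logging
import time
from typing import Any, Callable, Dict

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def run_timed(stage: str, fn: Callable[[], Any], timings: Dict[str, float]) -> Any:
    # Run one pipeline stage, recording its duration under `stage`.
    start = time.perf_counter()
    result = fn()
    timings[stage] = time.perf_counter() - start
    return result

timings: Dict[str, float] = {}
run_timed("Embedding", lambda: time.sleep(0.05), timings)  # stand-ins for real stages
run_timed("Pinecone", lambda: time.sleep(0.02), timings)
breakdown = ", ".join(f"{name}: {secs:.3f}s" for name, secs in timings.items())
logger.info(f"[RECOMMEND] Time breakdown - {breakdown}")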
services/reranker_service.py CHANGED
@@ -1,6 +1,7 @@
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import logging
+import time

 logger = logging.getLogger(__name__)

@@ -19,45 +20,69 @@ class RerankerService:

         try:
             self.model_name = "AITeamVN/Vietnamese_Reranker"
-            logger.info(f"Loading reranker model: {self.model_name}")
+            logger.info(f"[RERANKER] Starting to load reranker model: {self.model_name}")
+            start_time = time.perf_counter()

+            logger.info(f"[RERANKER] Loading tokenizer...")
+            tokenizer_start = time.perf_counter()
             self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+            tokenizer_time = time.perf_counter() - tokenizer_start
+            logger.info(f"[RERANKER] Tokenizer loaded in {tokenizer_time:.3f}s")
+
+            logger.info(f"[RERANKER] Loading model...")
+            model_start = time.perf_counter()
             self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
             self.model.eval()
+            model_time = time.perf_counter() - model_start
+            logger.info(f"[RERANKER] Model loaded in {model_time:.3f}s")

             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            logger.info(f"[RERANKER] Moving model to device: {self.device}")
+            device_start = time.perf_counter()
             self.model.to(self.device)
+            device_time = time.perf_counter() - device_start
+            logger.info(f"[RERANKER] Model moved to device in {device_time:.3f}s")

-            logger.info(f"Reranker model loaded successfully on device: {self.device}")
+            total_time = time.perf_counter() - start_time
+            logger.info(f"[RERANKER] Reranker model loaded successfully on device: {self.device} (Total time: {total_time:.3f}s)")
             RerankerService._initialized = True
         except Exception as e:
-            logger.error(f"Failed to load reranker model: {str(e)}", exc_info=True)
+            logger.error(f"[RERANKER] Failed to load reranker model: {str(e)}", exc_info=True)
             raise

     def rerank(self, query_text: str, candidates: list, top_k: int = None) -> list:
         if not candidates:
+            logger.info("[RERANKER] No candidates provided, returning empty list")
             return []

         if not query_text:
-            logger.warning("Empty query text provided to reranker")
+            logger.warning("[RERANKER] Empty query text provided to reranker")
             return candidates

         try:
+            start_time = time.perf_counter()
+            logger.info(f"[RERANKER] Starting reranking for {len(candidates)} candidates")
+
             pairs = []
             valid_candidates = []

+            prep_start = time.perf_counter()
             for cand in candidates:
                 mentor_text = cand.get("mentor_text", "")
                 if mentor_text:
                     pairs.append((query_text, mentor_text))
                     valid_candidates.append(cand)
                 else:
-                    logger.warning(f"Mentor {cand.get('mentor_id', 'unknown')} missing mentor_text, skipping reranking")
+                    logger.warning(f"[RERANKER] Mentor {cand.get('mentor_id', 'unknown')} missing mentor_text, skipping reranking")
+
+            prep_time = time.perf_counter() - prep_start
+            logger.info(f"[RERANKER] Prepared {len(pairs)} pairs in {prep_time:.3f}s")

             if not pairs:
-                logger.warning("No valid candidate pairs for reranking, returning original candidates")
+                logger.warning("[RERANKER] No valid candidate pairs for reranking, returning original candidates")
                 return candidates

+            tokenizer_start = time.perf_counter()
             inputs = self.tokenizer(
                 pairs,
                 padding=True,
@@ -65,26 +90,37 @@ class RerankerService:
                 max_length=512,
                 return_tensors="pt"
             ).to(self.device)
+            tokenizer_time = time.perf_counter() - tokenizer_start
+            logger.info(f"[RERANKER] Tokenization completed in {tokenizer_time:.3f}s")

+            inference_start = time.perf_counter()
             with torch.no_grad():
                 outputs = self.model(**inputs)
                 scores = torch.sigmoid(outputs.logits).squeeze(-1).cpu().tolist()
+            inference_time = time.perf_counter() - inference_start
+            logger.info(f"[RERANKER] Model inference completed in {inference_time:.3f}s for {len(pairs)} pairs")

             if isinstance(scores, float):
                 scores = [scores]

+            scoring_start = time.perf_counter()
             for cand, score in zip(valid_candidates, scores):
                 cand["reranker_score"] = float(score)

             sorted_candidates = sorted(valid_candidates, key=lambda x: x.get("reranker_score", 0.0), reverse=True)
+            scoring_time = time.perf_counter() - scoring_start
+            logger.info(f"[RERANKER] Scoring and sorting completed in {scoring_time:.3f}s")

-            if top_k:
-                return sorted_candidates[:top_k]
-            return sorted_candidates
+            result = sorted_candidates[:top_k] if top_k else sorted_candidates
+            total_time = time.perf_counter() - start_time
+            logger.info(f"[RERANKER] Reranking completed: {len(result)} results returned (Total time: {total_time:.3f}s)")
+
+            return result
         except Exception as e:
+            logger.error(f"[RERANKER] Error during reranking: {str(e)}", exc_info=True)
             for cand in candidates:
                 if "reranker_score" not in cand:
                     cand["reranker_score"] = cand.get("score", 0.0)
             return sorted(candidates, key=lambda x: x.get("reranker_score", 0.0), reverse=True)[:top_k] if top_k else candidates

+
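
The same start/elapsed/log triplet now appears around nearly every step in these services. If that duplication becomes a maintenance burden, one possible consolidation, not applied in this commit, is a small context manager; a sketch, with `log_timing` being a hypothetical name:

import logging
import time
from contextlib import contextmanager

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@contextmanager
def log_timing(tag: str, message: str):
    # Emit `message` with elapsed seconds when the block exits, even on error.
    start = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - start
        logger.info(f"[{tag}] {message} in {elapsed:.3f}s")

# Usage: replaces the manual start/elapsed bookkeeping around a step.
with log_timing("RERANKER", "Tokenization completed"):
    time.sleep(0.01)  # stand-in for self.tokenizer(...)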
test_api.py ADDED
@@ -0,0 +1,110 @@
+import requests
+import json
+
+BASE_URL = "https://mentorme666-mentorme.hf.space/api/v1"
+
+def test_recommend():
+    """Test recommend mentors endpoint"""
+    print("Testing Recommend Mentors...")
+    print("="*60)
+
+    recommendation_data = {
+        "goals": "Tôi muốn học Python và FastAPI để phát triển web",
+        "desired_skills": [
+            {"id": 1, "name": "Python"},
+            {"id": 2, "name": "FastAPI"}
+        ],
+        "interests": [
+            {"id": 1, "name": "Web Development"}
+        ],
+        "min_rating": 4.0,
+        "require_availability": True,
+        "skill_ids": [1, 2],
+        "domain_ids": [1],
+        "top_k": 30,
+        "final_count": 6
+    }
+
+    try:
+        print(f"Request URL: {BASE_URL}/recommend")
+        print(f"Request Data: {json.dumps(recommendation_data, indent=2, ensure_ascii=False)}")
+        print("\nSending request...")
+
+        response = requests.post(
+            f"{BASE_URL}/recommend",
+            json=recommendation_data,
+            headers={
+                "Content-Type": "application/json",
+                "User-Agent": "Mozilla/5.0",
+                "Accept": "application/json"
+            },
+            timeout=60
+        )
+
+        print(f"\n{'='*60}")
+        print("Response:")
+        print(f"{'='*60}")
+        print(f"Status Code: {response.status_code}")
+        print(f"URL: {response.url}")
+
+        if response.status_code == 200:
+            try:
+                result = response.json()
+                print(f"\nResponse JSON:")
+                print(json.dumps(result, indent=2, ensure_ascii=False))
+
+                if "mentors" in result:
+                    print(f"\nFound {result.get('count', 0)} recommended mentors:")
+                    for i, mentor in enumerate(result["mentors"], 1):
+                        print(f"\n{i}. Mentor ID: {mentor.get('mentor_id')}")
+                        print(f"   Score: {mentor.get('score', 0):.4f}")
+                        print(f"   Semantic Similarity: {mentor.get('semantic_similarity', 0):.4f}")
+                        print(f"   Reason: {mentor.get('reason', 'N/A')}")
+                        if "metadata" in mentor:
+                            meta = mentor["metadata"]
+                            print(f"   Rating: {meta.get('rating', 'N/A')}")
+                            print(f"   Available Slots: {meta.get('available_slots', 0)}")
+
+                if "query_text" in result:
+                    print(f"\nQuery Text: {result['query_text']}")
+
+                print(f"\n{'='*60}")
+                print("[PASSED] Test completed successfully!")
+                return True
+            except json.JSONDecodeError:
+                print(f"Response Text: {response.text}")
+                print(f"\n{'='*60}")
+                print("[FAILED] Invalid JSON response")
+                return False
+        else:
+            print(f"Response Text: {response.text}")
+            print(f"\n{'='*60}")
+            print(f"[FAILED] Status code: {response.status_code}")
+            return False
+
+    except requests.exceptions.Timeout:
+        print(f"\n{'='*60}")
+        print("[FAILED] Request timeout")
+        return False
+    except requests.exceptions.ConnectionError as e:
+        print(f"\n{'='*60}")
+        print(f"[FAILED] Connection error: {str(e)}")
+        return False
+    except Exception as e:
+        print(f"\n{'='*60}")
+        print(f"[FAILED] Error: {str(e)}")
+        return False
+
+if __name__ == "__main__":
+    print("\n" + "="*60)
+    print("MentorMe AI Service - Recommend API Test")
+    print("="*60 + "\n")
+
+    success = test_recommend()
+
+    print("\n" + "="*60)
+    if success:
+        print("Test Result: PASSED")
+    else:
+        print("Test Result: FAILED")
+    print("="*60 + "\n")
utils/scoring.py CHANGED
@@ -1,15 +1,23 @@
 from typing import List, Dict, Any
 import math
+import logging
+import time
 from config.settings import get_settings

+logger = logging.getLogger(__name__)
+
 def rerank_mentors(
     similar_mentors: List[Dict[str, Any]],
     mentee_data: Dict[str, Any],
     final_count: int = 8
 ) -> List[Dict[str, Any]]:
+    start_time = time.perf_counter()
+    logger.info(f"[SCORING] Starting final scoring for {len(similar_mentors)} mentors, final_count={final_count}")
+
     settings = get_settings()
     scored_mentors = []

+    scoring_start = time.perf_counter()
     for mentor in similar_mentors:
         metadata = mentor.get("metadata", {})

@@ -48,9 +56,19 @@ def rerank_mentors(
             "reason": reason
         })

+    scoring_time = time.perf_counter() - scoring_start
+    logger.info(f"[SCORING] Scoring completed in {scoring_time:.3f}s")
+
+    sort_start = time.perf_counter()
     scored_mentors.sort(key=lambda x: x["score"], reverse=True)
+    sort_time = time.perf_counter() - sort_start
+    logger.info(f"[SCORING] Sorting completed in {sort_time:.3f}s")
+
+    result = scored_mentors[:final_count]
+    total_time = time.perf_counter() - start_time
+    logger.info(f"[SCORING] Final scoring completed in {total_time:.3f}s: {len(result)} mentors returned")

-    return scored_mentors[:final_count]
+    return result

 def _calculate_rating_score(rating: float) -> float:
     if rating <= 0: