Doanh Van Vu committed on
Commit
b1f36a0
·
1 Parent(s): 01252a2

Update the embedding model to Vietnamese_Embedding and adjust related configurations. Replace FlagEmbedding with SentenceTransformer in the embedding service, and add vector dimension checks in the Pinecone service. Update requirements to reflect the new dependencies.

Browse files
app.py CHANGED
@@ -19,7 +19,7 @@ async def lifespan(app: FastAPI):
19
 
20
  app = FastAPI(
21
  title="MentorMe AI Recommendation Server",
22
- description="AI-powered mentor-mentee recommendation using bge-m3 embeddings and Pinecone",
23
  version="1.0.0",
24
  lifespan=lifespan
25
  )
 
19
 
20
  app = FastAPI(
21
  title="MentorMe AI Recommendation Server",
22
+ description="AI-powered mentor-mentee recommendation using Vietnamese_Embedding and Pinecone",
23
  version="1.0.0",
24
  lifespan=lifespan
25
  )
config/settings.py CHANGED
@@ -22,7 +22,7 @@ class Settings(BaseSettings):
22
  PORT: int = int(os.getenv("PORT", "7860"))
23
  DEBUG: bool = False
24
 
25
- BGE_MODEL_NAME: str = "BAAI/bge-m3"
26
  USE_FP16: bool = True
27
 
28
  RECOMMENDATION_TOP_K: int = 30
 
22
  PORT: int = int(os.getenv("PORT", "7860"))
23
  DEBUG: bool = False
24
 
25
+ EMBEDDING_MODEL_NAME: str = "AITeamVN/Vietnamese_Embedding"
26
  USE_FP16: bool = True
27
 
28
  RECOMMENDATION_TOP_K: int = 30
main.py CHANGED
@@ -19,7 +19,7 @@ async def lifespan(app: FastAPI):
19
 
20
  app = FastAPI(
21
  title="MentorMe AI Recommendation Server",
22
- description="AI-powered mentor-mentee recommendation using bge-m3 embeddings and Pinecone",
23
  version="1.0.0",
24
  lifespan=lifespan
25
  )
 
19
 
20
  app = FastAPI(
21
  title="MentorMe AI Recommendation Server",
22
+ description="AI-powered mentor-mentee recommendation using Vietnamese_Embedding and Pinecone",
23
  version="1.0.0",
24
  lifespan=lifespan
25
  )
requirements.txt CHANGED
@@ -4,12 +4,11 @@ pydantic==2.5.0
4
  pydantic-settings==2.1.0
5
  python-dotenv==1.0.0
6
  pinecone-client>=3.2.0,<6.0.0
7
- FlagEmbedding==1.2.11
8
  torch>=2.0.0
9
  numpy>=1.24.0
10
  python-multipart==0.0.6
11
  transformers>=4.30.0
12
- peft>=0.3.0
13
  accelerate>=0.20.0
14
  sentencepiece>=0.1.99
15
 
 
4
  pydantic-settings==2.1.0
5
  python-dotenv==1.0.0
6
  pinecone-client>=3.2.0,<6.0.0
7
+ sentence-transformers>=2.2.0
8
  torch>=2.0.0
9
  numpy>=1.24.0
10
  python-multipart==0.0.6
11
  transformers>=4.30.0
 
12
  accelerate>=0.20.0
13
  sentencepiece>=0.1.99
14
 
services/embedding_service.py CHANGED
@@ -1,5 +1,5 @@
1
  import torch
2
- from FlagEmbedding import FlagModel
3
  import logging
4
  from typing import List, Union
5
  from config.settings import get_settings
@@ -22,18 +22,16 @@ class EmbeddingService:
22
  def _load_model(self):
23
  settings = get_settings()
24
  try:
25
- logger.info(f"Loading embedding model: {settings.BGE_MODEL_NAME}")
26
  device = "cuda" if torch.cuda.is_available() else "cpu"
27
  logger.info(f"Using device: {device}")
28
 
29
- EmbeddingService._model = FlagModel(
30
- settings.BGE_MODEL_NAME,
31
- use_fp16=settings.USE_FP16 and device == "cuda",
32
- query_instruction_for_retrieval="Represent this sentence for searching relevant passages:"
33
  )
34
 
35
- if device == "cuda":
36
- EmbeddingService._model = EmbeddingService._model.cuda()
37
 
38
  logger.info("Embedding model loaded successfully")
39
  except Exception as e:
@@ -45,7 +43,7 @@ class EmbeddingService:
45
  texts: Union[str, List[str]],
46
  is_query: bool = False,
47
  batch_size: int = 32,
48
- max_length: int = 8192
49
  ) -> Union[List[float], List[List[float]]]:
50
  if EmbeddingService._model is None:
51
  raise RuntimeError("Embedding model not loaded")
@@ -56,34 +54,53 @@ class EmbeddingService:
56
  else:
57
  single_text = False
58
 
 
 
 
59
  try:
60
- if is_query:
61
- embeddings = EmbeddingService._model.encode_queries(
62
- texts,
63
- batch_size=batch_size,
64
- max_length=max_length
65
- )
66
- else:
67
- embeddings = EmbeddingService._model.encode(
68
- texts,
69
- batch_size=batch_size,
70
- max_length=max_length
71
- )
72
 
 
73
  if single_text:
74
- return embeddings[0].tolist()
75
- return [emb.tolist() for emb in embeddings]
 
 
 
 
 
 
 
 
 
 
 
76
  except Exception as e:
77
  logger.error(f"Error encoding texts: {str(e)}")
78
  raise
79
 
80
  def get_model_info(self) -> dict:
81
  settings = get_settings()
 
 
 
 
 
 
 
 
 
82
  return {
83
- "model_name": settings.BGE_MODEL_NAME,
84
- "dimension": 1024,
85
  "device": "cuda" if torch.cuda.is_available() else "cpu",
86
- "fp16": settings.USE_FP16 and torch.cuda.is_available()
87
  }
88
 
89
 
 
1
  import torch
2
+ from sentence_transformers import SentenceTransformer
3
  import logging
4
  from typing import List, Union
5
  from config.settings import get_settings
 
22
  def _load_model(self):
23
  settings = get_settings()
24
  try:
25
+ logger.info(f"Loading embedding model: {settings.EMBEDDING_MODEL_NAME}")
26
  device = "cuda" if torch.cuda.is_available() else "cpu"
27
  logger.info(f"Using device: {device}")
28
 
29
+ EmbeddingService._model = SentenceTransformer(
30
+ settings.EMBEDDING_MODEL_NAME,
31
+ device=device
 
32
  )
33
 
34
+ EmbeddingService._model.max_seq_length = 2048
 
35
 
36
  logger.info("Embedding model loaded successfully")
37
  except Exception as e:
 
43
  texts: Union[str, List[str]],
44
  is_query: bool = False,
45
  batch_size: int = 32,
46
+ max_length: int = 2048
47
  ) -> Union[List[float], List[List[float]]]:
48
  if EmbeddingService._model is None:
49
  raise RuntimeError("Embedding model not loaded")
 
54
  else:
55
  single_text = False
56
 
57
+ if not texts:
58
+ raise ValueError("Texts cannot be empty")
59
+
60
  try:
61
+ embeddings = EmbeddingService._model.encode(
62
+ texts,
63
+ batch_size=batch_size,
64
+ show_progress_bar=False,
65
+ convert_to_numpy=True,
66
+ normalize_embeddings=False
67
+ )
 
 
 
 
 
68
 
69
+ expected_dim = 1024
70
  if single_text:
71
+ embedding_list = embeddings[0].tolist()
72
+ if len(embedding_list) != expected_dim:
73
+ logger.warning(f"Embedding dimension mismatch: expected {expected_dim}, got {len(embedding_list)}")
74
+ return embedding_list
75
+
76
+ result = []
77
+ for emb in embeddings:
78
+ emb_list = emb.tolist()
79
+ if len(emb_list) != expected_dim:
80
+ logger.warning(f"Embedding dimension mismatch: expected {expected_dim}, got {len(emb_list)}")
81
+ result.append(emb_list)
82
+
83
+ return result
84
  except Exception as e:
85
  logger.error(f"Error encoding texts: {str(e)}")
86
  raise
87
 
88
  def get_model_info(self) -> dict:
89
  settings = get_settings()
90
+ dimension = 1024
91
+
92
+ if EmbeddingService._model is not None:
93
+ try:
94
+ test_embedding = EmbeddingService._model.encode(["test"], convert_to_numpy=True)
95
+ dimension = len(test_embedding[0])
96
+ except Exception as e:
97
+ logger.warning(f"Could not determine model dimension: {str(e)}")
98
+
99
  return {
100
+ "model_name": settings.EMBEDDING_MODEL_NAME,
101
+ "dimension": dimension,
102
  "device": "cuda" if torch.cuda.is_available() else "cpu",
103
+ "max_seq_length": EmbeddingService._model.max_seq_length if EmbeddingService._model else 2048
104
  }
105
 
106
 
services/pinecone_service.py CHANGED
@@ -58,6 +58,14 @@ class PineconeService:
58
  metadata: Dict[str, Any]
59
  ) -> bool:
60
  try:
 
 
 
 
 
 
 
 
61
  PineconeService._index.upsert(
62
  vectors=[{
63
  "id": str(mentor_id),
@@ -91,6 +99,14 @@ class PineconeService:
91
  include_metadata: bool = True
92
  ) -> List[Dict[str, Any]]:
93
  try:
 
 
 
 
 
 
 
 
94
  query_response = PineconeService._index.query(
95
  vector=query_vector,
96
  top_k=top_k,
 
58
  metadata: Dict[str, Any]
59
  ) -> bool:
60
  try:
61
+ settings = get_settings()
62
+ expected_dim = settings.PINECONE_DIMENSION
63
+
64
+ if len(vector) != expected_dim:
65
+ error_msg = f"Vector dimension mismatch: expected {expected_dim}, got {len(vector)}"
66
+ logger.error(error_msg)
67
+ raise ValueError(error_msg)
68
+
69
  PineconeService._index.upsert(
70
  vectors=[{
71
  "id": str(mentor_id),
 
99
  include_metadata: bool = True
100
  ) -> List[Dict[str, Any]]:
101
  try:
102
+ settings = get_settings()
103
+ expected_dim = settings.PINECONE_DIMENSION
104
+
105
+ if len(query_vector) != expected_dim:
106
+ error_msg = f"Query vector dimension mismatch: expected {expected_dim}, got {len(query_vector)}"
107
+ logger.error(error_msg)
108
+ raise ValueError(error_msg)
109
+
110
  query_response = PineconeService._index.query(
111
  vector=query_vector,
112
  top_k=top_k,