Soumik Bose committed on
Commit
4f9495d
·
1 Parent(s): 58f4a9c
Files changed (2) hide show
  1. Dockerfile +1 -2
  2. model_service.py +11 -4
Dockerfile CHANGED
@@ -5,8 +5,7 @@ FROM python:3.11-slim
5
  ENV PYTHONDONTWRITEBYTECODE=1 \
6
  PYTHONUNBUFFERED=1 \
7
  PYTHONIOENCODING=UTF-8 \
8
- HF_HOME=/app/cache \
9
- TRANSFORMERS_CACHE=/app/cache
10
 
11
  # Install system dependencies
12
  RUN apt-get update && apt-get install -y --no-install-recommends curl \
 
5
  ENV PYTHONDONTWRITEBYTECODE=1 \
6
  PYTHONUNBUFFERED=1 \
7
  PYTHONIOENCODING=UTF-8 \
8
+ HF_HOME=/app/cache
 
9
 
10
  # Install system dependencies
11
  RUN apt-get update && apt-get install -y --no-install-recommends curl \
model_service.py CHANGED
@@ -7,6 +7,7 @@ logger = logging.getLogger("EmbedService")
7
  class MultiEmbeddingService:
8
  def __init__(self):
9
  self.models = {}
 
10
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
11
 
12
  # Map dimensions to local folders (downloaded in Dockerfile)
@@ -17,25 +18,31 @@ class MultiEmbeddingService:
17
  }
18
 
19
  def load_all_models(self):
20
- """Loads all defined models into memory."""
 
 
21
  for dim, path in self.model_map.items():
22
  try:
23
  logger.info(f"Loading {dim}-dimension model from {path}...")
24
  model = SentenceTransformer(path, device=self.device)
25
- model.eval() # Set to evaluation mode
26
  self.models[dim] = model
27
  logger.info(f"✅ Loaded model for dimension {dim}")
28
  except Exception as e:
29
  logger.error(f"❌ Failed to load {dim}-dim model: {e}")
30
- # We don't raise here, so partial failures don't crash the whole app
31
 
32
  def generate_embedding(self, text: str | list[str], dimension: int):
33
  """Generates embeddings using the specific model for the requested dimension."""
34
  if dimension not in self.models:
35
  raise ValueError(f"Dimension {dimension} not supported. Available: {list(self.models.keys())}")
36
 
 
 
 
37
  return self.models[dimension].encode(
38
  text,
39
  normalize_embeddings=True,
40
- convert_to_numpy=True
 
 
41
  ).tolist()
 
7
  class MultiEmbeddingService:
8
  def __init__(self):
9
  self.models = {}
10
+ # Auto-detect GPU, otherwise use CPU
11
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
12
 
13
  # Map dimensions to local folders (downloaded in Dockerfile)
 
18
  }
19
 
20
  def load_all_models(self):
21
+ """Loads all defined models into memory ONCE at startup."""
22
+ logger.info(f"🚀 Acceleration Device: {self.device.upper()}")
23
+
24
  for dim, path in self.model_map.items():
25
  try:
26
  logger.info(f"Loading {dim}-dimension model from {path}...")
27
  model = SentenceTransformer(path, device=self.device)
28
+ model.eval() # Set to evaluation mode (faster inference)
29
  self.models[dim] = model
30
  logger.info(f"✅ Loaded model for dimension {dim}")
31
  except Exception as e:
32
  logger.error(f"❌ Failed to load {dim}-dim model: {e}")
 
33
 
34
  def generate_embedding(self, text: str | list[str], dimension: int):
35
  """Generates embeddings using the specific model for the requested dimension."""
36
  if dimension not in self.models:
37
  raise ValueError(f"Dimension {dimension} not supported. Available: {list(self.models.keys())}")
38
 
39
+ # --- OPTIMIZATION FIX ---
40
+ # show_progress_bar=False prevents the logs you saw
41
+ # batch_size=32 ensures efficient processing for lists
42
  return self.models[dimension].encode(
43
  text,
44
  normalize_embeddings=True,
45
+ convert_to_numpy=True,
46
+ show_progress_bar=False, # <--- THIS STOPS THE LOG SPAM
47
+ batch_size=32
48
  ).tolist()