Spaces:
Running
Running
Soumik Bose committed on
Commit ·
4f9495d
1
Parent(s): 58f4a9c
ok
Browse files- Dockerfile +1 -2
- model_service.py +11 -4
Dockerfile
CHANGED
|
@@ -5,8 +5,7 @@ FROM python:3.11-slim
|
|
| 5 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 6 |
PYTHONUNBUFFERED=1 \
|
| 7 |
PYTHONIOENCODING=UTF-8 \
|
| 8 |
-
HF_HOME=/app/cache
|
| 9 |
-
TRANSFORMERS_CACHE=/app/cache
|
| 10 |
|
| 11 |
# Install system dependencies
|
| 12 |
RUN apt-get update && apt-get install -y --no-install-recommends curl \
|
|
|
|
| 5 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 6 |
PYTHONUNBUFFERED=1 \
|
| 7 |
PYTHONIOENCODING=UTF-8 \
|
| 8 |
+
HF_HOME=/app/cache
|
|
|
|
| 9 |
|
| 10 |
# Install system dependencies
|
| 11 |
RUN apt-get update && apt-get install -y --no-install-recommends curl \
|
model_service.py
CHANGED
|
@@ -7,6 +7,7 @@ logger = logging.getLogger("EmbedService")
|
|
| 7 |
class MultiEmbeddingService:
|
| 8 |
def __init__(self):
|
| 9 |
self.models = {}
|
|
|
|
| 10 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 11 |
|
| 12 |
# Map dimensions to local folders (downloaded in Dockerfile)
|
|
@@ -17,25 +18,31 @@ class MultiEmbeddingService:
|
|
| 17 |
}
|
| 18 |
|
| 19 |
def load_all_models(self):
|
| 20 |
-
"""Loads all defined models into memory."""
|
|
|
|
|
|
|
| 21 |
for dim, path in self.model_map.items():
|
| 22 |
try:
|
| 23 |
logger.info(f"Loading {dim}-dimension model from {path}...")
|
| 24 |
model = SentenceTransformer(path, device=self.device)
|
| 25 |
-
model.eval() # Set to evaluation mode
|
| 26 |
self.models[dim] = model
|
| 27 |
logger.info(f"✅ Loaded model for dimension {dim}")
|
| 28 |
except Exception as e:
|
| 29 |
logger.error(f"❌ Failed to load {dim}-dim model: {e}")
|
| 30 |
-
# We don't raise here, so partial failures don't crash the whole app
|
| 31 |
|
| 32 |
def generate_embedding(self, text: str | list[str], dimension: int):
|
| 33 |
"""Generates embeddings using the specific model for the requested dimension."""
|
| 34 |
if dimension not in self.models:
|
| 35 |
raise ValueError(f"Dimension {dimension} not supported. Available: {list(self.models.keys())}")
|
| 36 |
|
|
|
|
|
|
|
|
|
|
| 37 |
return self.models[dimension].encode(
|
| 38 |
text,
|
| 39 |
normalize_embeddings=True,
|
| 40 |
-
convert_to_numpy=True
|
|
|
|
|
|
|
| 41 |
).tolist()
|
|
|
|
| 7 |
class MultiEmbeddingService:
|
| 8 |
def __init__(self):
|
| 9 |
self.models = {}
|
| 10 |
+
# Auto-detect GPU, otherwise use CPU
|
| 11 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 12 |
|
| 13 |
# Map dimensions to local folders (downloaded in Dockerfile)
|
|
|
|
| 18 |
}
|
| 19 |
|
| 20 |
def load_all_models(self):
|
| 21 |
+
"""Loads all defined models into memory ONCE at startup."""
|
| 22 |
+
logger.info(f"🚀 Acceleration Device: {self.device.upper()}")
|
| 23 |
+
|
| 24 |
for dim, path in self.model_map.items():
|
| 25 |
try:
|
| 26 |
logger.info(f"Loading {dim}-dimension model from {path}...")
|
| 27 |
model = SentenceTransformer(path, device=self.device)
|
| 28 |
+
model.eval() # Set to evaluation mode (faster inference)
|
| 29 |
self.models[dim] = model
|
| 30 |
logger.info(f"✅ Loaded model for dimension {dim}")
|
| 31 |
except Exception as e:
|
| 32 |
logger.error(f"❌ Failed to load {dim}-dim model: {e}")
|
|
|
|
| 33 |
|
| 34 |
def generate_embedding(self, text: str | list[str], dimension: int):
|
| 35 |
"""Generates embeddings using the specific model for the requested dimension."""
|
| 36 |
if dimension not in self.models:
|
| 37 |
raise ValueError(f"Dimension {dimension} not supported. Available: {list(self.models.keys())}")
|
| 38 |
|
| 39 |
+
# --- OPTIMIZATION FIX ---
|
| 40 |
+
# show_progress_bar=False prevents the logs you saw
|
| 41 |
+
# batch_size=32 ensures efficient processing for lists
|
| 42 |
return self.models[dimension].encode(
|
| 43 |
text,
|
| 44 |
normalize_embeddings=True,
|
| 45 |
+
convert_to_numpy=True,
|
| 46 |
+
show_progress_bar=False, # <--- THIS STOPS THE LOG SPAM
|
| 47 |
+
batch_size=32
|
| 48 |
).tolist()
|