# Source: llm-ready-data/app/services/embeddings_service.py
# Revision: commit 4b54fab ("feat: oom"), author Soumik-404, 5.72 kB
from __future__ import annotations
import logging
import os
from typing import Dict, List, Optional
import numpy as np
# import torch # DISABLED (OOM mitigation) — only used by vision
# import torch.nn.functional as F # DISABLED (OOM mitigation)
# from PIL import Image # DISABLED (OOM mitigation)
from sentence_transformers import SentenceTransformer
# from transformers import AutoImageProcessor, AutoModel # DISABLED (OOM mitigation)
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
# Maps an embedding dimension to the model identifier used to load it.
# Only the 384-dim embedding is enabled; the 768 and 1024 entries are
# commented out to reduce memory usage.
_MODEL_MAP: Dict[int, str] = {
    384: "ibm-granite/granite-embedding-small-english-r2",
    # 768: "nomic-ai/nomic-embed-text-v1.5", # DISABLED (OOM mitigation)
    # 1024: "lightonai/modernbert-embed-large", # DISABLED (OOM mitigation)
}
# Vision-embedding constants, disabled together with the vision model code.
# _VISION_MODEL_NAME = "nomic-ai/nomic-embed-vision-v1.5" # DISABLED (OOM mitigation)
# _VISION_DIMENSION = 768
class EmbeddingService:
    """Load and serve SentenceTransformer text-embedding models keyed by dimension.

    Model identifiers come from ``_MODEL_MAP``. A local copy located at
    ``<models_dir>/bge-<dimension>`` is preferred over the mapped model name
    whenever that directory exists. Vision-embedding support is kept below as
    commented-out code (disabled as an OOM mitigation).
    """

    def __init__(self, models_dir: Optional[str] = None) -> None:
        """Create an empty service; nothing is loaded until ``load_model`` is called.

        Args:
            models_dir: Directory searched for local model copies. Falls back
                to ``<current working directory>/models`` when omitted or empty.
        """
        self._models: Dict[int, SentenceTransformer] = {}
        self._models_dir = models_dir or os.path.join(os.getcwd(), "models")
        self._device = self._detect_device()
        # Dimensions in the order they were loaded.
        self._loaded_dimensions: List[int] = []
        # self._vision_processor: Optional[AutoImageProcessor] = None # DISABLED (OOM mitigation)
        # self._vision_model: Optional[AutoModel] = None # DISABLED (OOM mitigation)
        # self._vision_loaded = False

    @staticmethod
    def _detect_device() -> str:
        """Return ``"cuda"`` when torch reports CUDA as available, else ``"cpu"``."""
        try:
            # Imported lazily: the module-level torch import is disabled.
            import torch
        except ImportError:
            return "cpu"
        return "cuda" if torch.cuda.is_available() else "cpu"

    def load_model(self, dimension: int) -> None:
        """Load the embedding model for ``dimension`` if it is not loaded yet.

        Args:
            dimension: Embedding size; must be a key of ``_MODEL_MAP``.

        Raises:
            ValueError: If ``dimension`` has no entry in ``_MODEL_MAP``.
        """
        if dimension in self._models:
            return
        if dimension not in _MODEL_MAP:
            raise ValueError(f"Unsupported dimension {dimension}. Supported: {list(_MODEL_MAP)}")

        # NOTE(review): the local directory prefix is "bge-" although the mapped
        # model is not a BGE model -- confirm this matches the on-disk layout.
        local_path = os.path.join(self._models_dir, f"bge-{dimension}")
        # Resolve the source once so the logged value is exactly what gets loaded.
        source = local_path if os.path.isdir(local_path) else _MODEL_MAP[dimension]

        _logger.info("Loading embedding model dim=%s from %s", dimension, source)
        # NOTE(review): trust_remote_code=True lets the model repository run its
        # own Python code at load time -- confirm it is still required.
        model = SentenceTransformer(
            source,
            device=self._device,
            trust_remote_code=True,
        )
        model.eval()

        self._models[dimension] = model
        self._loaded_dimensions.append(dimension)
        _logger.info("Loaded embedding model dim=%s (device=%s)", dimension, self._device)

    def load_all_models(self) -> None:
        """Load every model listed in ``_MODEL_MAP``."""
        for dim in _MODEL_MAP:
            self.load_model(dim)

    # def load_vision_model(self) -> None: # DISABLED (OOM mitigation)
    #     if self._vision_loaded:
    #         return
    #     local_path = os.path.join(self._models_dir, "vision")
    #     source = local_path if os.path.isdir(local_path) else _VISION_MODEL_NAME
    #
    #     cfg_path = os.path.join(local_path, "config.json")
    #     if os.path.exists(cfg_path):
    #         import json
    #         with open(cfg_path) as f:
    #             d = json.load(f)
    #         if isinstance(d.get("n_inner"), float):
    #             d["n_inner"] = int(d["n_inner"])
    #             with open(cfg_path, "w") as f:
    #                 json.dump(d, f, indent=2)
    #             _logger.info("Patched vision model config: n_inner float -> int")
    #
    #     _logger.info("Loading vision embedding model from %s", source)
    #     self._vision_processor = AutoImageProcessor.from_pretrained(source)
    #     self._vision_model = AutoModel.from_pretrained(
    #         source,
    #         trust_remote_code=True,
    #         _fast_init=False,
    #     )
    #     self._vision_model.eval()
    #     self._vision_model.to(self._device)
    #     self._vision_loaded = True
    #     _logger.info("Loaded vision embedding model (device=%s)", self._device)

    def generate_embedding(self, text: List[str], dimension: int) -> List[List[float]]:
        """Encode ``text`` with the loaded model for ``dimension``.

        Args:
            text: Strings to embed.
            dimension: Dimension whose model should be used; it must already
                have been loaded via ``load_model``.

        Returns:
            The model output converted with ``tolist()``; embeddings are
            requested with ``normalize_embeddings=True``. An empty list or
            tuple yields ``[]``.

        Raises:
            ValueError: If no model is loaded for ``dimension``.
        """
        if dimension not in self._models:
            raise ValueError(f"Model for dimension {dimension} not loaded")
        # An empty batch has nothing to encode; skip the model call entirely.
        if isinstance(text, (list, tuple)) and not text:
            return []

        model = self._models[dimension]
        # NOTE: text is encoded exactly as given. The nomic-embed-text-v1.5
        # model (currently disabled in _MODEL_MAP) needs correctly prefixed
        # queries for good semantic-search quality, so callers must add those
        # prefixes themselves if that model is re-enabled.
        result: np.ndarray = model.encode(
            text,
            normalize_embeddings=True,
            convert_to_numpy=True,
            show_progress_bar=False,
        )
        return result.tolist()

    # def generate_image_embedding(self, images: List[Image.Image]) -> List[List[float]]: # DISABLED (OOM mitigation)
    #     if not self._vision_loaded or self._vision_model is None or self._vision_processor is None:
    #         raise ValueError("Vision model not loaded")
    #     all_embeddings: List[List[float]] = []
    #     with torch.no_grad():
    #         for image in images:
    #             inputs = self._vision_processor(image, return_tensors="pt")
    #             inputs = {k: v.to(self._device) for k, v in inputs.items()}
    #             outputs = self._vision_model(**inputs)
    #             emb = outputs.last_hidden_state[:, 0]
    #             emb = F.normalize(emb, p=2, dim=1)
    #             all_embeddings.append(emb.cpu().numpy().flatten().tolist())
    #     return all_embeddings

    @property
    def loaded_dimensions(self) -> List[int]:
        """Return a copy of the loaded dimensions, in load order."""
        return list(self._loaded_dimensions)

    def is_loaded(self, dimension: int) -> bool:
        """Return whether a model for ``dimension`` has been loaded."""
        return dimension in self._models

    # @property # DISABLED (OOM mitigation)
    # def vision_dimension(self) -> int:
    #     return _VISION_DIMENSION