Spaces:
Running
Running
| from __future__ import annotations | |
| import logging | |
| import os | |
| from typing import Dict, List, Optional | |
| import numpy as np | |
| # import torch # DISABLED (OOM mitigation) — only used by vision | |
| # import torch.nn.functional as F # DISABLED (OOM mitigation) | |
| # from PIL import Image # DISABLED (OOM mitigation) | |
| from sentence_transformers import SentenceTransformer | |
| # from transformers import AutoImageProcessor, AutoModel # DISABLED (OOM mitigation) | |
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)

# Maps an embedding dimension to the model name that is handed to
# SentenceTransformer when no local copy of the model directory exists.
# Only 384-dim embedding is enabled. 768 and 1024 are disabled to reduce memory usage.
_MODEL_MAP: Dict[int, str] = {
    384: "ibm-granite/granite-embedding-small-english-r2",
    # 768: "nomic-ai/nomic-embed-text-v1.5", # DISABLED (OOM mitigation)
    # 1024: "lightonai/modernbert-embed-large", # DISABLED (OOM mitigation)
}

# Vision model constants, kept for reference while image embedding is disabled.
# _VISION_MODEL_NAME = "nomic-ai/nomic-embed-vision-v1.5" # DISABLED (OOM mitigation)
# _VISION_DIMENSION = 768
class EmbeddingService:
    """Load sentence-transformer text models and encode text with them.

    Models are keyed by embedding dimension; the supported dimensions are the
    keys of the module-level ``_MODEL_MAP``. Image (vision) embedding is
    currently disabled as an OOM mitigation; the disabled code is kept in
    comments so it can be restored later.
    """

    def __init__(self, models_dir: Optional[str] = None) -> None:
        """Create a service with no models loaded.

        Args:
            models_dir: Directory searched for locally stored models. Defaults
                to ``<current working directory>/models``.
        """
        self._models: Dict[int, SentenceTransformer] = {}
        self._models_dir = models_dir or os.path.join(os.getcwd(), "models")
        # Prefer CUDA; fall back to CPU when torch is missing or reports no GPU.
        self._device = "cuda"
        try:
            import torch

            if not torch.cuda.is_available():
                self._device = "cpu"
        except ImportError:
            self._device = "cpu"
        # Dimensions in the order their models were loaded.
        self._loaded_dimensions: List[int] = []
        # self._vision_processor: Optional[AutoImageProcessor] = None # DISABLED (OOM mitigation)
        # self._vision_model: Optional[AutoModel] = None # DISABLED (OOM mitigation)
        # self._vision_loaded = False

    def load_model(self, dimension: int) -> None:
        """Load the embedding model for ``dimension`` if it is not loaded yet.

        A local directory ``<models_dir>/bge-<dimension>`` is used when it
        exists; otherwise the model name from ``_MODEL_MAP`` is passed to
        ``SentenceTransformer``. Calling this for an already loaded dimension
        is a no-op.

        Args:
            dimension: Embedding dimension identifying the model.

        Raises:
            ValueError: If ``dimension`` is not a key of ``_MODEL_MAP``.
        """
        if dimension in self._models:
            return
        if dimension not in _MODEL_MAP:
            raise ValueError(f"Unsupported dimension {dimension}. Supported: {list(_MODEL_MAP.keys())}")
        local_path = os.path.join(self._models_dir, f"bge-{dimension}")
        # Resolve the source exactly once so the logged source is always the
        # one that is actually loaded.
        source = local_path if os.path.isdir(local_path) else _MODEL_MAP[dimension]
        _logger.info("Loading embedding model dim=%s from %s", dimension, source)
        # NOTE(review): trust_remote_code=True allows code shipped with the
        # model to run at load time; confirm the enabled model still needs it.
        model = SentenceTransformer(
            source,
            device=self._device,
            trust_remote_code=True,
        )
        model.eval()
        self._models[dimension] = model
        self._loaded_dimensions.append(dimension)
        _logger.info("Loaded embedding model dim=%s (device=%s)", dimension, self._device)

    def load_all_models(self) -> None:
        """Load every model listed in ``_MODEL_MAP``."""
        for dim in _MODEL_MAP:
            self.load_model(dim)

    # def load_vision_model(self) -> None: # DISABLED (OOM mitigation)
    #     if self._vision_loaded:
    #         return
    #     local_path = os.path.join(self._models_dir, "vision")
    #     source = local_path if os.path.isdir(local_path) else _VISION_MODEL_NAME
    #
    #     cfg_path = os.path.join(local_path, "config.json")
    #     if os.path.exists(cfg_path):
    #         import json
    #         with open(cfg_path) as f:
    #             d = json.load(f)
    #         if isinstance(d.get("n_inner"), float):
    #             d["n_inner"] = int(d["n_inner"])
    #             with open(cfg_path, "w") as f:
    #                 json.dump(d, f, indent=2)
    #             _logger.info("Patched vision model config: n_inner float -> int")
    #
    #     _logger.info("Loading vision embedding model from %s", source)
    #     self._vision_processor = AutoImageProcessor.from_pretrained(source)
    #     self._vision_model = AutoModel.from_pretrained(
    #         source,
    #         trust_remote_code=True,
    #         _fast_init=False,
    #     )
    #     self._vision_model.eval()
    #     self._vision_model.to(self._device)
    #     self._vision_loaded = True
    #     _logger.info("Loaded vision embedding model (device=%s)", self._device)

    def generate_embedding(self, text: List[str], dimension: int) -> List[List[float]]:
        """Encode ``text`` with the model loaded for ``dimension``.

        Args:
            text: Strings to embed.
            dimension: Embedding dimension whose model should be used; the
                model must already have been loaded.

        Returns:
            One embedding per input string, as nested Python lists. The model
            is asked for normalized embeddings. An empty list or tuple yields
            ``[]`` without calling the model.

        Raises:
            ValueError: If no model is loaded for ``dimension``.
        """
        if dimension not in self._models:
            raise ValueError(f"Model for dimension {dimension} not loaded")
        # An empty batch has no embeddings; skip the model call entirely.
        if isinstance(text, (list, tuple)) and not text:
            return []
        model = self._models[dimension]
        # NOTE: inputs are passed to the model unmodified. Models that need
        # task prefixes on queries for good search quality (e.g.
        # nomic-embed-text-v1.5, currently disabled in _MODEL_MAP) require the
        # caller to add the prefix before calling this method.
        result: np.ndarray = model.encode(
            text,
            normalize_embeddings=True,
            convert_to_numpy=True,
            show_progress_bar=False,
        )
        return result.tolist()

    # def generate_image_embedding(self, images: List[Image.Image]) -> List[List[float]]: # DISABLED (OOM mitigation)
    #     if not self._vision_loaded or self._vision_model is None or self._vision_processor is None:
    #         raise ValueError("Vision model not loaded")
    #     all_embeddings: List[List[float]] = []
    #     with torch.no_grad():
    #         for image in images:
    #             inputs = self._vision_processor(image, return_tensors="pt")
    #             inputs = {k: v.to(self._device) for k, v in inputs.items()}
    #             outputs = self._vision_model(**inputs)
    #             emb = outputs.last_hidden_state[:, 0]
    #             emb = F.normalize(emb, p=2, dim=1)
    #             all_embeddings.append(emb.cpu().numpy().flatten().tolist())
    #     return all_embeddings

    def loaded_dimensions(self) -> List[int]:
        """Return a copy of the loaded dimensions, in load order."""
        return list(self._loaded_dimensions)

    def is_loaded(self, dimension: int) -> bool:
        """Return True when a model for ``dimension`` has been loaded."""
        return dimension in self._models

    # @property # DISABLED (OOM mitigation)
    # def vision_dimension(self) -> int:
    #     return _VISION_DIMENSION