meet4150/ALIV_AI/app/agent/kb_embedding.py
download
raw
2.3 kB
from __future__ import annotations
from pathlib import Path
from threading import Lock
from sentence_transformers import SentenceTransformer
class KBEmbeddingService:
    """Process-wide singleton that embeds knowledge-base text.

    Every call to ``KBEmbeddingService()`` returns the same object. The
    underlying ``SentenceTransformer`` is created lazily on the first call to
    :meth:`load_model` (directly or via the embedding helpers) and then reused.
    All text is prefixed with ``_medical_prefix`` before it is encoded.
    """

    # The single shared instance, created on first construction.
    _instance: "KBEmbeddingService | None" = None
    # Guards creation of the singleton instance.
    _instance_lock = Lock()
    # Guards the lazy construction of the SentenceTransformer model.
    _model_lock = Lock()
    # Model identifier used when no local copy of the model is present.
    _model_name = "BAAI/bge-base-en-v1.5"
    # Local directory that is preferred over ``_model_name`` when it exists.
    _local_model_dir = Path(__file__).resolve().parents[2] / "models" / "BAAI__bge-base-en-v1.5"
    # Prefix prepended to every text before encoding.
    _medical_prefix = "Medical text: "

    def __new__(cls) -> "KBEmbeddingService":
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            with cls._instance_lock:
                if cls._instance is None:
                    # Fully initialise the object BEFORE publishing it on the
                    # class. The outer check above runs without the lock, so
                    # publishing first would let another thread obtain an
                    # instance that has no ``_model`` attribute yet.
                    instance = super().__new__(cls)
                    instance._model = None
                    cls._instance = instance
        return cls._instance

    def load_model(self) -> "SentenceTransformer":
        """Return the embedding model, constructing it on the first call.

        The model is loaded from ``_local_model_dir`` when that path exists,
        otherwise from ``_model_name``.
        """
        if self._model is None:
            with self._model_lock:
                # Re-check under the lock so only one thread builds the model.
                if self._model is None:
                    model_source = (
                        str(self._local_model_dir)
                        if self._local_model_dir.exists()
                        else self._model_name
                    )
                    self._model = SentenceTransformer(model_source)
                    print(f"KB Embedding model loaded: {self._model_name}")
        return self._model

    def embed(self, text: str) -> list[float]:
        """Encode a single text and return its embedding as a list.

        The text is passed through :meth:`_prepare_text` first, and the model
        is asked for normalized embeddings.
        """
        model = self.load_model()
        embedding = model.encode(
            self._prepare_text(text),
            normalize_embeddings=True,
            show_progress_bar=False,
        )
        return embedding.tolist()

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Encode several texts and return one embedding list per text.

        An empty ``texts`` returns ``[]`` without loading the model.
        """
        if not texts:
            return []
        model = self.load_model()
        embeddings = model.encode(
            [self._prepare_text(text) for text in texts],
            # Cap the batch at 64 items; smaller inputs go through in one batch.
            batch_size=min(64, len(texts)),
            normalize_embeddings=True,
            show_progress_bar=False,
        )
        return embeddings.tolist()

    def embedding_dimension(self) -> int:
        """Return the model's sentence-embedding dimension as an ``int``."""
        model = self.load_model()
        return int(model.get_sentence_embedding_dimension())

    def _prepare_text(self, text: str) -> str:
        """Strip ``text`` and prepend ``_medical_prefix``.

        Falsy input (``None`` or ``""``) and whitespace-only input yield the
        bare prefix.
        """
        normalized_text = (text or "").strip()
        return f"{self._medical_prefix}{normalized_text}" if normalized_text else self._medical_prefix

Xet Storage Details

Size:
2.3 kB
·
Xet hash:
8d507a38a4d2d8ffeb20a102cddd1851d45d45a77b7c4999183d8901b6531890

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.