# codeweaver-ai/CodeWeaver — src/vector_db/local_embeddings.py
"""
๋กœ์ปฌ ์ž„๋ฒ ๋”ฉ ๊ด€๋ฆฌ ๋ชจ๋“ˆ.
BAAI/bge-m3 ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•ด ๋กœ์ปฌ์—์„œ ์ž„๋ฒ ๋”ฉ์„ ์ƒ์„ฑํ•œ๋‹ค.
"""
import logging
from typing import List
from sentence_transformers import SentenceTransformer
logger = logging.getLogger(__name__)
class LocalEmbeddingManager:
"""BAAI/bge-m3 ๋กœ์ปฌ ์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ๊ธฐ."""
def __init__(self, model_name: str = "BAAI/bge-m3") -> None:
logger.info("๋กœ์ปฌ ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘: %s", model_name)
self.model = SentenceTransformer(model_name)
dim = self.model.get_sentence_embedding_dimension()
logger.info("๋กœ์ปฌ ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ (์ฐจ์›: %d)", dim)
def get_embedding(self, text: str) -> List[float]:
"""๋‹จ์ผ ํ…์ŠคํŠธ๋ฅผ ์ž„๋ฒ ๋”ฉ."""
embedding = self.model.encode(text, convert_to_numpy=True)
return embedding.tolist()
def get_embeddings_batch(self, texts: List[str]) -> List[List[float]]:
"""๋ฐฐ์น˜ ํ…์ŠคํŠธ ์ž„๋ฒ ๋”ฉ."""
embeddings = self.model.encode(texts, convert_to_numpy=True)
return embeddings.tolist()