Spaces:
Sleeping
Sleeping
| """ | |
| Local embedding management module. | |
| Generates embeddings locally using the BAAI/bge-m3 model. | |
| """ | |
| import logging | |
| from typing import List | |
| from sentence_transformers import SentenceTransformer | |
| logger = logging.getLogger(__name__) | |
class LocalEmbeddingManager:
    """Create text embeddings locally through a SentenceTransformer model.

    The model named by ``model_name`` (``BAAI/bge-m3`` by default) is loaded
    once in the constructor, stored on ``self.model``, and reused by every
    embedding call.
    """

    def __init__(self, model_name: str = "BAAI/bge-m3") -> None:
        """Load the model and log its sentence-embedding dimension.

        Args:
            model_name: Identifier passed straight to ``SentenceTransformer``.
        """
        logger.info("๋ก์ปฌ ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์ค: %s", model_name)
        self.model = SentenceTransformer(model_name)
        # The dimension is queried only for the log line; it is not stored.
        logger.info(
            "๋ก์ปฌ ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ (์ฐจ์: %d)",
            self.model.get_sentence_embedding_dimension(),
        )

    def get_embedding(self, text: str) -> List[float]:
        """Embed a single text.

        Args:
            text: The text to encode.

        Returns:
            The result of ``self.model.encode`` converted with ``tolist()``.
        """
        return self.model.encode(text, convert_to_numpy=True).tolist()

    def get_embeddings_batch(self, texts: List[str]) -> List[List[float]]:
        """Embed several texts in one ``encode`` call.

        Args:
            texts: The texts to encode.

        Returns:
            The result of ``self.model.encode`` converted with ``tolist()``.
        """
        return self.model.encode(texts, convert_to_numpy=True).tolist()