Spaces:
Running
Running
| # src/embeddings/instructor_embeddings.py | |
| from sentence_transformers import SentenceTransformer # | |
| import os | |
| from pathlib import Path | |
| class InstructorEmbeddings: | |
| def __init__(self, model_name="hkunlp/instructor-large", cache_dir="./data/embeddings/cache"): | |
| self.cache_dir = Path(cache_dir) | |
| self.cache_dir.mkdir(parents=True, exist_ok=True) | |
| # Silenciar warning sobre tied weights que es inofensivo | |
| import warnings | |
| warnings.filterwarnings('ignore', message='.*tied weights mapping.*') | |
| # HF Spaces descargará automáticamente el modelo | |
| self.model = SentenceTransformer( | |
| model_name, | |
| cache_folder=str(self.cache_dir) | |
| ) | |
| def encode(self, texts, instruction="", **kwargs): | |
| if instruction: | |
| texts_with_instruction = [[instruction, text] for text in texts] | |
| return self.model.encode(texts_with_instruction, **kwargs) | |
| return self.model.encode(texts, **kwargs) |