Spaces:
Running
Running
File size: 1,030 Bytes
# src/embeddings/instructor_embeddings.py
from sentence_transformers import SentenceTransformer #
import os
from pathlib import Path
class InstructorEmbeddings:
    """Wrapper around a ``SentenceTransformer`` model with optional instructions.

    The model is loaded once in ``__init__`` and stored on ``self.model``;
    ``encode`` forwards to ``self.model.encode``, optionally pairing every
    text with an instruction string.
    """

    def __init__(self, model_name="hkunlp/instructor-large", cache_dir="./data/embeddings/cache"):
        """Create the cache directory and load the embedding model.

        Args:
            model_name: Model identifier passed to ``SentenceTransformer``.
            cache_dir: Directory passed as the model's ``cache_folder``. It is
                created (including parents) when it does not exist yet.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Silence the warning about tied weights, which is harmless.
        # Note: this installs a process-wide warnings filter.
        import warnings
        warnings.filterwarnings('ignore', message='.*tied weights mapping.*')

        # HF Spaces will download the model automatically.
        self.model = SentenceTransformer(
            model_name,
            cache_folder=str(self.cache_dir),
        )

    def encode(self, texts, instruction="", **kwargs):
        """Encode ``texts``, optionally pairing each one with ``instruction``.

        Args:
            texts: A single string or an iterable of strings.
            instruction: Instruction paired with every text. When empty,
                ``texts`` is forwarded to the model unchanged.
            **kwargs: Forwarded verbatim to ``self.model.encode``.

        Returns:
            Whatever ``self.model.encode`` returns. For a single string
            combined with an instruction, the first (only) element of the
            one-item batch result is returned.
        """
        if not instruction:
            return self.model.encode(texts, **kwargs)

        # A bare string must be treated as ONE sentence; iterating over it
        # would build one [instruction, character] pair per character.
        if isinstance(texts, str):
            single_pair = [[instruction, texts]]
            return self.model.encode(single_pair, **kwargs)[0]

        # NOTE(review): the [instruction, text] pair layout looks like the
        # INSTRUCTOR input convention -- confirm the installed
        # sentence-transformers version interprets pairs this way.
        texts_with_instruction = [[instruction, text] for text in texts]
        return self.model.encode(texts_with_instruction, **kwargs)