File size: 1,030 Bytes
ce90801
 
 
 
 
 
 
 
 
 
7c5b118
 
 
ce90801
 
 
 
7c5b118
ce90801
 
 
 
 
 
5a731e7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# src/embeddings/instructor_embeddings.py
from sentence_transformers import SentenceTransformer #
import os
from pathlib import Path

class InstructorEmbeddings:
    """Wrapper around a ``SentenceTransformer`` model with optional instructions.

    The model is loaded once in ``__init__`` and kept on ``self.model``;
    ``encode`` forwards to it, optionally pairing every text with an
    instruction string.
    """

    def __init__(self, model_name: str = "hkunlp/instructor-large", cache_dir="./data/embeddings/cache") -> None:
        """Create the cache directory and load the model.

        Args:
            model_name: Model identifier handed to ``SentenceTransformer``.
            cache_dir: Directory passed to the model as ``cache_folder``. It is
                created (including parents) when it does not exist yet.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Silence the harmless "tied weights mapping" warning. This installs
        # a process-wide warnings filter, not one scoped to this instance.
        import warnings
        warnings.filterwarnings('ignore', message='.*tied weights mapping.*')

        # On HF Spaces the model is downloaded automatically.
        self.model = SentenceTransformer(
            model_name,
            cache_folder=str(self.cache_dir),
        )

    def encode(self, texts, instruction: str = "", **kwargs):
        """Encode ``texts``, optionally pairing each one with ``instruction``.

        Args:
            texts: A single string or an iterable of strings.
            instruction: When non-empty, every text is sent to the model as an
                ``[instruction, text]`` pair; when empty, ``texts`` is
                forwarded untouched.
            **kwargs: Forwarded unchanged to ``self.model.encode``.

        Returns:
            Whatever ``self.model.encode`` returns. For a single string
            combined with an instruction, the first element of the result
            obtained for the one-pair batch.
        """
        if not instruction:
            return self.model.encode(texts, **kwargs)

        if isinstance(texts, str):
            # A bare string must not be iterated character by character:
            # encode it as one pair and unwrap the single result, mirroring
            # the single-input case (assumes the model returns one result per
            # input pair).
            single_pair = [[instruction, texts]]
            return self.model.encode(single_pair, **kwargs)[0]

        texts_with_instruction = [[instruction, text] for text in texts]
        return self.model.encode(texts_with_instruction, **kwargs)