| from transformers import AutoModel, AutoTokenizer | |
| import torch | |
class EndpointHandler:
    """Callable handler that turns text inputs into embedding vectors.

    The tokenizer and model are loaded once in ``__init__``; each call
    tokenizes the request, runs a forward pass without gradients and returns
    the hidden state of the first token of every sequence.
    """

    def __init__(self, model_dir="/opt/huggingface/model", *, max_length=4096):
        """Load the tokenizer and model from a local directory.

        Args:
            model_dir: Path to the directory holding the model files taken
                from ``artifact_uri``.
            max_length: Maximum number of tokens per input; longer inputs are
                truncated by the tokenizer. Defaults to 4096.
        """
        # Device index convention used throughout the class:
        # 0 = first CUDA device, -1 = CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        self.max_length = max_length

        # Load the model from the local directory (GCS).
        # NOTE(review): trust_remote_code=True runs Python code shipped inside
        # model_dir; only point this at trusted artifacts.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_dir,
            trust_remote_code=True,
        )
        self.model = AutoModel.from_pretrained(
            model_dir,
            trust_remote_code=True,
        )
        if self.device >= 0:
            self.model = self.model.cuda(self.device)
        self.model.eval()

    def __call__(self, data):
        """Compute embeddings for the given request.

        Args:
            data: Either a dict with an ``"inputs"`` key (``"input"`` is
                accepted as a fallback, and a missing key is treated as the
                empty string), or the text / list of texts itself.

        Returns:
            ``{"embeddings": [...]}`` with one list of floats per input
            sequence. An empty list or tuple of inputs yields
            ``{"embeddings": []}``.
        """
        if isinstance(data, dict):
            inputs = data.get("inputs", data.get("input", ""))
        else:
            inputs = data

        # An empty batch cannot be turned into a tensor batch; answer it
        # directly instead of failing inside the tokenizer or the model.
        if isinstance(inputs, (list, tuple)) and not inputs:
            return {"embeddings": []}

        # Tokenization
        encoded = self.tokenizer(
            inputs,
            padding=True,
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )
        if self.device >= 0:
            # Move every tensor to the same device as the model.
            encoded = {
                key: tensor.cuda(self.device)
                for key, tensor in encoded.items()
            }

        # Embedding generation
        with torch.no_grad():
            outputs = self.model(**encoded)

        # Use the [CLS] token (the first token) of each sequence.
        cls_vectors = outputs.last_hidden_state[:, 0]

        # float() upcasts half-precision outputs (NumPy has no bfloat16, so a
        # .numpy() round-trip would fail there); tolist() yields plain floats.
        return {"embeddings": cls_vectors.float().cpu().tolist()}