from transformers import AutoModel, AutoTokenizer
import torch


class EndpointHandler:
    """Inference endpoint handler that returns raw last-hidden-state embeddings.

    The model and tokenizer are loaded lazily on the first call rather than in
    ``__init__`` so that constructing the handler stays cheap (e.g. during
    endpoint startup/health checks before any request arrives).
    """

    def __init__(self, model_dir):
        """Remember the model location; defer the actual load to first use.

        Args:
            model_dir: Path or hub identifier passed to ``from_pretrained``.
        """
        self.model = None
        self.tokenizer = None
        self.model_dir = model_dir

    def __call__(self, data):
        """Tokenize the request text(s) and run a forward pass.

        Args:
            data: Request payload dict; must contain an ``"inputs"`` key whose
                value is a string or a list of strings.

        Returns:
            Dict with key ``"outputs"`` mapping to the model's last hidden
            state as nested Python lists (shape: batch x tokens x hidden_dim).

        Raises:
            ValueError: If the payload has no ``"inputs"`` field.
        """
        # Lazy one-time initialization; subsequent calls reuse the loaded model.
        if self.model is None:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)
            self.model = AutoModel.from_pretrained(self.model_dir)
            # from_pretrained conventionally returns the model in eval mode,
            # but make it explicit so dropout/batch-norm can never be active.
            self.model.eval()

        inputs = data.get("inputs")
        if inputs is None:
            # Fail fast with a clear message instead of letting None propagate
            # into the tokenizer, which would raise an opaque error.
            raise ValueError("Request payload must contain an 'inputs' field.")
        if isinstance(inputs, str):
            # Normalize a single string to a batch of one.
            inputs = [inputs]

        # Pad/truncate so variable-length texts form a rectangular batch.
        encoded_input = self.tokenizer(
            inputs, return_tensors="pt", padding=True, truncation=True
        )
        # Inference only: disable autograd bookkeeping to save memory/time.
        with torch.no_grad():
            outputs = self.model(**encoded_input)

        # Convert the tensor to plain lists so the response is JSON-serializable.
        return {"outputs": outputs.last_hidden_state.tolist()}