import torch
from typing import Any, Dict, List

from transformers import T5ForConditionalGeneration, AutoTokenizer

# check for GPU
# device = 0 if torch.cuda.is_available() else -1

# sampling temperature (1.0 leaves the model's logits unscaled)
TEMP = 1.0


def generate_samples_with_temp(tokenizer, model, txts):
    # tokenize the batch, padding shorter inputs to a common length
    encoded = tokenizer(txts, return_tensors="pt", padding=True)
    # sample from the model; passing the attention mask so generate()
    # ignores the padded positions
    outputs = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        do_sample=True,
        max_length=128,
        temperature=TEMP,
    )
    # decode the generated token ids back into strings
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


class EndpointHandler:
    def __init__(self, path: str = ""):
        # load the tokenizer and model from the repository path
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = T5ForConditionalGeneration.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> List[str]:
        # Inference Endpoints deliver payloads as {"inputs": ..., "parameters": ...}
        inputs = data.pop("inputs", data)
        # parameters = data.pop("parameters", None)
        return generate_samples_with_temp(self.tokenizer, self.model, inputs)
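
# A minimal local smoke test sketch, assuming this file sits in a model
# repository containing T5 weights; the path "." and the example prompt are
# illustrative placeholders, not part of the endpoint contract.
if __name__ == "__main__":
    handler = EndpointHandler(path=".")
    # Inference Endpoints wrap the request body as {"inputs": ...}
    print(handler({"inputs": ["translate English to German: Hello, world!"]}))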