cad-llm / handler.py
evanthebouncy's picture
Create new file
fb9232c
import torch
from typing import Dict, List, Any
from transformers import T5ForConditionalGeneration, AutoTokenizer
# check for GPU
# device = 0 if torch.cuda.is_available() else -1
# Default sampling temperature, used when a call does not supply its own.
temp = 1.0


def generate_samples_with_temp(tokenizer, model, txts, temperature=None) -> List[str]:
    """Sample one generated string for each input text.

    Args:
        tokenizer: Callable tokenizer; invoked with ``return_tensors='pt'``
            and ``padding=True``, and later used for ``batch_decode``.
        model: Object exposing ``generate`` (a seq2seq model in this file).
        txts: A single text or a list of texts to condition on.
        temperature: Sampling temperature. ``None`` (the default) falls back
            to the module-level ``temp`` value at call time.

    Returns:
        The decoded generations with special tokens stripped.
    """
    # An empty batch has nothing to tokenize or generate.
    if isinstance(txts, (list, tuple)) and not txts:
        return []

    if temperature is None:
        temperature = temp

    encoded = tokenizer(txts, return_tensors='pt', padding=True)
    # padding=True pads shorter inputs up to the longest one in the batch;
    # hand the attention mask to generate() so padded positions are masked
    # explicitly rather than relying on the model to infer them.
    outputs = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        do_sample=True,
        max_length=128,
        temperature=temperature,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)
class EndpointHandler():
    """Callable request handler wrapping a T5 model and its tokenizer."""

    def __init__(self, path=""):
        """Load the tokenizer and model weights from ``path``.

        Args:
            path: Location passed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = T5ForConditionalGeneration.from_pretrained(path)

    def __call__(self, data: Any) -> List[str]:
        """Generate sampled text for the texts carried by a request.

        Args:
            data: Request payload. For a dict, the value under ``"inputs"``
                is used (and removed from the dict); a dict without that key,
                or any non-dict payload, is forwarded as the inputs as-is.

        Returns:
            The decoded generations produced by
            ``generate_samples_with_temp``.
        """
        # Only dicts support the keyed lookup; a bare list or string payload
        # would otherwise raise on .pop() before reaching the model.
        if isinstance(data, dict):
            inputs = data.pop("inputs", data)
        else:
            inputs = data
        # NOTE: a "parameters" entry in the payload is currently ignored.
        return generate_samples_with_temp(self.tokenizer, self.model, inputs)