# handler.py
from typing import Any, Dict

from transformers import AutoModelForCausalLM, AutoTokenizer


class EndpointHandler:
    """Callable wrapper that serves text generation from a causal language model.

    The tokenizer and model are loaded once at construction time; each call
    tokenizes a prompt, generates a continuation and returns the decoded text.
    """

    # Generation length used when the request does not override it.
    DEFAULT_MAX_NEW_TOKENS = 100

    def __init__(self, path: str = "") -> None:
        """Load the tokenizer and model from ``path``.

        Args:
            path: Location handed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForCausalLM.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """Generate text for the prompt carried by ``data``.

        Args:
            data: Request payload. ``data["inputs"]`` is the prompt. An
                optional ``data["parameters"]`` mapping is forwarded to
                ``model.generate`` as keyword arguments and may override
                ``max_new_tokens``.

        Returns:
            ``{"prediction": text}`` where ``text`` is the first generated
            sequence decoded with special tokens stripped.

        Raises:
            KeyError: If ``data`` has no ``"inputs"`` entry.
        """
        prompt = data["inputs"]

        # A missing or null "parameters" entry means "use the defaults".
        overrides = data.get("parameters") or {}
        generation_kwargs = {
            "max_new_tokens": self.DEFAULT_MAX_NEW_TOKENS,
            **overrides,
        }

        encoded = self.tokenizer(prompt, return_tensors="pt")
        generated = self.model.generate(**encoded, **generation_kwargs)

        # Only the first sequence is returned.
        # NOTE(review): for decoder-only models the generated ids normally
        # include the prompt tokens, so the text likely starts with the prompt.
        text = self.tokenizer.decode(generated[0], skip_special_tokens=True)
        return {"prediction": text}