"""Custom Hugging Face Inference Endpoints handler for vrouco/jais-13b-custom."""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


class EndpointHandler:
    """Handler invoked by Hugging Face Inference Endpoints.

    The runtime instantiates this class once, passing the local path of the
    deployed model repository, then calls the instance for every request.
    """

    def __init__(self, model_dir: str, **kwargs):
        """Load the tokenizer and model.

        Args:
            model_dir: Local path of the model snapshot provided by the
                Inference Endpoints runtime. The original code ignored this
                and re-downloaded the weights from the Hub; loading from
                ``model_dir`` uses the already-deployed files.
            **kwargs: Extra arguments supplied by the runtime; unused.
        """
        # Kept for backward compatibility with code that reads this attribute,
        # and used as a fallback source if no local path was supplied.
        self.model_id = "vrouco/jais-13b-custom"
        source = model_dir or self.model_id

        # Jais ships custom modeling code, so trust_remote_code is required.
        self.tokenizer = AutoTokenizer.from_pretrained(source, trust_remote_code=True)

        # Run on GPU in half precision when available; a 13B model on CPU
        # in float32 is impractically slow and memory-hungry.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = AutoModelForCausalLM.from_pretrained(
            source,
            trust_remote_code=True,
            torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
        )
        self.model.to(self.device)
        self.model.eval()  # disable dropout etc. for inference

    def __call__(self, data: dict) -> dict:
        """Process one inference request.

        Args:
            data: Request payload; the prompt is read from ``data["inputs"]``.

        Returns:
            ``{"generated_text": ...}`` on success, or ``{"error": ...}``
            when no prompt was supplied.
        """
        prompt = data.get("inputs", "")
        if not prompt:
            return {"error": "No input text provided"}

        # Tokenize (keeping the attention mask, which the original dropped)
        # and move the tensors to the model's device.
        encoded = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        with torch.no_grad():
            output_ids = self.model.generate(
                **encoded,
                # max_new_tokens bounds only the generated continuation;
                # the original max_length=200 also counted the prompt, so a
                # long prompt could leave no room for any output at all.
                max_new_tokens=200,
            )

        response_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
        return {"generated_text": response_text}