File size: 1,129 Bytes
2cc78fb
1505272
2cc78fb
1505272
 
 
 
 
0e3d77c
1505272
 
 
 
2cc78fb
1505272
 
 
 
 
 
 
2cc78fb
1505272
2cc78fb
1505272
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

class EndpointHandler:
    """Custom handler for Hugging Face Inference Endpoints.

    Loads a causal-LM + tokenizer once at startup and serves text
    generation requests of the form ``{"inputs": "...", "parameters": {...}}``.
    """

    def __init__(self, model_dir: str, **kwargs):
        """
        Initialize the handler. This is required by Hugging Face Inference Endpoints.

        Args:
            model_dir: Path to the locally downloaded model repository.
                NOTE(review): currently ignored — the handler re-downloads the
                hardcoded hub id below. Kept for backward compatibility;
                consider loading from ``model_dir`` instead.
        """
        self.model_id = "vrouco/jais-13b-custom"

        # trust_remote_code=True is required because jais ships custom model code.
        # Security note: this executes code from the hub repo — only safe because
        # the repo id is pinned and owned by us.
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True)

        # Move to GPU when available and switch to inference mode; the original
        # left the 13B model on CPU in training mode.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(device)
        self.model.eval()

    def __call__(self, data):
        """
        Process one inference request.

        Args:
            data: Request payload; ``"inputs"`` holds the prompt string and an
                optional ``"parameters"`` dict is forwarded to ``generate``.

        Returns:
            ``{"generated_text": str}`` on success, ``{"error": str}`` when
            no prompt was provided.
        """
        prompt = data.get("inputs", "")
        if not prompt:
            return {"error": "No input text provided"}

        # Optional generation overrides per the standard endpoint request schema.
        gen_kwargs = {"max_new_tokens": 200}
        gen_kwargs.update(data.get("parameters") or {})

        # Keep the full tokenizer output so attention_mask is passed to
        # generate (omitting it triggers a warning and can skew results),
        # and move tensors to the model's device.
        encoded = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            # max_new_tokens (not max_length) so long prompts still get a
            # full-length completion instead of being silently starved.
            output_ids = self.model.generate(**encoded, **gen_kwargs)

        response_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
        return {"generated_text": response_text}