| from typing import Dict, List, Any | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
class EndpointHandler:
    """Inference handler wrapping a causal language model.

    The tokenizer and model are loaded once in ``__init__``; every call
    tokenizes one prompt, generates a single sequence and returns the
    decoded text.
    """

    def __init__(self, path=""):
        """Load the tokenizer and the model.

        Args:
            path: Accepted for interface compatibility but not used; the
                model is always loaded from the hard-coded id below.
        """
        model_id = "DisgustingOzil/Academic-ShortQA-Generator"
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            load_in_4bit=True,
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate text for the prompt contained in ``data``.

        Args:
            data: Request payload. The prompt is read from ``"input_text"``
                or, when that key is absent, from ``"inputs"``. The payload
                is not modified.

        Returns:
            A one-element list ``[{"generated_text": <decoded output>}]``.

        Raises:
            ValueError: If the payload contains neither prompt key.
        """
        prompt = data.get("input_text")
        if prompt is None:
            prompt = data.get("inputs")
        if prompt is None:
            # Fail early with a clear message instead of handing the whole
            # payload dict to the tokenizer.
            raise ValueError(
                "Request payload must contain an 'input_text' (or 'inputs') field."
            )

        encoded = self.tokenizer(prompt, return_tensors="pt")
        # The input tensors must live on the same device as the model
        # weights, otherwise generation fails with a device mismatch.
        encoded = encoded.to(self.model.device)

        # NOTE: max_length bounds prompt tokens plus generated tokens.
        generated = self.model.generate(
            **encoded,
            max_length=1000,
            num_return_sequences=1,
        )
        text = self.tokenizer.decode(generated[0], skip_special_tokens=True)
        return [{"generated_text": text}]