from typing import Any, Dict, List

import bitsandbytes as bnb
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer
class EndpointHandler:
    """Text-generation request handler around a 4-bit quantized causal LM.

    The tokenizer and model are loaded once in ``__init__`` and reused for
    every call. The ``__init__(path)`` / ``__call__(data)`` shape appears to
    follow the Hugging Face Inference Endpoints custom-handler convention;
    confirm against the actual deployment target.
    """

    # Hub repository the tokenizer and the model weights are loaded from.
    MODEL_ID = "DisgustingOzil/Academic-MCQ-Generator"
    # Forwarded to ``from_pretrained`` to request 4-bit quantized loading.
    LOAD_IN_4BIT = True
    # Forwarded to ``generate`` as ``max_length``.
    MAX_LENGTH = 1000

    def __init__(self, path: str = "") -> None:
        """Load the tokenizer and the quantized model.

        Args:
            path: Accepted for interface compatibility but not used; the
                model is always loaded from ``MODEL_ID``.
        """
        # Keep both objects on the instance: ``__call__`` needs them on every
        # request, and plain locals would be gone once ``__init__`` returns.
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_ID)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.MODEL_ID,
            load_in_4bit=self.LOAD_IN_4BIT,
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate a completion for the prompt carried in ``data``.

        Args:
            data: Request payload. The prompt is read from ``"input_text"``;
                when that key is absent, ``"inputs"`` is used instead. The
                payload is not modified.

        Returns:
            A single-element list ``[{"generated_text": <decoded output>}]``.

        Raises:
            ValueError: If neither key holds a non-empty string.
        """
        prompt = data.get("input_text")
        if prompt is None:
            prompt = data.get("inputs")
        if not isinstance(prompt, str) or not prompt.strip():
            raise ValueError(
                "Request payload must contain a non-empty string under "
                "'input_text' (or 'inputs')."
            )

        # The tokenizer output must live on the same device as the model
        # before it is handed to ``generate``.
        encoded = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        generated = self.model.generate(
            **encoded,
            max_length=self.MAX_LENGTH,
            num_return_sequences=1,
        )
        text = self.tokenizer.decode(generated[0], skip_special_tokens=True)
        return [{"generated_text": text}]