| import bitsandbytes as bnb | |
| from transformers import (AutoModelForCausalLM, | |
| AutoTokenizer, | |
| BitsAndBytesConfig, | |
| TrainingArguments, | |
| pipeline) | |
| import torch | |
class EndpointHandler:
    """Callable request handler around a text-generation pipeline.

    On construction the handler loads a causal language model with 8-bit
    bitsandbytes quantization plus its tokenizer, and wraps both in a
    ``transformers`` ``"text-generation"`` pipeline. Calling the instance
    with a request payload runs that pipeline and returns its output.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer from ``path`` and build the pipeline.

        Args:
            path: Model location forwarded to ``from_pretrained`` for both
                the model and the tokenizer.
        """
        bnb_config = BitsAndBytesConfig(load_in_8bit=True)
        model = AutoModelForCausalLM.from_pretrained(
            path,
            device_map="auto",
            # Use the dtype object rather than the string "float16" so this
            # call agrees with the pipeline construction below.
            # NOTE(review): some transformers releases are believed to accept
            # only a torch.dtype or "auto" here -- confirm against the pinned
            # version.
            torch_dtype=torch.float16,
            quantization_config=bnb_config,
        )
        model.eval()
        tokenizer = AutoTokenizer.from_pretrained(path)
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        self.model = model
        self.tokenizer = tokenizer
        self.pipeline = pipe

    def __call__(self, data: dict):
        """Run the text-generation pipeline on a request payload.

        Args:
            data: Request payload. ``data["inputs"]`` is passed to the
                pipeline as its input; when the key is absent, ``data``
                itself is passed. The optional ``data["parameters"]``
                mapping is unpacked as keyword arguments to the pipeline.
                Both keys are removed from ``data`` (it is mutated).

        Returns:
            Whatever ``self.pipeline`` returns for the given inputs,
            unmodified. (Presumably a list of dicts carrying the generated
            text -- verify against the transformers pipeline docs.)
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)
        # A request without "parameters" (or with an explicit null) must not
        # crash on `**None`; treat it as "no extra generation arguments".
        if parameters is None:
            parameters = {}
        return self.pipeline(inputs, **parameters)