| import typing | |
| import transformers | |
class EndpointHandler:
    """Hugging Face Inference Endpoints handler: text generation with an adapter.

    Builds a ``text-generation`` pipeline from ``path`` and loads the adapter
    weights (e.g. a PEFT/LoRA checkpoint) stored at the same path.
    """

    def __init__(self, path: str = ""):
        # `path` is the model repo/directory; the adapter is expected to live
        # alongside the base model at the same location.
        self.pipeline = transformers.pipeline("text-generation", model=path)
        self.pipeline.model.load_adapter(path)

    def __call__(self, data: typing.Dict[str, typing.Any]) -> typing.List[typing.Dict[str, typing.Any]]:
        """Run one generation request.

        data args:
            inputs: chat messages passed to the tokenizer's chat template.
            max_new_tokens (:obj:`int`, optional): cap on generated tokens.
                When omitted, the pipeline/model default applies (previously a
                missing key raised ``KeyError``).
        Return:
            A :obj:`list` of :obj:`dict`: the pipeline output, serialized and
            returned to the caller (``return_full_text=False`` — only the
            newly generated text is included).
        """
        # Render the chat messages into a single prompt string.
        # NOTE(review): generation-style templates usually also want
        # `add_generation_prompt=True` — confirm against this model's template.
        prompt = self.pipeline.tokenizer.apply_chat_template(
            data["inputs"], tokenize=False
        )
        generate_kwargs: typing.Dict[str, typing.Any] = {"return_full_text": False}
        # Robustness fix: forward max_new_tokens only when the caller sent it,
        # instead of crashing with KeyError on its absence.
        if "max_new_tokens" in data:
            generate_kwargs["max_new_tokens"] = data["max_new_tokens"]
        return self.pipeline(prompt, **generate_kwargs)