| from typing import Dict, List, Any | |
| import torch | |
| from torch import autocast | |
| from diffusers import StableDiffusionPipeline | |
| import base64 | |
| from io import BytesIO | |
# This handler is GPU-only: fail fast at import time when CUDA is unavailable,
# otherwise pin the module-level device to CUDA.
if not torch.cuda.is_available():
    raise ValueError("need to run on GPU")
device = torch.device("cuda")
class EndpointHandler:
    """Callable wrapper around a Stable Diffusion text-to-image pipeline.

    The pipeline is loaded once at construction time and reused for every
    request handled through ``__call__``.
    """

    def __init__(self, path=""):
        """Load the pipeline found at *path* and move it to the module device.

        Args:
            path: Location handed to ``StableDiffusionPipeline.from_pretrained``.
        """
        # Half-precision weights are requested explicitly via torch_dtype.
        self.pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
        self.pipe = self.pipe.to(device)

    def __call__(self, data: Any) -> "PIL.Image":
        """Generate one image for the prompt carried by *data*.

        Args:
            data: Request payload. When it is a dict, the prompt is read from
                its ``"inputs"`` key (the dict itself is used if that key is
                absent, and it is not modified). Any other value is passed to
                the pipeline unchanged as the prompt.

        Returns:
            The first image produced by the pipeline. No base64 encoding is
            performed here; per the original note, that is left to the
            default serving toolkit.
        """
        # Read the prompt without mutating the caller's payload, and accept a
        # bare prompt instead of failing on a missing ``pop`` attribute.
        inputs = data.get("inputs", data) if isinstance(data, dict) else data

        # run inference pipeline
        with autocast(device.type):
            result = self.pipe(inputs, guidance_scale=7.5)

        # Newer diffusers releases return an output object exposing ``images``;
        # older ones returned a plain dict keyed by "sample". Support both.
        images = getattr(result, "images", None)
        if images is None:
            images = result["sample"]

        # encoding image as base 64 is done by the default toolkit
        return images[0]