socialtrait
/

stable-diffusion-xl-base-1.0-infendpoint

@@ -16,40 +16,75 @@ if device.type != "cuda":
 class EndpointHandler:
     def __init__(self, path=""):
         """Build the SDXL text-to-image pipeline that `__call__` runs.

         :param path: Location handed to `from_pretrained` to load the model from.
         """
         # load the StableDiffusionXLPipeline (not an inpainting pipeline) using
         # the fp16 safetensors weights
-        self.pipe = StableDiffusionXLPipeline.from_pretrained(
             path, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
         )
         # replace the scheduler with a DPMSolverMultistepScheduler built from the
         # config of the scheduler the pipeline was loaded with
-        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
-            self.pipe.scheduler.config
         )
         # move the pipeline to `device`, which is defined outside this method
-        self.pipe = self.pipe.to(device)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         """
         :param data: A dictionary contains `inputs` and optional `image` field.
         :return: A dictionary with `image` field contains image in base64.
         """
-        prompt = data.pop("inputs", data)
         # hyperparameters
         num_inference_steps = data.pop("num_inference_steps", 30)
         guidance_scale = data.pop("guidance_scale", 8)
         negative_prompt = data.pop("negative_prompt", None)
         height = data.pop("height", None)
         width = data.pop("width", None)
-        # run inference pipeline
-        out = self.pipe(
-            prompt,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            num_images_per_prompt=1,
-            negative_prompt=negative_prompt,
-            height=height,
-            width=width,
-        )
         # encode image as base 64
         buffered = BytesIO()

 class EndpointHandler:
     def __init__(self, path=""):
         # load StableDiffusionInpaintPipeline pipeline
+        self.base = StableDiffusionXLPipeline.from_pretrained(
             path, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
         )
         # use DPMSolverMultistepScheduler
+        self.base.scheduler = DPMSolverMultistepScheduler.from_config(
+            self.base.scheduler.config
         )
         # move to device
+        self.base = self.base.to(device)
+        self.base.unet = torch.compile(self.base.unet, mode="reduce-overhead", fullgraph=True)
+        self.refiner = StableDiffusionXLPipeline.from_pretrained(
+            "socialtrait/stable-diffusion-xl-refiner-1.0-infendpoint",
+            text_encoder_2=self.base.text_encoder_2,
+            vae=self.base.vae,
+            torch_dtype=torch.float16,
+            use_safetensors=True,
+            variant="fp16",
+        )
+        # use DPMSolverMultistepScheduler
+        self.refiner.scheduler = DPMSolverMultistepScheduler.from_config(
+            self.refiner.scheduler.config
+        )
+        self.refiner = self.refiner.to(device)
+        self.refiner.unet = torch.compile(self.refiner.unet, mode="reduce-overhead", fullgraph=True)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         """
         :param data: A dictionary contains `inputs` and optional `image` field.
         :return: A dictionary with `image` field contains image in base64.
         """
+        prompt = data.pop("inputs", None)
+        if prompt is None:
+            return {"error": "Please provide a prompt"}
         # hyperparameters
+        use_refiner = True if data.pop("use_refiner", False) else False
         num_inference_steps = data.pop("num_inference_steps", 30)
         guidance_scale = data.pop("guidance_scale", 8)
         negative_prompt = data.pop("negative_prompt", None)
+        high_noise_frac = data.pop("high_noise_frac", 0.8)
         height = data.pop("height", None)
         width = data.pop("width", None)
+        if use_refiner:
+            image = self.base(
+                prompt=prompt,
+                num_inference_steps=num_inference_steps,
+                denoising_end=high_noise_frac,
+                output_type="latent",
+            ).images
+            out = self.refiner(
+                prompt=prompt,
+                num_inference_steps=num_inference_steps,
+                denoising_start=high_noise_frac,
+                image=image,
+            )
+        else:
+            out = self.pipe(
+                prompt,
+                num_inference_steps=num_inference_steps,
+                guidance_scale=guidance_scale,
+                num_images_per_prompt=1,
+                negative_prompt=negative_prompt,
+                height=height,
+                width=width,
+            )
         # encode image as base 64
         buffered = BytesIO()