Spaces:
Sleeping
Sleeping
| import torch | |
| import numpy as np | |
| from PIL import Image | |
| from diffusers import StableDiffusionInpaintPipeline, StableDiffusionXLInpaintPipeline | |
class SDInpainter:
    """Fill masked image regions with a Stable Diffusion inpainting pipeline.

    The pipeline is loaded once in ``__init__`` and reused by every call to
    :meth:`inpaint`.
    """

    def __init__(self, model_id="runwayml/stable-diffusion-inpainting"):
        """Load the inpainting pipeline identified by *model_id*.

        Half precision is used when CUDA is available, full precision
        otherwise.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        on_gpu = self.device == "cuda"
        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if on_gpu else torch.float32,
        )
        if on_gpu:
            # Model offload moves sub-models to the GPU on demand; pushing
            # the whole pipeline to CUDA first would need the full amount
            # of VRAM up front and defeat the point of offloading.
            self.pipe.enable_model_cpu_offload()
        else:
            self.pipe.to(self.device)

    def inpaint(self, image, mask, prompt="background"):
        """Return *image* with the masked area regenerated from *prompt*.

        Args:
            image: Array accepted by ``PIL.Image.fromarray``; it is
                converted to RGB.
            mask: Array marking the pixels to replace. Bool, 0/1 and 0-255
                encodings are accepted (see :meth:`_normalize_mask`).
            prompt: Text prompt describing the desired fill.

        Returns:
            ``numpy.ndarray`` of the generated image, resized back to the
            input width and height.
        """
        pil_image = Image.fromarray(image).convert("RGB")

        mask = self._dilate_mask(self._normalize_mask(mask))
        mask_u8 = np.clip(mask * 255.0, 0, 255).astype(np.uint8)
        pil_mask = Image.fromarray(mask_u8).convert("L")

        w, h = pil_image.size
        new_size = self._fit_size(w, h, target=512)
        resized_image = pil_image.resize(new_size, Image.LANCZOS)
        # NEAREST keeps the mask hard-edged instead of introducing greys.
        resized_mask = pil_mask.resize(new_size, Image.NEAREST)

        output = self.pipe(
            prompt=prompt,
            negative_prompt="artifacts, low quality, distortion, object",
            image=resized_image,
            mask_image=resized_mask,
            num_inference_steps=30,
            guidance_scale=7.5,
        ).images[0]

        result = output.resize((w, h), Image.LANCZOS)
        return np.array(result)

    @staticmethod
    def _normalize_mask(mask):
        """Return *mask* as a float32 array with values in ``[0, 1]``.

        A mask whose maximum exceeds 1 is treated as 0-255 encoded and is
        divided by 255. Without this, multiplying a uint8 0/255 mask by 255
        wraps around, and bool masks are rejected by ``cv2.dilate``.
        """
        arr = np.asarray(mask).astype(np.float32)
        if arr.size and arr.max() > 1.0:
            arr = arr / 255.0
        return np.clip(arr, 0.0, 1.0)

    @staticmethod
    def _fit_size(width, height, target=512, multiple=8):
        """Scale ``(width, height)`` so the longer side equals *target*.

        Both sides are rounded down to a multiple of *multiple* and never
        drop below *multiple*. Integer arithmetic is used on purpose: the
        float form ``int(w * (target / w))`` can land one ulp below
        *target* (e.g. for w == 784) and then lose a whole step of 8.

        Raises:
            ValueError: if either dimension is not positive.
        """
        if width <= 0 or height <= 0:
            raise ValueError(
                f"image dimensions must be positive, got {width}x{height}"
            )
        longest = max(width, height)

        def snap(side):
            scaled = side * target // longest
            return max(multiple, scaled - scaled % multiple)

        return snap(width), snap(height)

    def _dilate_mask(self, mask, kernel_size=9):
        """Grow the masked area using a square kernel of *kernel_size* px."""
        import cv2

        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        return cv2.dilate(mask, kernel, iterations=1)
class SDXLInpainter:
    """Fill masked image regions with a Stable Diffusion XL inpainting pipeline.

    The pipeline is loaded once in ``__init__`` and reused by every call to
    :meth:`inpaint`.
    """

    def __init__(self, model_id="diffusers/stable-diffusion-xl-1.0-inpainting-0.1"):
        """Load the SDXL inpainting pipeline identified by *model_id*.

        The ``fp16`` safetensors weights are requested; the compute dtype is
        half precision when CUDA is available and full precision otherwise.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        on_gpu = self.device == "cuda"
        self.pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if on_gpu else torch.float32,
            variant="fp16",
            use_safetensors=True,
        )
        if on_gpu:
            # Model offload moves sub-models to the GPU on demand; pushing
            # the whole pipeline to CUDA first would need the full amount
            # of VRAM up front and defeat the point of offloading.
            self.pipe.enable_model_cpu_offload()
        else:
            self.pipe.to(self.device)

    def inpaint(self, image, mask, prompt=""):
        """Return *image* with the masked area regenerated.

        Args:
            image: Array accepted by ``PIL.Image.fromarray``; it is
                converted to RGB.
            mask: Array marking the pixels to replace. Bool, 0/1 and 0-255
                encodings are accepted (see :meth:`_normalize_mask`).
            prompt: Text prompt. An empty prompt or ``"background"`` selects
                a built-in clean-background prompt with a lower guidance
                scale.

        Returns:
            ``numpy.ndarray`` of the generated image, resized back to the
            input width and height.
        """
        import cv2

        pil_image = Image.fromarray(image).convert("RGB")

        # Work in float32 so the Gaussian blur yields a real soft edge; on
        # an integer 0/1 mask the blurred values round straight back to 0/1.
        mask = self._dilate_mask(self._normalize_mask(mask), kernel_size=15)
        mask = cv2.GaussianBlur(mask, (21, 21), 0)
        mask_u8 = np.clip(mask * 255.0, 0, 255).astype(np.uint8)
        pil_mask = Image.fromarray(mask_u8).convert("L")

        w, h = pil_image.size
        new_size = self._fit_size(w, h, target=1024)
        resized_image = pil_image.resize(new_size, Image.LANCZOS)
        resized_mask = pil_mask.resize(new_size, Image.NEAREST)

        if not prompt or prompt == "background":
            final_prompt = "clean background, empty space, seamless texture, high quality"
            guidance_scale = 4.5
        else:
            final_prompt = prompt
            guidance_scale = 7.5

        neg_prompt = (
            "object, subject, person, animal, cat, dog, "
            "glass, transparent, crystal, bottle, cup, reflection, "
            "complex, 3d render, artifacts, shadow, distortion, blur, watermark"
        )

        output = self.pipe(
            prompt=final_prompt,
            negative_prompt=neg_prompt,
            image=resized_image,
            mask_image=resized_mask,
            num_inference_steps=40,
            guidance_scale=guidance_scale,
            strength=0.99,
        ).images[0]

        result = output.resize((w, h), Image.LANCZOS)
        return np.array(result)

    @staticmethod
    def _normalize_mask(mask):
        """Return *mask* as a float32 array with values in ``[0, 1]``.

        A mask whose maximum exceeds 1 is treated as 0-255 encoded and is
        divided by 255. Without this, multiplying a uint8 0/255 mask by 255
        wraps around, and bool masks are rejected by ``cv2.dilate``.
        """
        arr = np.asarray(mask).astype(np.float32)
        if arr.size and arr.max() > 1.0:
            arr = arr / 255.0
        return np.clip(arr, 0.0, 1.0)

    @staticmethod
    def _fit_size(width, height, target=1024, multiple=8):
        """Scale ``(width, height)`` so the longer side equals *target*.

        Both sides are rounded down to a multiple of *multiple* and never
        drop below *multiple*. Integer arithmetic is used on purpose: the
        float form ``int(w * (target / w))`` can land one ulp below
        *target* (e.g. for w == 1568) and then lose a whole step of 8.

        Raises:
            ValueError: if either dimension is not positive.
        """
        if width <= 0 or height <= 0:
            raise ValueError(
                f"image dimensions must be positive, got {width}x{height}"
            )
        longest = max(width, height)

        def snap(side):
            scaled = side * target // longest
            return max(multiple, scaled - scaled % multiple)

        return snap(width), snap(height)

    def _dilate_mask(self, mask, kernel_size=15):
        """Grow the masked area using a square kernel of *kernel_size* px."""
        import cv2

        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        return cv2.dilate(mask, kernel, iterations=1)