File size: 4,112 Bytes
144afae
 
 
a8246e3
144afae
 
 
 
9de67ae
144afae
 
 
 
 
 
 
 
 
 
 
9de67ae
144afae
 
 
9de67ae
144afae
9de67ae
144afae
 
 
 
 
 
9de67ae
144afae
 
9de67ae
144afae
 
 
 
 
 
9de67ae
144afae
 
 
 
 
9de67ae
a8246e3
 
 
 
 
 
 
 
9de67ae
a8246e3
 
 
9de67ae
a8246e3
 
 
 
9de67ae
a8246e3
9de67ae
a8246e3
 
9de67ae
a8246e3
 
9de67ae
a8246e3
03bafc0
a8246e3
 
 
 
 
 
 
 
 
 
 
 
 
 
9de67ae
a8246e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9de67ae
 
a8246e3
 
 
 
 
 
 
144afae
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import torch
import numpy as np
from PIL import Image
from diffusers import StableDiffusionInpaintPipeline, StableDiffusionXLInpaintPipeline

class SDInpainter:
    """Inpainting with Stable Diffusion 1.x (512px base model).

    Wraps ``StableDiffusionInpaintPipeline``: the mask is dilated to cover
    object fringes, image and mask are resized so the long side is 512 with
    both dimensions multiples of 8 (UNet latent constraint), and the result
    is resized back to the caller's original resolution.
    """

    def __init__(self, model_id="runwayml/stable-diffusion-inpainting"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
        )

        if self.device == "cuda":
            # enable_model_cpu_offload() manages device placement itself;
            # pre-moving the whole pipeline to CUDA (the old `.to(device)`)
            # conflicts with offloading and keeps every submodel resident
            # in VRAM, negating the memory saving.
            self.pipe.enable_model_cpu_offload()
        else:
            self.pipe.to(self.device)

    def inpaint(self, image, mask, prompt="background"):
        """Fill the masked region of ``image`` using the diffusion model.

        Args:
            image: HxWx3 uint8 RGB numpy array.
            mask: HxW array, nonzero where content should be repainted.
                NOTE(review): the ``* 255`` scaling below assumes values in
                {0, 1}; a 0-255 uint8 mask would overflow — confirm callers.
            prompt: text describing what to paint into the hole.

        Returns:
            HxWx3 uint8 RGB numpy array at the original resolution.
        """
        pil_image = Image.fromarray(image).convert('RGB')

        # Grow the mask so the model also repaints object edges / halos
        # that would otherwise leak into the result.
        mask = self._dilate_mask(mask)
        pil_mask = Image.fromarray((mask * 255).astype(np.uint8)).convert('L')

        # Scale so the long side is 512 and both sides are multiples of 8
        # (SD 1.x UNet requirement). Clamp to >= 8 so an extreme aspect
        # ratio cannot round a dimension down to zero and crash resize().
        w, h = pil_image.size
        factor = 512 / max(w, h)
        new_w = max(8, int(w * factor) - (int(w * factor) % 8))
        new_h = max(8, int(h * factor) - (int(h * factor) % 8))

        resized_image = pil_image.resize((new_w, new_h), Image.LANCZOS)
        # NEAREST keeps the mask hard-edged instead of introducing grays.
        resized_mask = pil_mask.resize((new_w, new_h), Image.NEAREST)

        output = self.pipe(
            prompt=prompt,
            negative_prompt="artifacts, low quality, distortion, object",
            image=resized_image,
            mask_image=resized_mask,
            num_inference_steps=30,
            guidance_scale=7.5,
        ).images[0]

        # Return at the caller's original resolution.
        result = output.resize((w, h), Image.LANCZOS)

        return np.array(result)

    def _dilate_mask(self, mask, kernel_size=9):
        """Dilate ``mask`` with a square kernel to expand the fill region."""
        import cv2
        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        return cv2.dilate(mask, kernel, iterations=1)


class SDXLInpainter:
    """Inpainting with Stable Diffusion XL (1024px base model).

    Compared to :class:`SDInpainter`: larger working resolution (1024),
    a wider mask dilation plus Gaussian feathering for softer blends, and
    prompt/guidance tuning for generic background removal.
    """

    def __init__(self, model_id="diffusers/stable-diffusion-xl-1.0-inpainting-0.1"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # NOTE(review): variant="fp16" is requested even on CPU, where the
        # weights are upcast to float32 on load — works for this repo but
        # fails on checkpoints without an fp16 variant; confirm intent.
        self.pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            variant="fp16",
            use_safetensors=True
        )

        if self.device == "cuda":
            # enable_model_cpu_offload() manages device placement itself;
            # pre-moving the pipeline to CUDA (the old `.to(device)`)
            # conflicts with offloading and keeps all submodels in VRAM.
            self.pipe.enable_model_cpu_offload()
        else:
            self.pipe.to(self.device)

    def inpaint(self, image, mask, prompt=""):
        """Fill the masked region of ``image`` using SDXL inpainting.

        Args:
            image: HxWx3 uint8 RGB numpy array.
            mask: HxW array, nonzero where content should be repainted.
                NOTE(review): the ``* 255`` scaling below assumes values in
                {0, 1}; a 0-255 uint8 mask would overflow — confirm callers.
            prompt: desired fill content; empty or "background" selects a
                tuned generic-background prompt with lower guidance.

        Returns:
            HxWx3 uint8 RGB numpy array at the original resolution.
        """
        pil_image = Image.fromarray(image).convert('RGB')

        # Wider dilation than the SD1.x path: SDXL works at 2x resolution,
        # so object fringes span more pixels.
        mask = self._dilate_mask(mask, kernel_size=15)

        # Feather the dilated mask so the inpainted region blends softly
        # into the untouched surroundings.
        import cv2
        mask = cv2.GaussianBlur(mask, (21, 21), 0)

        pil_mask = Image.fromarray((mask * 255).astype(np.uint8)).convert('L')

        # Scale so the long side is 1024 with both sides multiples of 8.
        # Clamp to >= 8 so an extreme aspect ratio cannot round a
        # dimension down to zero and crash resize().
        w, h = pil_image.size
        target_size = 1024
        scale = target_size / max(w, h)
        new_w = max(8, int(w * scale) - (int(w * scale) % 8))
        new_h = max(8, int(h * scale) - (int(h * scale) % 8))

        resized_image = pil_image.resize((new_w, new_h), Image.LANCZOS)
        resized_mask = pil_mask.resize((new_w, new_h), Image.NEAREST)

        if not prompt or prompt == "background":
            final_prompt = "clean background, empty space, seamless texture, high quality"
            # Lower guidance for generic fills: high CFG tends to invent
            # new objects instead of plain background.
            guidance_scale = 4.5
        else:
            final_prompt = prompt
            guidance_scale = 7.5

        neg_prompt = (
            "object, subject, person, animal, cat, dog, "
            "glass, transparent, crystal, bottle, cup, reflection, "
            "complex, 3d render, artifacts, shadow, distortion, blur, watermark"
        )

        output = self.pipe(
            prompt=final_prompt,
            negative_prompt=neg_prompt,
            image=resized_image,
            mask_image=resized_mask,
            num_inference_steps=40,
            guidance_scale=guidance_scale,
            # strength just under 1.0 so the masked latents are almost
            # fully re-noised while keeping the scheduler step count exact.
            strength=0.99,
        ).images[0]

        result = output.resize((w, h), Image.LANCZOS)

        return np.array(result)

    def _dilate_mask(self, mask, kernel_size=15):
        """Dilate ``mask`` with a square kernel to expand the fill region."""
        import cv2
        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        return cv2.dilate(mask, kernel, iterations=1)