File size: 4,112 Bytes
144afae
 
 
a8246e3
144afae
 
 
 
9de67ae
144afae
 
 
 
 
 
 
 
 
 
 
9de67ae
144afae
 
 
9de67ae
144afae
9de67ae
144afae
 
 
 
 
 
9de67ae
144afae
 
9de67ae
144afae
 
 
 
 
 
9de67ae
144afae
 
 
 
 
9de67ae
a8246e3
 
 
 
 
 
 
 
9de67ae
a8246e3
 
 
9de67ae
a8246e3
 
 
 
9de67ae
a8246e3
9de67ae
a8246e3
 
9de67ae
a8246e3
 
9de67ae
a8246e3
03bafc0
a8246e3
 
 
 
 
 
 
 
 
 
 
 
 
 
9de67ae
a8246e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9de67ae
 
a8246e3
 
 
 
 
 
 
144afae
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import torch
import numpy as np
from PIL import Image
from diffusers import StableDiffusionInpaintPipeline, StableDiffusionXLInpaintPipeline

class SDInpainter:
    """Inpainting with Stable Diffusion 1.x (512px base model).

    Wraps ``StableDiffusionInpaintPipeline``: the mask is dilated to cover
    object fringes, image and mask are resized so the long side is 512 with
    both dimensions multiples of 8 (UNet latent constraint), and the result
    is resized back to the caller's original resolution.
    """

    def __init__(self, model_id="runwayml/stable-diffusion-inpainting"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
        )

        if self.device == "cuda":
            # enable_model_cpu_offload() manages device placement itself;
            # pre-moving the whole pipeline to CUDA (the old `.to(device)`)
            # conflicts with offloading and keeps every submodel resident
            # in VRAM, negating the memory saving.
            self.pipe.enable_model_cpu_offload()
        else:
            self.pipe.to(self.device)

    def inpaint(self, image, mask, prompt="background"):
        """Fill the masked region of ``image`` using the diffusion model.

        Args:
            image: HxWx3 uint8 RGB numpy array.
            mask: HxW array, nonzero where content should be repainted.
                NOTE(review): the ``* 255`` scaling below assumes values in
                {0, 1}; a 0-255 uint8 mask would overflow — confirm callers.
            prompt: text describing what to paint into the hole.

        Returns:
            HxWx3 uint8 RGB numpy array at the original resolution.
        """
        pil_image = Image.fromarray(image).convert('RGB')

        # Grow the mask so the model also repaints object edges / halos
        # that would otherwise leak into the result.
        mask = self._dilate_mask(mask)
        pil_mask = Image.fromarray((mask * 255).astype(np.uint8)).convert('L')

        # Scale so the long side is 512 and both sides are multiples of 8
        # (SD 1.x UNet requirement). Clamp to >= 8 so an extreme aspect
        # ratio cannot round a dimension down to zero and crash resize().
        w, h = pil_image.size
        factor = 512 / max(w, h)
        new_w = max(8, int(w * factor) - (int(w * factor) % 8))
        new_h = max(8, int(h * factor) - (int(h * factor) % 8))

        resized_image = pil_image.resize((new_w, new_h), Image.LANCZOS)
        # NEAREST keeps the mask hard-edged instead of introducing grays.
        resized_mask = pil_mask.resize((new_w, new_h), Image.NEAREST)

        output = self.pipe(
            prompt=prompt,
            negative_prompt="artifacts, low quality, distortion, object",
            image=resized_image,
            mask_image=resized_mask,
            num_inference_steps=30,
            guidance_scale=7.5,
        ).images[0]

        # Return at the caller's original resolution.
        result = output.resize((w, h), Image.LANCZOS)

        return np.array(result)

    def _dilate_mask(self, mask, kernel_size=9):
        """Dilate ``mask`` with a square kernel to expand the fill region."""
        import cv2
        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        return cv2.dilate(mask, kernel, iterations=1)


class SDXLInpainter:
    """Inpainting with Stable Diffusion XL (1024px base model).

    Compared to :class:`SDInpainter`: larger working resolution (1024),
    a wider mask dilation plus Gaussian feathering for softer blends, and
    prompt/guidance tuning for generic background removal.
    """

    def __init__(self, model_id="diffusers/stable-diffusion-xl-1.0-inpainting-0.1"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # NOTE(review): variant="fp16" is requested even on CPU, where the
        # weights are upcast to float32 on load — works for this repo but
        # fails on checkpoints without an fp16 variant; confirm intent.
        self.pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            variant="fp16",
            use_safetensors=True
        )

        if self.device == "cuda":
            # enable_model_cpu_offload() manages device placement itself;
            # pre-moving the pipeline to CUDA (the old `.to(device)`)
            # conflicts with offloading and keeps all submodels in VRAM.
            self.pipe.enable_model_cpu_offload()
        else:
            self.pipe.to(self.device)

    def inpaint(self, image, mask, prompt=""):
        """Fill the masked region of ``image`` using SDXL inpainting.

        Args:
            image: HxWx3 uint8 RGB numpy array.
            mask: HxW array, nonzero where content should be repainted.
                NOTE(review): the ``* 255`` scaling below assumes values in
                {0, 1}; a 0-255 uint8 mask would overflow — confirm callers.
            prompt: desired fill content; empty or "background" selects a
                tuned generic-background prompt with lower guidance.

        Returns:
            HxWx3 uint8 RGB numpy array at the original resolution.
        """
        pil_image = Image.fromarray(image).convert('RGB')

        # Wider dilation than the SD1.x path: SDXL works at 2x resolution,
        # so object fringes span more pixels.
        mask = self._dilate_mask(mask, kernel_size=15)

        # Feather the dilated mask so the inpainted region blends softly
        # into the untouched surroundings.
        import cv2
        mask = cv2.GaussianBlur(mask, (21, 21), 0)

        pil_mask = Image.fromarray((mask * 255).astype(np.uint8)).convert('L')

        # Scale so the long side is 1024 with both sides multiples of 8.
        # Clamp to >= 8 so an extreme aspect ratio cannot round a
        # dimension down to zero and crash resize().
        w, h = pil_image.size
        target_size = 1024
        scale = target_size / max(w, h)
        new_w = max(8, int(w * scale) - (int(w * scale) % 8))
        new_h = max(8, int(h * scale) - (int(h * scale) % 8))

        resized_image = pil_image.resize((new_w, new_h), Image.LANCZOS)
        resized_mask = pil_mask.resize((new_w, new_h), Image.NEAREST)

        if not prompt or prompt == "background":
            final_prompt = "clean background, empty space, seamless texture, high quality"
            # Lower guidance for generic fills: high CFG tends to invent
            # new objects instead of plain background.
            guidance_scale = 4.5
        else:
            final_prompt = prompt
            guidance_scale = 7.5

        neg_prompt = (
            "object, subject, person, animal, cat, dog, "
            "glass, transparent, crystal, bottle, cup, reflection, "
            "complex, 3d render, artifacts, shadow, distortion, blur, watermark"
        )

        output = self.pipe(
            prompt=final_prompt,
            negative_prompt=neg_prompt,
            image=resized_image,
            mask_image=resized_mask,
            num_inference_steps=40,
            guidance_scale=guidance_scale,
            # strength just under 1.0 so the masked latents are almost
            # fully re-noised while keeping the scheduler step count exact.
            strength=0.99,
        ).images[0]

        result = output.resize((w, h), Image.LANCZOS)

        return np.array(result)

    def _dilate_mask(self, mask, kernel_size=15):
        """Dilate ``mask`` with a square kernel to expand the fill region."""
        import cv2
        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        return cv2.dilate(mask, kernel, iterations=1)