Spaces:
Configuration error
Configuration error
| import argparse | |
| import os | |
| import random | |
| import cv2 | |
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| from controlnet_aux import HEDdetector, OpenposeDetector | |
| from PIL import Image, ImageFilter | |
| from safetensors.torch import load_model | |
| from transformers import CLIPTextModel, DPTFeatureExtractor, DPTForDepthEstimation | |
| from diffusers import UniPCMultistepScheduler | |
| from diffusers.pipelines.controlnet.pipeline_controlnet import ControlNetModel | |
| from PowerPaint.powerpaint.models.BrushNet_CA import BrushNetModel | |
| from PowerPaint.powerpaint.models.unet_2d_condition import UNet2DConditionModel | |
| from PowerPaint.powerpaint.pipelines.pipeline_PowerPaint import StableDiffusionInpaintPipeline as Pipeline | |
| from PowerPaint.powerpaint.pipelines.pipeline_PowerPaint_Brushnet_CA import StableDiffusionPowerPaintBrushNetPipeline | |
| from PowerPaint.powerpaint.pipelines.pipeline_PowerPaint_ControlNet import ( | |
| StableDiffusionControlNetInpaintPipeline as controlnetPipeline, | |
| ) | |
| from PowerPaint.powerpaint.utils.utils import TokenizerWrapper, add_tokens | |
| torch.set_grad_enabled(False) | |
| weight_dtype = torch.float16 | |
| def set_seed(seed): | |
| torch.manual_seed(seed) | |
| torch.cuda.manual_seed(seed) | |
| torch.cuda.manual_seed_all(seed) | |
| np.random.seed(seed) | |
| random.seed(seed) | |
| def add_task(prompt, negative_prompt, control_type, version): | |
| pos_prefix = neg_prefix = "" | |
| if control_type == "object-removal" or control_type == "image-outpainting": | |
| if version == "ppt-v1": | |
| pos_prefix = "empty scene blur " + prompt | |
| neg_prefix = negative_prompt | |
| promptA = pos_prefix + " P_ctxt" | |
| promptB = pos_prefix + " P_ctxt" | |
| negative_promptA = neg_prefix + " P_obj" | |
| negative_promptB = neg_prefix + " P_obj" | |
| elif control_type == "shape-guided": | |
| if version == "ppt-v1": | |
| pos_prefix = prompt | |
| neg_prefix = negative_prompt + ", worst quality, low quality, normal quality, bad quality, blurry " | |
| promptA = pos_prefix + " P_shape" | |
| promptB = pos_prefix + " P_ctxt" | |
| negative_promptA = neg_prefix + "P_shape" | |
| negative_promptB = neg_prefix + "P_ctxt" | |
| else: | |
| if version == "ppt-v1": | |
| pos_prefix = prompt | |
| neg_prefix = negative_prompt + ", worst quality, low quality, normal quality, bad quality, blurry " | |
| promptA = pos_prefix + " P_obj" | |
| promptB = pos_prefix + " P_obj" | |
| negative_promptA = neg_prefix + "P_obj" | |
| negative_promptB = neg_prefix + "P_obj" | |
| return promptA, promptB, negative_promptA, negative_promptB | |
| def select_tab_text_guided(): | |
| return "text-guided" | |
| def select_tab_object_removal(): | |
| return "object-removal" | |
| def select_tab_image_outpainting(): | |
| return "image-outpainting" | |
| def select_tab_shape_guided(): | |
| return "shape-guided" | |
| class PowerPaintController: | |
| def __init__(self, weight_dtype, checkpoint_dir, local_files_only, version) -> None: | |
| self.version = version | |
| self.checkpoint_dir = checkpoint_dir | |
| self.local_files_only = local_files_only | |
| # initialize powerpaint pipeline | |
| if version == "ppt-v1": | |
| self.pipe = Pipeline.from_pretrained( | |
| "runwayml/stable-diffusion-inpainting", torch_dtype=weight_dtype, local_files_only=local_files_only | |
| ) | |
| self.pipe.tokenizer = TokenizerWrapper( | |
| from_pretrained="runwayml/stable-diffusion-v1-5", | |
| subfolder="tokenizer", | |
| revision=None, | |
| local_files_only=local_files_only, | |
| ) | |
| # add learned task tokens into the tokenizer | |
| add_tokens( | |
| tokenizer=self.pipe.tokenizer, | |
| text_encoder=self.pipe.text_encoder, | |
| placeholder_tokens=["P_ctxt", "P_shape", "P_obj"], | |
| initialize_tokens=["a", "a", "a"], | |
| num_vectors_per_token=10, | |
| ) | |
| # loading pre-trained weights | |
| load_model(self.pipe.unet, os.path.join(checkpoint_dir, "unet/unet.safetensors")) | |
| load_model(self.pipe.text_encoder, os.path.join(checkpoint_dir, "text_encoder/text_encoder.safetensors")) | |
| self.pipe = self.pipe.to("cuda") | |
| # initialize controlnet-related models | |
| self.depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to("cuda") | |
| self.feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas") | |
| self.openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet") | |
| self.hed = HEDdetector.from_pretrained("lllyasviel/ControlNet") | |
| base_control = ControlNetModel.from_pretrained( | |
| "lllyasviel/sd-controlnet-canny", torch_dtype=weight_dtype, local_files_only=local_files_only | |
| ) | |
| self.control_pipe = controlnetPipeline( | |
| self.pipe.vae, | |
| self.pipe.text_encoder, | |
| self.pipe.tokenizer, | |
| self.pipe.unet, | |
| base_control, | |
| self.pipe.scheduler, | |
| None, | |
| None, | |
| False, | |
| ) | |
| self.control_pipe = self.control_pipe.to("cuda") | |
| self.current_control = "canny" | |
| # controlnet_conditioning_scale = 0.8 | |
| else: | |
| # brushnet-based version | |
| unet = UNet2DConditionModel.from_pretrained( | |
| "runwayml/stable-diffusion-v1-5", | |
| subfolder="unet", | |
| revision=None, | |
| torch_dtype=weight_dtype, | |
| local_files_only=local_files_only, | |
| ) | |
| text_encoder_brushnet = CLIPTextModel.from_pretrained( | |
| "runwayml/stable-diffusion-v1-5", | |
| subfolder="text_encoder", | |
| revision=None, | |
| torch_dtype=weight_dtype, | |
| local_files_only=local_files_only, | |
| ) | |
| brushnet = BrushNetModel.from_unet(unet) | |
| base_model_path = os.path.join(checkpoint_dir, "realisticVisionV60B1_v51VAE") | |
| self.pipe = StableDiffusionPowerPaintBrushNetPipeline.from_pretrained( | |
| base_model_path, | |
| brushnet=brushnet, | |
| text_encoder_brushnet=text_encoder_brushnet, | |
| torch_dtype=weight_dtype, | |
| low_cpu_mem_usage=False, | |
| safety_checker=None, | |
| ) | |
| self.pipe.unet = UNet2DConditionModel.from_pretrained( | |
| base_model_path, | |
| subfolder="unet", | |
| revision=None, | |
| torch_dtype=weight_dtype, | |
| local_files_only=local_files_only, | |
| ) | |
| self.pipe.tokenizer = TokenizerWrapper( | |
| from_pretrained=base_model_path, | |
| subfolder="tokenizer", | |
| revision=None, | |
| torch_type=weight_dtype, | |
| local_files_only=local_files_only, | |
| ) | |
| # add learned task tokens into the tokenizer | |
| add_tokens( | |
| tokenizer=self.pipe.tokenizer, | |
| text_encoder=self.pipe.text_encoder_brushnet, | |
| placeholder_tokens=["P_ctxt", "P_shape", "P_obj"], | |
| initialize_tokens=["a", "a", "a"], | |
| num_vectors_per_token=10, | |
| ) | |
| load_model( | |
| self.pipe.brushnet, | |
| os.path.join(checkpoint_dir, "PowerPaint_Brushnet/diffusion_pytorch_model.safetensors"), | |
| ) | |
| self.pipe.text_encoder_brushnet.load_state_dict( | |
| torch.load(os.path.join(checkpoint_dir, "PowerPaint_Brushnet/pytorch_model.bin")), strict=False | |
| ) | |
| self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config) | |
| self.pipe.enable_model_cpu_offload() | |
| self.pipe = self.pipe.to("cuda") | |
| def get_depth_map(self, image): | |
| image = self.feature_extractor(images=image, return_tensors="pt").pixel_values.to("cuda") | |
| with torch.no_grad(), torch.autocast("cuda"): | |
| depth_map = self.depth_estimator(image).predicted_depth | |
| depth_map = torch.nn.functional.interpolate( | |
| depth_map.unsqueeze(1), | |
| size=(1024, 1024), | |
| mode="bicubic", | |
| align_corners=False, | |
| ) | |
| depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True) | |
| depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True) | |
| depth_map = (depth_map - depth_min) / (depth_max - depth_min) | |
| image = torch.cat([depth_map] * 3, dim=1) | |
| image = image.permute(0, 2, 3, 1).cpu().numpy()[0] | |
| image = Image.fromarray((image * 255.0).clip(0, 255).astype(np.uint8)) | |
| return image | |
| def load_controlnet(self, control_type): | |
| if self.current_control != control_type: | |
| if control_type == "canny" or control_type is None: | |
| self.control_pipe.controlnet = ControlNetModel.from_pretrained( | |
| "lllyasviel/sd-controlnet-canny", torch_dtype=weight_dtype, local_files_only=self.local_files_only | |
| ) | |
| elif control_type == "pose": | |
| self.control_pipe.controlnet = ControlNetModel.from_pretrained( | |
| "lllyasviel/sd-controlnet-openpose", | |
| torch_dtype=weight_dtype, | |
| local_files_only=self.local_files_only, | |
| ) | |
| elif control_type == "depth": | |
| self.control_pipe.controlnet = ControlNetModel.from_pretrained( | |
| "lllyasviel/sd-controlnet-depth", torch_dtype=weight_dtype, local_files_only=self.local_files_only | |
| ) | |
| else: | |
| self.control_pipe.controlnet = ControlNetModel.from_pretrained( | |
| "lllyasviel/sd-controlnet-hed", torch_dtype=weight_dtype, local_files_only=self.local_files_only | |
| ) | |
| self.control_pipe = self.control_pipe.to("cuda") | |
| self.current_control = control_type | |
| def predict( | |
| self, | |
| input_image, | |
| prompt, | |
| fitting_degree, | |
| ddim_steps, | |
| scale, | |
| seed, | |
| negative_prompt, | |
| task, | |
| vertical_expansion_ratio, | |
| horizontal_expansion_ratio, | |
| ): | |
| input_image['image'] = input_image['background'] | |
| input_image['mask'] = input_image['layers'][-1] | |
| size1, size2 = input_image["image"].convert("RGB").size | |
| if task != "image-outpainting": | |
| if size1 < size2: | |
| input_image["image"] = input_image["image"].convert("RGB").resize((640, int(size2 / size1 * 640))) | |
| else: | |
| input_image["image"] = input_image["image"].convert("RGB").resize((int(size1 / size2 * 640), 640)) | |
| else: | |
| if size1 < size2: | |
| input_image["image"] = input_image["image"].convert("RGB").resize((512, int(size2 / size1 * 512))) | |
| else: | |
| input_image["image"] = input_image["image"].convert("RGB").resize((int(size1 / size2 * 512), 512)) | |
| if vertical_expansion_ratio is not None and horizontal_expansion_ratio is not None: | |
| o_W, o_H = input_image["image"].convert("RGB").size | |
| c_W = int(horizontal_expansion_ratio * o_W) | |
| c_H = int(vertical_expansion_ratio * o_H) | |
| expand_img = np.ones((c_H, c_W, 3), dtype=np.uint8) * 127 | |
| original_img = np.array(input_image["image"]) | |
| expand_img[ | |
| int((c_H - o_H) / 2.0) : int((c_H - o_H) / 2.0) + o_H, | |
| int((c_W - o_W) / 2.0) : int((c_W - o_W) / 2.0) + o_W, | |
| :, | |
| ] = original_img | |
| blurry_gap = 10 | |
| expand_mask = np.ones((c_H, c_W, 3), dtype=np.uint8) * 255 | |
| if vertical_expansion_ratio == 1 and horizontal_expansion_ratio != 1: | |
| expand_mask[ | |
| int((c_H - o_H) / 2.0) : int((c_H - o_H) / 2.0) + o_H, | |
| int((c_W - o_W) / 2.0) + blurry_gap : int((c_W - o_W) / 2.0) + o_W - blurry_gap, | |
| :, | |
| ] = 0 | |
| elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio != 1: | |
| expand_mask[ | |
| int((c_H - o_H) / 2.0) + blurry_gap : int((c_H - o_H) / 2.0) + o_H - blurry_gap, | |
| int((c_W - o_W) / 2.0) + blurry_gap : int((c_W - o_W) / 2.0) + o_W - blurry_gap, | |
| :, | |
| ] = 0 | |
| elif vertical_expansion_ratio != 1 and horizontal_expansion_ratio == 1: | |
| expand_mask[ | |
| int((c_H - o_H) / 2.0) + blurry_gap : int((c_H - o_H) / 2.0) + o_H - blurry_gap, | |
| int((c_W - o_W) / 2.0) : int((c_W - o_W) / 2.0) + o_W, | |
| :, | |
| ] = 0 | |
| input_image["image"] = Image.fromarray(expand_img) | |
| input_image["mask"] = Image.fromarray(expand_mask) | |
| if self.version != "ppt-v1": | |
| if task == "image-outpainting": | |
| prompt = prompt + " empty scene" | |
| if task == "object-removal": | |
| prompt = prompt + " empty scene blur" | |
| promptA, promptB, negative_promptA, negative_promptB = add_task(prompt, negative_prompt, task, self.version) | |
| print(promptA, promptB, negative_promptA, negative_promptB) | |
| img = np.array(input_image["image"].convert("RGB")) | |
| W = int(np.shape(img)[0] - np.shape(img)[0] % 8) | |
| H = int(np.shape(img)[1] - np.shape(img)[1] % 8) | |
| input_image["image"] = input_image["image"].resize((H, W)) | |
| input_image["mask"] = input_image["mask"].resize((H, W)) | |
| set_seed(seed) | |
| if self.version == "ppt-v1": | |
| # for sd-inpainting based method | |
| result = self.pipe( | |
| promptA=promptA, | |
| promptB=promptB, | |
| tradoff=fitting_degree, | |
| tradoff_nag=fitting_degree, | |
| negative_promptA=negative_promptA, | |
| negative_promptB=negative_promptB, | |
| image=input_image["image"].convert("RGB"), | |
| mask=input_image["mask"].convert("RGB"), | |
| width=H, | |
| height=W, | |
| guidance_scale=scale, | |
| num_inference_steps=ddim_steps, | |
| ).images[0] | |
| else: | |
| # for brushnet-based method | |
| np_inpimg = np.array(input_image["image"]) | |
| np_inmask = np.array(input_image["mask"]) / 255.0 | |
| np_inpimg = np_inpimg * (1 - np_inmask) | |
| input_image["image"] = Image.fromarray(np_inpimg.astype(np.uint8)).convert("RGB") | |
| result = self.pipe( | |
| promptA=promptA, | |
| promptB=promptB, | |
| promptU=prompt, | |
| tradoff=fitting_degree, | |
| tradoff_nag=fitting_degree, | |
| image=input_image["image"].convert("RGB"), | |
| mask=input_image["mask"].convert("RGB"), | |
| num_inference_steps=ddim_steps, | |
| generator=torch.Generator("cuda").manual_seed(seed), | |
| brushnet_conditioning_scale=1.0, | |
| negative_promptA=negative_promptA, | |
| negative_promptB=negative_promptB, | |
| negative_promptU=negative_prompt, | |
| guidance_scale=scale, | |
| width=H, | |
| height=W, | |
| ).images[0] | |
| mask_np = np.array(input_image["mask"].convert("RGB")) | |
| red = np.array(result).astype("float") * 1 | |
| red[:, :, 0] = 180.0 | |
| red[:, :, 2] = 0 | |
| red[:, :, 1] = 0 | |
| result_m = np.array(result) | |
| result_m = Image.fromarray( | |
| ( | |
| result_m.astype("float") * (1 - mask_np.astype("float") / 512.0) | |
| + mask_np.astype("float") / 512.0 * red | |
| ).astype("uint8") | |
| ) | |
| m_img = input_image["mask"].convert("RGB").filter(ImageFilter.GaussianBlur(radius=3)) | |
| m_img = np.asarray(m_img) / 255.0 | |
| img_np = np.asarray(input_image["image"].convert("RGB")) / 255.0 | |
| ours_np = np.asarray(result) / 255.0 | |
| ours_np = ours_np * m_img + (1 - m_img) * img_np | |
| dict_res = [input_image["mask"].convert("RGB"), result_m] | |
| # result_paste = Image.fromarray(np.uint8(ours_np * 255)) | |
| # dict_out = [input_image["image"].convert("RGB"), result_paste] | |
| dict_out = [result] | |
| return dict_out, dict_res | |
| def predict_controlnet( | |
| self, | |
| input_image, | |
| input_control_image, | |
| control_type, | |
| prompt, | |
| ddim_steps, | |
| scale, | |
| seed, | |
| negative_prompt, | |
| controlnet_conditioning_scale, | |
| ): | |
| promptA = prompt + " P_obj" | |
| promptB = prompt + " P_obj" | |
| negative_promptA = negative_prompt | |
| negative_promptB = negative_prompt | |
| input_image['image'] = input_image['background'] | |
| input_image['mask'] = input_image['layers'][-1] | |
| size1, size2 = input_image["image"].convert("RGB").size | |
| if size1 < size2: | |
| input_image["image"] = input_image["image"].convert("RGB").resize((640, int(size2 / size1 * 640))) | |
| else: | |
| input_image["image"] = input_image["image"].convert("RGB").resize((int(size1 / size2 * 640), 640)) | |
| img = np.array(input_image["image"].convert("RGB")) | |
| W = int(np.shape(img)[0] - np.shape(img)[0] % 8) | |
| H = int(np.shape(img)[1] - np.shape(img)[1] % 8) | |
| input_image["image"] = input_image["image"].resize((H, W)) | |
| input_image["mask"] = input_image["mask"].resize((H, W)) | |
| if control_type != self.current_control: | |
| self.load_controlnet(control_type) | |
| controlnet_image = input_control_image | |
| if control_type == "canny": | |
| controlnet_image = controlnet_image.resize((H, W)) | |
| controlnet_image = np.array(controlnet_image) | |
| controlnet_image = cv2.Canny(controlnet_image, 100, 200) | |
| controlnet_image = controlnet_image[:, :, None] | |
| controlnet_image = np.concatenate([controlnet_image, controlnet_image, controlnet_image], axis=2) | |
| controlnet_image = Image.fromarray(controlnet_image) | |
| elif control_type == "pose": | |
| controlnet_image = self.openpose(controlnet_image) | |
| elif control_type == "depth": | |
| controlnet_image = controlnet_image.resize((H, W)) | |
| controlnet_image = self.get_depth_map(controlnet_image) | |
| else: | |
| controlnet_image = self.hed(controlnet_image) | |
| mask_np = np.array(input_image["mask"].convert("RGB")) | |
| controlnet_image = controlnet_image.resize((H, W)) | |
| set_seed(seed) | |
| result = self.control_pipe( | |
| promptA=promptB, | |
| promptB=promptA, | |
| tradoff=1.0, | |
| tradoff_nag=1.0, | |
| negative_promptA=negative_promptA, | |
| negative_promptB=negative_promptB, | |
| image=input_image["image"].convert("RGB"), | |
| mask=input_image["mask"].convert("RGB"), | |
| control_image=controlnet_image, | |
| width=H, | |
| height=W, | |
| guidance_scale=scale, | |
| controlnet_conditioning_scale=controlnet_conditioning_scale, | |
| num_inference_steps=ddim_steps, | |
| ).images[0] | |
| red = np.array(result).astype("float") * 1 | |
| red[:, :, 0] = 180.0 | |
| red[:, :, 2] = 0 | |
| red[:, :, 1] = 0 | |
| result_m = np.array(result) | |
| result_m = Image.fromarray( | |
| ( | |
| result_m.astype("float") * (1 - mask_np.astype("float") / 512.0) | |
| + mask_np.astype("float") / 512.0 * red | |
| ).astype("uint8") | |
| ) | |
| mask_np = np.array(input_image["mask"].convert("RGB")) | |
| m_img = input_image["mask"].convert("RGB").filter(ImageFilter.GaussianBlur(radius=4)) | |
| m_img = np.asarray(m_img) / 255.0 | |
| img_np = np.asarray(input_image["image"].convert("RGB")) / 255.0 | |
| ours_np = np.asarray(result) / 255.0 | |
| ours_np = ours_np * m_img + (1 - m_img) * img_np | |
| result_paste = Image.fromarray(np.uint8(ours_np * 255)) | |
| return [input_image["image"].convert("RGB"), result_paste], [controlnet_image, result_m] | |
| def infer( | |
| self, | |
| input_image, | |
| text_guided_prompt, | |
| text_guided_negative_prompt, | |
| shape_guided_prompt, | |
| shape_guided_negative_prompt, | |
| fitting_degree, | |
| ddim_steps, | |
| scale, | |
| seed, | |
| task, | |
| vertical_expansion_ratio, | |
| horizontal_expansion_ratio, | |
| outpaint_prompt, | |
| outpaint_negative_prompt, | |
| removal_prompt, | |
| removal_negative_prompt, | |
| enable_control=False, | |
| input_control_image=None, | |
| control_type="canny", | |
| controlnet_conditioning_scale=None, | |
| ): | |
| if task == "text-guided": | |
| prompt = text_guided_prompt | |
| negative_prompt = text_guided_negative_prompt | |
| elif task == "shape-guided": | |
| prompt = shape_guided_prompt | |
| negative_prompt = shape_guided_negative_prompt | |
| elif task == "object-removal": | |
| prompt = removal_prompt | |
| negative_prompt = removal_negative_prompt | |
| elif task == "image-outpainting": | |
| prompt = outpaint_prompt | |
| negative_prompt = outpaint_negative_prompt | |
| return self.predict( | |
| input_image, | |
| prompt, | |
| fitting_degree, | |
| ddim_steps, | |
| scale, | |
| seed, | |
| negative_prompt, | |
| task, | |
| vertical_expansion_ratio, | |
| horizontal_expansion_ratio, | |
| ) | |
| else: | |
| task = "text-guided" | |
| prompt = text_guided_prompt | |
| negative_prompt = text_guided_negative_prompt | |
| # currently, we only support controlnet in PowerPaint-v1 | |
| if self.version == "ppt-v1" and enable_control and task == "text-guided": | |
| return self.predict_controlnet( | |
| input_image, | |
| input_control_image, | |
| control_type, | |
| prompt, | |
| ddim_steps, | |
| scale, | |
| seed, | |
| negative_prompt, | |
| controlnet_conditioning_scale, | |
| ) | |
| else: | |
| return self.predict( | |
| input_image, prompt, fitting_degree, ddim_steps, scale, seed, negative_prompt, task, None, None | |
| ) | |
| if __name__ == "__main__": | |
| args = argparse.ArgumentParser() | |
| args.add_argument("--weight_dtype", type=str, default="float16") | |
| args.add_argument("--checkpoint_dir", type=str, default="./checkpoints/ppt-v1") | |
| args.add_argument("--version", type=str, default="ppt-v1") | |
| args.add_argument("--share", action="store_true") | |
| args.add_argument( | |
| "--local_files_only", action="store_true", help="enable it to use cached files without requesting from the hub" | |
| ) | |
| args.add_argument("--port", type=int, default=7860) | |
| args = args.parse_args() | |
| # initialize the pipeline controller | |
| weight_dtype = torch.float16 if args.weight_dtype == "float16" else torch.float32 | |
| controller = PowerPaintController(weight_dtype, args.checkpoint_dir, args.local_files_only, args.version) | |
| # ui | |
| with gr.Blocks(css="style.css") as demo: | |
| with gr.Row(): | |
| gr.Markdown( | |
| "<div align='center'><font size='18'>PowerPaint: High-Quality Versatile Image Inpainting</font></div>" # noqa | |
| ) | |
| with gr.Row(): | |
| gr.Markdown( | |
| "<div align='center'><font size='5'><a href='https://powerpaint.github.io/'>Project Page</a>  " # noqa | |
| "<a href='https://arxiv.org/abs/2312.03594/'>Paper</a>  " | |
| "<a href='https://github.com/open-mmlab/powerpaint'>Code</a> </font></div>" # noqa | |
| ) | |
| with gr.Row(): | |
| gr.Markdown( | |
| "**Note:** Due to network-related factors, the page may experience occasional bugs! If the inpainting results deviate significantly from expectations, consider toggling between task options to refresh the content." # noqa | |
| ) | |
| # Attention: Due to network-related factors, the page may experience occasional bugs. If the inpainting results deviate significantly from expectations, consider toggling between task options to refresh the content. | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("### Input image and draw mask") | |
| input_image = gr.Image(source="upload", tool="sketch", type="pil") | |
| task = gr.Radio( | |
| ["text-guided", "object-removal", "shape-guided", "image-outpainting"], | |
| show_label=False, | |
| visible=False, | |
| ) | |
| # Text-guided object inpainting | |
| with gr.Tab("Text-guided object inpainting") as tab_text_guided: | |
| enable_text_guided = gr.Checkbox( | |
| label="Enable text-guided object inpainting", value=True, interactive=False | |
| ) | |
| text_guided_prompt = gr.Textbox(label="Prompt") | |
| text_guided_negative_prompt = gr.Textbox(label="negative_prompt") | |
| tab_text_guided.select(fn=select_tab_text_guided, inputs=None, outputs=task) | |
| # currently, we only support controlnet in PowerPaint-v1 | |
| if args.version == "ppt-v1": | |
| gr.Markdown("### Controlnet setting") | |
| enable_control = gr.Checkbox( | |
| label="Enable controlnet", info="Enable this if you want to use controlnet" | |
| ) | |
| controlnet_conditioning_scale = gr.Slider( | |
| label="controlnet conditioning scale", | |
| minimum=0, | |
| maximum=1, | |
| step=0.05, | |
| value=0.5, | |
| ) | |
| control_type = gr.Radio(["canny", "pose", "depth", "hed"], label="Control type") | |
| input_control_image = gr.Image(source="upload", type="pil") | |
| # Object removal inpainting | |
| with gr.Tab("Object removal inpainting") as tab_object_removal: | |
| enable_object_removal = gr.Checkbox( | |
| label="Enable object removal inpainting", | |
| value=True, | |
| info="The recommended configuration for the Guidance Scale is 10 or higher. \ | |
| If undesired objects appear in the masked area, \ | |
| you can address this by specifically increasing the Guidance Scale.", | |
| interactive=False, | |
| ) | |
| removal_prompt = gr.Textbox(label="Prompt") | |
| removal_negative_prompt = gr.Textbox(label="negative_prompt") | |
| tab_object_removal.select(fn=select_tab_object_removal, inputs=None, outputs=task) | |
| # Object image outpainting | |
| with gr.Tab("Image outpainting") as tab_image_outpainting: | |
| enable_object_removal = gr.Checkbox( | |
| label="Enable image outpainting", | |
| value=True, | |
| info="The recommended configuration for the Guidance Scale is 10 or higher. \ | |
| If unwanted random objects appear in the extended image region, \ | |
| you can enhance the cleanliness of the extension area by increasing the Guidance Scale.", | |
| interactive=False, | |
| ) | |
| outpaint_prompt = gr.Textbox(label="Outpainting_prompt") | |
| outpaint_negative_prompt = gr.Textbox(label="Outpainting_negative_prompt") | |
| horizontal_expansion_ratio = gr.Slider( | |
| label="horizontal expansion ratio", | |
| minimum=1, | |
| maximum=4, | |
| step=0.05, | |
| value=1, | |
| ) | |
| vertical_expansion_ratio = gr.Slider( | |
| label="vertical expansion ratio", | |
| minimum=1, | |
| maximum=4, | |
| step=0.05, | |
| value=1, | |
| ) | |
| tab_image_outpainting.select(fn=select_tab_image_outpainting, inputs=None, outputs=task) | |
| # Shape-guided object inpainting | |
| with gr.Tab("Shape-guided object inpainting") as tab_shape_guided: | |
| enable_shape_guided = gr.Checkbox( | |
| label="Enable shape-guided object inpainting", value=True, interactive=False | |
| ) | |
| shape_guided_prompt = gr.Textbox(label="shape_guided_prompt") | |
| shape_guided_negative_prompt = gr.Textbox(label="shape_guided_negative_prompt") | |
| fitting_degree = gr.Slider( | |
| label="fitting degree", | |
| minimum=0, | |
| maximum=1, | |
| step=0.05, | |
| value=1, | |
| ) | |
| tab_shape_guided.select(fn=select_tab_shape_guided, inputs=None, outputs=task) | |
| run_button = gr.Button(label="Run") | |
| with gr.Accordion("Advanced options", open=False): | |
| ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=50, value=45, step=1) | |
| scale = gr.Slider( | |
| label="Guidance Scale", | |
| info="For object removal and image outpainting, it is recommended to set the value at 10 or above.", | |
| minimum=0.1, | |
| maximum=30.0, | |
| value=7.5, | |
| step=0.1, | |
| ) | |
| seed = gr.Slider( | |
| label="Seed", | |
| minimum=0, | |
| maximum=2147483647, | |
| step=1, | |
| randomize=True, | |
| ) | |
| with gr.Column(): | |
| gr.Markdown("### Inpainting result") | |
| inpaint_result = gr.Gallery(label="Generated images", show_label=False, columns=2) | |
| gr.Markdown("### Mask") | |
| gallery = gr.Gallery(label="Generated masks", show_label=False, columns=2) | |
| if args.version == "ppt-v1": | |
| run_button.click( | |
| fn=controller.infer, | |
| inputs=[ | |
| input_image, | |
| text_guided_prompt, | |
| text_guided_negative_prompt, | |
| shape_guided_prompt, | |
| shape_guided_negative_prompt, | |
| fitting_degree, | |
| ddim_steps, | |
| scale, | |
| seed, | |
| task, | |
| vertical_expansion_ratio, | |
| horizontal_expansion_ratio, | |
| outpaint_prompt, | |
| outpaint_negative_prompt, | |
| removal_prompt, | |
| removal_negative_prompt, | |
| enable_control, | |
| input_control_image, | |
| control_type, | |
| controlnet_conditioning_scale, | |
| ], | |
| outputs=[inpaint_result, gallery], | |
| ) | |
| else: | |
| run_button.click( | |
| fn=controller.infer, | |
| inputs=[ | |
| input_image, | |
| text_guided_prompt, | |
| text_guided_negative_prompt, | |
| shape_guided_prompt, | |
| shape_guided_negative_prompt, | |
| fitting_degree, | |
| ddim_steps, | |
| scale, | |
| seed, | |
| task, | |
| vertical_expansion_ratio, | |
| horizontal_expansion_ratio, | |
| outpaint_prompt, | |
| outpaint_negative_prompt, | |
| removal_prompt, | |
| removal_negative_prompt, | |
| ], | |
| outputs=[inpaint_result, gallery], | |
| ) | |
| demo.queue() | |
| demo.launch(share=args.share, server_name="0.0.0.0", server_port=args.port) | |