Upload 3 files
Browse files- app_shadow_generation_gradio.py +56 -0
- generate_shadow_main.py +131 -0
- shadow_utils.py +54 -0
app_shadow_generation_gradio.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import numpy as np
|
| 4 |
+
from PIL import Image
|
| 5 |
+
from generate_shadow_main import Shadow_Diffusion
|
| 6 |
+
# Module-level singleton: the SDXL + ControlNet shadow pipeline is loaded once
# at import time. NOTE(review): construction moves models to CUDA — assumes a
# GPU is available on the serving host; confirm before deploying CPU-only.
shadow_diffuser = Shadow_Diffusion()

# Maps the UI radio-button label to its diffusion prompt. The keys are runtime
# values produced by the gr.Radio component, so they must remain exactly as
# displayed: "软阴影" = soft shadow, "硬阴影" = hard shadow, "悬浮阴影" = floating shadow.
shadow_prompt_dict = {"软阴影": "a product with soft natural shadow, white background",
                      "硬阴影": "a product with hard natural shadow, white background",
                      "悬浮阴影": "a product with floating natural shadow, white background",}
|
| 10 |
+
def generate_shadow(image_rgba, shadow_type, padding_rate, denoise_strength, num_inference_steps,
                    controlnet_conditioning_scale, cfg, seed):
    """Gradio click handler: split the RGBA upload into image + alpha mask and
    run the shadow diffusion pipeline.

    Args:
        image_rgba: HxWx4 uint8 array from the gr.Image(image_mode="RGBA") input.
        shadow_type: radio label, looked up in shadow_prompt_dict.
        Remaining args: raw slider values; coerced to float/int before the call.

    Returns:
        HxWx3 uint8 array with the composed shadow result.
    """
    # Channels 0-2 are the product image; channel 3 (alpha) becomes a 3-channel mask.
    rgb = image_rgba[..., :3]
    alpha = image_rgba[..., 3:]
    img_pil = Image.fromarray(rgb)
    mask_pil = Image.fromarray(np.repeat(alpha, 3, -1))

    prompt = shadow_prompt_dict[shadow_type]
    # Log the raw slider values for debugging.
    print(padding_rate, denoise_strength, num_inference_steps, controlnet_conditioning_scale, cfg, seed)

    # Only the final composed image is shown in the UI; the intermediate
    # outputs (masked image, depth, raw generation) are discarded.
    outputs = shadow_diffuser.generate_shadow(
        img_pil,
        mask_pil,
        prompt,
        padding_rate=float(padding_rate),
        denoise_strength=float(denoise_strength),
        num_inference_steps=int(num_inference_steps),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        cfg=float(cfg),
        seed=int(seed),
    )
    shadow_result_pil = outputs[-1]
    return np.array(shadow_result_pil)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# Gradio UI: RGBA product image in, composed white-background shadow image out.
with gr.Blocks() as demo:
    gr.Markdown("# 💡基于Diffusion的白底阴影生成 \n"
                "请确保上传带有透明通道的RGBA图像作为输入")
    with gr.Row():
        with gr.Column():
            rgba = gr.Image(image_mode="RGBA", label="输入商品图(RGBA)")
            gr.Examples(label="示例图片", inputs=[rgba],
                        examples=[os.path.join("./test_images", n) for n in os.listdir("./test_images")])
        with gr.Column():
            shadow_output = gr.Image(image_mode="RGB", label="阴影生成结果")

    with gr.Row():
        shadow_type = gr.Radio(["软阴影", "硬阴影", "悬浮阴影"], value="硬阴影", label="阴影类型")
        generate_btn = gr.Button(value="生成阴影")

    # Advanced parameters, collapsed by default.
    with gr.Accordion(label="其他参数>>", open=False) as sku_accordion:
        padding_rate = gr.Slider(0, 0.99, value=0.4, step=0.1, label="白边填充比例")
        # Fix: default value 1.0 previously exceeded the slider maximum (0.99).
        denoise_strength = gr.Slider(0, 1.0, value=1.0, step=0.01, label="去噪程度")
        num_inference_steps = gr.Slider(10, 50, value=20, step=1, label="推理步数")
        controlnet_conditioning_scale = gr.Slider(0, 1.0, value=0.5, step=0.01, label="控制强度(ControlNet)")
        # Fix: removed a duplicate, never-wired "推理步数" slider (step_num) that
        # shadowed num_inference_steps and was not passed to the callback.
        cfg = gr.Slider(0, 20, value=5, step=0.5, label="CFG")
        # Fix: the seed is an integer (-1 = random); step was 0.01, which let the
        # UI produce fractional values.
        seed = gr.Slider(-1, 99999999999999, value=42, step=1, label="随机种子")

    generate_btn.click(generate_shadow, inputs=[rgba, shadow_type, padding_rate, denoise_strength, num_inference_steps,
                                                controlnet_conditioning_scale, cfg, seed], outputs=shadow_output)

demo.queue().launch(server_name='[::]', share=True)
|
| 56 |
+
|
generate_shadow_main.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import cv2
|
| 3 |
+
import time
|
| 4 |
+
import torch
|
| 5 |
+
import numpy as np
|
| 6 |
+
import shadow_utils
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
from diffusers.utils import make_image_grid
|
| 12 |
+
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
|
| 13 |
+
from diffusers import ControlNetModel, StableDiffusionXLControlNetImg2ImgPipeline, AutoencoderKL
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class Shadow_Diffusion:
    """SDXL ControlNet img2img pipeline that renders a natural shadow for a
    cut-out product image on a white background.

    Pipeline: MiDaS depth estimation -> ControlNet (depth) conditioning ->
    SDXL img2img, then the original foreground is composited back on top so
    only the background/shadow region comes from the diffusion model.
    """

    def __init__(self,
                 base_model_path="stabilityai/stable-diffusion-xl-base-1.0",
                 vae_path="madebyollin/sdxl-vae-fp16-fix",
                 controlnet_path="./checkpoints/shadow",
                 depth_midas_path="Intel/dpt-hybrid-midas",
                 resolution=1024,
                 precision_type=torch.float16):
        # Working canvas resolution (square); all inputs are padded/resized to this.
        self.resolution=resolution
        # Depth estimator runs in full precision on CUDA; only the diffusion
        # components use `precision_type` (fp16 by default, matching the
        # fp16-fixed VAE checkpoint).
        self.depth_estimator = DPTForDepthEstimation.from_pretrained(depth_midas_path).to("cuda")
        self.feature_extractor = DPTFeatureExtractor.from_pretrained(depth_midas_path)
        self.controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=precision_type).to("cuda")
        self.vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=precision_type).to("cuda")
        self.shadow_pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(base_model_path,
                                                                                     controlnet=self.controlnet,
                                                                                     vae=self.vae,
                                                                                     variant="fp16",
                                                                                     use_safetensors=True,
                                                                                     torch_dtype=precision_type).to("cuda")
        # NOTE(review): diffusers documentation advises NOT moving the pipeline
        # to CUDA manually before enable_model_cpu_offload(); combining both may
        # negate the memory savings or error on some versions — confirm against
        # the pinned diffusers release.
        self.shadow_pipe.enable_model_cpu_offload()

    def get_depth_map(self, image):
        """Estimate a MiDaS depth map for `image` and return it as an RGB PIL image.

        The raw depth is resized to (resolution, resolution), min-max normalised
        to [0, 1] per sample, and replicated across 3 channels for ControlNet.
        """
        image = self.feature_extractor(images=image, return_tensors="pt").pixel_values.to("cuda")
        with torch.no_grad(), torch.autocast("cuda"):
            depth_map = self.depth_estimator(image).predicted_depth

        depth_map = torch.nn.functional.interpolate(
            depth_map.unsqueeze(1),
            size=(self.resolution, self.resolution),
            mode="bicubic",
            align_corners=False,
        )
        depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
        depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
        # NOTE(review): divides by (max - min) with no epsilon — a perfectly
        # constant depth map would produce NaNs; has not been observed here but
        # worth confirming for edge-case inputs.
        depth_map = (depth_map - depth_min) / (depth_max - depth_min)
        # Replicate the single depth channel to 3 channels (RGB control image).
        image = torch.cat([depth_map] * 3, dim=1)
        image = image.permute(0, 2, 3, 1).cpu().numpy()[0]
        image = Image.fromarray((image * 255.0).clip(0, 255).astype(np.uint8))
        return image

    def generate_shadow(self,
                        img_pil,
                        mask_pil,
                        prompt="",
                        padding_rate=0.4,
                        denoise_strength=1.0,
                        num_inference_steps=20,
                        controlnet_conditioning_scale=0.5,
                        cfg=5.0,
                        seed=-1):
        """Generate a shadow for a masked product image.

        Args:
            img_pil: RGB product image (PIL).
            mask_pil: 3-channel foreground mask (PIL), 255 = foreground.
            prompt: diffusion prompt describing the desired shadow.
            padding_rate: fraction of white border added around the foreground.
            denoise_strength: img2img strength in [0, 1].
            num_inference_steps: diffusion sampling steps.
            controlnet_conditioning_scale: depth-control strength.
            cfg: classifier-free guidance scale.
            seed: RNG seed; -1 means non-deterministic (no generator supplied).

        Returns:
            (masked_image_pil, masked_depth_pil, generated_image, composed_image)
            — intermediates plus the final composite with the original
            foreground pasted back over the generated background.
        """
        # 1. Crop to the foreground's minimum bounding box.
        objcut_xmin, objcut_xmax, objcut_ymin, objcut_ymax = shadow_utils.get_mask_bbox(np.array(mask_pil))
        img_pil = Image.fromarray(np.array(img_pil)[objcut_ymin:objcut_ymax, objcut_xmin:objcut_xmax])
        mask_pil = Image.fromarray(np.array(mask_pil)[objcut_ymin:objcut_ymax, objcut_xmin:objcut_xmax])

        # 2. Pad the crop onto a square canvas: white for the image, black for the mask.
        img_pil = shadow_utils.padding_image(img_pil, padding_rate, self.resolution, (255, 255, 255))
        mask_pil = shadow_utils.padding_image(mask_pil, padding_rate, self.resolution, (0, 0, 0))

        mask_np = np.array(mask_pil)
        depth_pil = self.get_depth_map(img_pil)
        # Zero out depth outside the foreground so ControlNet only "sees" the object.
        masked_depth_pil = Image.fromarray(((1 - mask_np / 255.0) * np.zeros_like(mask_np) +
                                            mask_np / 255.0 * np.array(depth_pil)).astype(np.uint8))
        # White-out the background of the input image for img2img.
        masked_image_pil = Image.fromarray(((1 - mask_np / 255.0) * np.array([255, 255, 255])[np.newaxis, np.newaxis, :] +
                                            mask_np / 255.0 * np.array(img_pil)).astype(np.uint8))

        generated_image = self.shadow_pipe(prompt,
                                           image=masked_image_pil,
                                           control_image=masked_depth_pil,
                                           strength=denoise_strength,
                                           num_inference_steps=num_inference_steps,
                                           generator=None if seed == -1 else torch.manual_seed(seed),
                                           controlnet_conditioning_scale=controlnet_conditioning_scale,
                                           guidance_scale=cfg
                                           ).images[0]
        # Paste the untouched foreground back over the generated background so
        # the product itself is never altered by the diffusion model.
        composed_image = mask_np / 255.0 * np.array(img_pil) + (1 - mask_np / 255.0) * np.array(generated_image)
        composed_image = Image.fromarray(composed_image.astype(np.uint8))

        return masked_image_pil, masked_depth_pil, generated_image, composed_image
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
if __name__ == '__main__':

    # Batch driver: render all three shadow styles for every test image and
    # save a 1xN comparison grid per image.
    shadowfree_img_dir = "./test_images"

    # Fix: the original used "./test_results".format(...) — no "{}" placeholder,
    # so the timestamp was silently discarded and every run wrote to the same
    # directory. Include the placeholder so each run gets its own folder.
    save_dir = "./test_results_{}".format(datetime.now().strftime("%Y%m%d%H%M%S"))
    os.makedirs(save_dir, exist_ok=True)

    prompts = {"软阴影": "a product with soft natural shadow, white background",
               "硬阴影": "a product with hard natural shadow, white background",
               "悬浮阴影": "a product with floating natural shadow, white background"}
    shadow_diffuser = Shadow_Diffusion()

    for image_name in os.listdir(shadowfree_img_dir):
        # Fix: the image was previously re-read and re-decoded once per shadow
        # type inside the inner loop; load and split it once per file.
        org_image = cv2.imread(os.path.join(shadowfree_img_dir, image_name), cv2.IMREAD_UNCHANGED)
        org_image = cv2.cvtColor(org_image, cv2.COLOR_BGRA2RGBA)
        img_pil = Image.fromarray(org_image[..., :-1])
        mask_pil = Image.fromarray(np.repeat(org_image[..., -1:], 3, -1))

        results = []
        for shadow_type, prompt in prompts.items():
            start_time = time.time()
            masked_img, masked_depth, gen_img, compose_img = shadow_diffuser.generate_shadow(
                img_pil,
                mask_pil,
                prompt,
                num_inference_steps=50,
                padding_rate=0.5,
                seed=42,
                denoise_strength=1.0,
                cfg=5.0,
                controlnet_conditioning_scale=0.5,
            )
            # Fix: start_time was captured but never used; report per-style timing.
            print("{} [{}]: {:.1f}s".format(image_name, shadow_type, time.time() - start_time))
            results.append(compose_img)
        # Fix: grid width follows the number of prompts instead of a hard-coded 3.
        make_image_grid(results, rows=1, cols=len(results)).save(os.path.join(save_dir, image_name))
|
shadow_utils.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def get_mask_bbox(mask_org, threshold=0):
    """Return the foreground bounding box of a mask as (xmin, xmax, ymin, ymax).

    The max bounds are exclusive, so the result can be used directly for
    slicing: arr[ymin:ymax, xmin:xmax]. If the mask is entirely empty, a
    symbolic one-pixel box at the image centre is returned (matching the
    exclusive-bound convention).

    Args:
        mask_org: HxW or HxWxC uint8 array; values > threshold are foreground.
        threshold: binarization cutoff (default 0).
    """
    mask = mask_org.copy()
    # Binarize in place on the copy: foreground -> 255, background -> 0.
    mask[mask > threshold] = 255
    mask[mask <= threshold] = 0
    h, w = mask.shape[:2]
    if len(mask.shape) == 3:
        # Multi-channel: a pixel is foreground if its channel mean clears the threshold.
        coords = np.where(np.mean(mask, axis=-1, keepdims=False) > threshold)
    else:
        # Fix: was a hard-coded `> 20`, inconsistent with the `threshold`
        # parameter used for the multi-channel branch.
        coords = np.where(mask > threshold)

    if len(coords[0]) > 0:
        # Fix: +1 makes the bound exclusive. The old inclusive max dropped the
        # last foreground row/column when callers sliced with [ymin:ymax];
        # the empty-mask fallback below already assumed exclusive bounds.
        ymin, ymax = coords[0].min(), coords[0].max() + 1
    else:
        ymin, ymax = h // 2, h // 2 + 1
    if len(coords[1]) > 0:
        xmin, xmax = coords[1].min(), coords[1].max() + 1
    else:
        xmin, xmax = w // 2, w // 2 + 1
    return (xmin, xmax, ymin, ymax)
|
| 28 |
+
|
| 29 |
+
def padding_image(img, padding_rate, canve_reso=1024, padding_color=(0, 0, 0)):
    """Scale `img` proportionally and centre it on a square canvas.

    The long side of the image occupies (1 - padding_rate) of the canvas edge;
    the remaining border is filled with `padding_color`. (Fix: the old
    docstring claimed the canvas was always white — the fill colour is the
    `padding_color` argument.)

    Args:
        img: PIL.Image, 3-channel, to be resized with cv2.INTER_AREA.
        padding_rate: fraction of the canvas edge left as border, in [0, 1).
        canve_reso: canvas side length in pixels (misspelled name kept for
            backward compatibility with existing callers).
        padding_color: per-channel fill value for the border.

    Returns:
        PIL.Image of size (canve_reso, canve_reso).
    """
    long_size = int(canve_reso * (1 - padding_rate))
    img = np.array(img)
    h, w = img.shape[:2]

    # Fix: clamp target sizes to >= 1 pixel. Extreme aspect ratios (or a
    # padding_rate approaching 1) previously produced a 0-sized dimension,
    # which makes cv2.resize raise.
    if h > w:
        new_h = max(1, long_size)
        new_w = max(1, int(w / h * long_size))
    else:
        new_w = max(1, long_size)
        new_h = max(1, int(h / w * long_size))

    resized_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Centre the resized image on the canvas.
    padding_h = (canve_reso - new_h) // 2
    padding_w = (canve_reso - new_w) // 2

    padding_output = np.ones((canve_reso, canve_reso, resized_img.shape[-1])) * \
        np.array(padding_color)[np.newaxis, np.newaxis, :]
    padding_output[padding_h:padding_h + new_h, padding_w:padding_w + new_w, :] = resized_img
    return Image.fromarray(padding_output.astype(np.uint8))
|