Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- CatVTON/.gitattributes +36 -0
- CatVTON/.gitignore +2 -0
- CatVTON/README.md +13 -0
- CatVTON/__pycache__/utils.cpython-39.pyc +0 -0
- CatVTON/app.py +778 -0
- CatVTON/densepose/__init__.py +22 -0
- CatVTON/densepose/__pycache__/__init__.cpython-39.pyc +0 -0
- CatVTON/densepose/__pycache__/config.cpython-39.pyc +0 -0
- CatVTON/densepose/config.py +277 -0
- CatVTON/densepose/converters/__init__.py +17 -0
- CatVTON/densepose/converters/__pycache__/__init__.cpython-39.pyc +0 -0
- CatVTON/densepose/converters/__pycache__/base.cpython-39.pyc +0 -0
- CatVTON/densepose/converters/__pycache__/builtin.cpython-39.pyc +0 -0
- CatVTON/densepose/converters/__pycache__/chart_output_hflip.cpython-39.pyc +0 -0
- CatVTON/densepose/converters/__pycache__/chart_output_to_chart_result.cpython-39.pyc +0 -0
- CatVTON/densepose/converters/__pycache__/hflip.cpython-39.pyc +0 -0
- CatVTON/densepose/converters/__pycache__/segm_to_mask.cpython-39.pyc +0 -0
- CatVTON/densepose/converters/__pycache__/to_chart_result.cpython-39.pyc +0 -0
- CatVTON/densepose/converters/__pycache__/to_mask.cpython-39.pyc +0 -0
- CatVTON/densepose/converters/base.py +95 -0
- CatVTON/densepose/converters/builtin.py +33 -0
- CatVTON/densepose/converters/chart_output_hflip.py +73 -0
- CatVTON/densepose/converters/chart_output_to_chart_result.py +190 -0
- CatVTON/densepose/converters/hflip.py +36 -0
- CatVTON/densepose/converters/segm_to_mask.py +152 -0
- CatVTON/densepose/converters/to_chart_result.py +72 -0
- CatVTON/densepose/converters/to_mask.py +51 -0
- CatVTON/densepose/engine/__init__.py +5 -0
- CatVTON/densepose/engine/trainer.py +260 -0
- CatVTON/densepose/modeling/__init__.py +15 -0
- CatVTON/densepose/modeling/build.py +89 -0
- CatVTON/densepose/modeling/confidence.py +75 -0
- CatVTON/densepose/modeling/densepose_checkpoint.py +37 -0
- CatVTON/densepose/modeling/filter.py +96 -0
- CatVTON/densepose/modeling/hrfpn.py +184 -0
- CatVTON/densepose/modeling/hrnet.py +476 -0
- CatVTON/densepose/modeling/inference.py +46 -0
- CatVTON/densepose/modeling/test_time_augmentation.py +209 -0
- CatVTON/densepose/modeling/utils.py +13 -0
- CatVTON/densepose/utils/__init__.py +0 -0
- CatVTON/densepose/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- CatVTON/densepose/utils/__pycache__/transform.cpython-39.pyc +0 -0
- CatVTON/densepose/utils/dbhelper.py +149 -0
- CatVTON/densepose/utils/logger.py +15 -0
- CatVTON/densepose/utils/transform.py +17 -0
- CatVTON/model/DensePose/__init__.py +158 -0
- CatVTON/model/DensePose/__pycache__/__init__.cpython-310.pyc +0 -0
- CatVTON/model/DensePose/__pycache__/__init__.cpython-312.pyc +0 -0
- CatVTON/model/DensePose/__pycache__/__init__.cpython-39.pyc +0 -0
- CatVTON/model/SCHP/__init__.py +179 -0
CatVTON/.gitattributes
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
detectron2/_C.cpython-39-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
CatVTON/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
playground.py
|
| 2 |
+
__pycache__
|
CatVTON/README.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: CatVTON
|
| 3 |
+
emoji: 🐈
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 4.40.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: cc-by-nc-sa-4.0
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
CatVTON/__pycache__/utils.cpython-39.pyc
ADDED
|
Binary file (20.3 kB). View file
|
|
|
CatVTON/app.py
ADDED
|
@@ -0,0 +1,778 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
os.environ['CUDA_HOME'] = '/usr/local/cuda'
|
| 4 |
+
os.environ['PATH'] = os.environ['PATH'] + ':/usr/local/cuda/bin'
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
import gradio as gr
|
| 8 |
+
import spaces
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch
|
| 11 |
+
from diffusers.image_processor import VaeImageProcessor
|
| 12 |
+
from huggingface_hub import snapshot_download
|
| 13 |
+
from PIL import Image
|
| 14 |
+
torch.jit.script = lambda f: f
|
| 15 |
+
from model.cloth_masker import AutoMasker, vis_mask
|
| 16 |
+
from model.pipeline import CatVTONPipeline, CatVTONPix2PixPipeline
|
| 17 |
+
from model.flux.pipeline_flux_tryon import FluxTryOnPipeline
|
| 18 |
+
from utils import init_weight_dtype, resize_and_crop, resize_and_padding
|
| 19 |
+
|
| 20 |
+
|
def parse_args():
    """Parse command-line options for the CatVTON Gradio demo.

    Returns:
        argparse.Namespace with model checkpoint paths, output directory,
        target resolution, TF32/mixed-precision flags, and ``local_rank``.

    Bug fix: the original parser never declared ``--local_rank`` yet read
    ``args.local_rank`` below, so any launch with the ``LOCAL_RANK``
    environment variable set crashed with AttributeError.
    """
    parser = argparse.ArgumentParser(description="Simple example of a training script.")
    parser.add_argument(
        "--base_model_path",
        type=str,
        default="booksforcharlie/stable-diffusion-inpainting",
        help=(
            "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
        ),
    )
    parser.add_argument(
        "--p2p_base_model_path",
        type=str,
        default="timbrooks/instruct-pix2pix",
        help=(
            "The path to the base model to use for evaluation. This can be a local path or a model identifier from the Model Hub."
        ),
    )
    parser.add_argument(
        "--resume_path",
        type=str,
        default="zhengchong/CatVTON",
        help=(
            "The Path to the checkpoint of trained tryon model."
        ),
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="resource/demo/output",
        help="The output directory where the model predictions will be written.",
    )
    parser.add_argument(
        "--width",
        type=int,
        default=768,
        help=(
            "The resolution for input images, all the images in the train/validation dataset will be resized to this"
            " resolution"
        ),
    )
    parser.add_argument(
        "--height",
        type=int,
        default=1024,
        help=(
            "The resolution for input images, all the images in the train/validation dataset will be resized to this"
            " resolution"
        ),
    )
    parser.add_argument(
        "--repaint",
        action="store_true",
        help="Whether to repaint the result image with the original background."
    )
    parser.add_argument(
        "--allow_tf32",
        action="store_true",
        default=True,
        help=(
            "Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see"
            " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices"
        ),
    )
    parser.add_argument(
        "--mixed_precision",
        type=str,
        default="bf16",
        choices=["no", "fp16", "bf16"],
        help=(
            "Whether to use mixed precision. Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >="
            " 1.10.and an Nvidia Ampere GPU. Default to the value of accelerate config of the current system or the"
            " flag passed with the `accelerate.launch` command. Use this argument to override the accelerate config."
    ),
    )
    # Fix: declare --local_rank so args.local_rank always exists; the env
    # var (set by distributed launchers) overrides the CLI value below.
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="For distributed launchers: local rank. Overridden by the LOCAL_RANK env var when set.",
    )

    args = parser.parse_args()
    env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
    if env_local_rank != -1 and env_local_rank != args.local_rank:
        args.local_rank = env_local_rank

    return args
| 104 |
+
|
def image_grid(imgs, rows, cols):
    """Tile equally-sized PIL images into a single rows x cols grid image."""
    assert len(imgs) == rows * cols

    cell_w, cell_h = imgs[0].size
    canvas = Image.new("RGB", size=(cols * cell_w, rows * cell_h))

    for idx, img in enumerate(imgs):
        row, col = divmod(idx, cols)
        canvas.paste(img, box=(col * cell_w, row * cell_h))
    return canvas
| 114 |
+
|
| 115 |
+
|
# ---------------------------------------------------------------------------
# Module-level model setup — runs once at import time. Statement order
# matters: every pipeline below reuses `args` and the downloaded `repo_path`.
# ---------------------------------------------------------------------------
args = parse_args()

# Mask-based CatVTON
# Downloads the CatVTON attention weights once; `repo_path` is the local
# snapshot directory reused by the pipelines and the AutoMasker below.
catvton_repo = "zhengchong/CatVTON"
repo_path = snapshot_download(repo_id=catvton_repo)
# Pipeline
pipeline = CatVTONPipeline(
    base_ckpt=args.base_model_path,
    attn_ckpt=repo_path,
    attn_ckpt_version="mix",
    weight_dtype=init_weight_dtype(args.mixed_precision),
    use_tf32=args.allow_tf32,
    device='cuda'
)
# AutoMasker
# mask_processor binarizes and blurs user/auto masks; AutoMasker builds
# try-on masks from DensePose + SCHP checkpoints inside the snapshot.
mask_processor = VaeImageProcessor(vae_scale_factor=8, do_normalize=False, do_binarize=True, do_convert_grayscale=True)
automasker = AutoMasker(
    densepose_ckpt=os.path.join(repo_path, "DensePose"),
    schp_ckpt=os.path.join(repo_path, "SCHP"),
    device='cuda',
)


# Flux-based CatVTON
# FLUX.1-Fill is a gated repo, so an HF token is required; the CatVTON
# try-on LoRA from the snapshot is loaded on top of the base pipeline.
access_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
flux_repo = "black-forest-labs/FLUX.1-Fill-dev"
pipeline_flux = FluxTryOnPipeline.from_pretrained(flux_repo, use_auth_token=access_token)
pipeline_flux.load_lora_weights(
    os.path.join(repo_path, "flux-lora"),
    weight_name='pytorch_lora_weights.safetensors'
)
pipeline_flux.to("cuda", init_weight_dtype(args.mixed_precision))


# Mask-free CatVTON
# Separate snapshot: an instruct-pix2pix backbone that needs no mask input.
catvton_mf_repo = "zhengchong/CatVTON-MaskFree"
repo_path_mf = snapshot_download(repo_id=catvton_mf_repo, use_auth_token=access_token)
pipeline_p2p = CatVTONPix2PixPipeline(
    base_ckpt=args.p2p_base_model_path,
    attn_ckpt=repo_path_mf,
    attn_ckpt_version="mix-48k-1024",
    weight_dtype=init_weight_dtype(args.mixed_precision),
    use_tf32=args.allow_tf32,
    device='cuda'
)
@spaces.GPU(duration=120)
def submit_function(
    person_image,
    cloth_image,
    cloth_type,
    num_inference_steps,
    guidance_scale,
    seed,
    show_type
):
    """Run mask-based try-on (SD1.5 CatVTON pipeline) for the Gradio demo.

    Args:
        person_image: Gradio ImageEditor payload; ``["background"]`` is the
            person photo path, ``["layers"][0]`` the user-drawn mask layer path.
        cloth_image: File path of the garment image.
        cloth_type: "upper" / "lower" / "overall", used by the auto-masker
            when no mask was drawn.
        num_inference_steps: Diffusion denoising steps.
        guidance_scale: Classifier-free guidance scale.
        seed: RNG seed; -1 means non-deterministic (no fixed generator).
        show_type: "result only", "input & result", or anything else for
            input + mask + result side by side.

    Returns:
        PIL.Image: the try-on result, optionally tiled with the inputs.
    """
    # Unpack the editor payload: background photo + optional drawn mask layer.
    person_image, mask = person_image["background"], person_image["layers"][0]
    mask = Image.open(mask).convert("L")
    # A layer with a single gray level means the user drew nothing.
    if len(np.unique(np.array(mask))) == 1:
        mask = None
    else:
        mask = np.array(mask)
        mask[mask > 0] = 255  # binarize drawn strokes
        mask = Image.fromarray(mask)

    # Results are saved under <output_dir>/YYYYMMDD/HHMMSS.png.
    date_str = datetime.now().strftime("%Y%m%d%H%M%S")
    day_folder = os.path.join(args.output_dir, date_str[:8])
    # Fix: exist_ok=True replaces the racy exists()-then-makedirs() check.
    os.makedirs(day_folder, exist_ok=True)
    result_save_path = os.path.join(day_folder, date_str[8:] + ".png")

    generator = None
    if seed != -1:
        generator = torch.Generator(device='cuda').manual_seed(seed)

    person_image = Image.open(person_image).convert("RGB")
    cloth_image = Image.open(cloth_image).convert("RGB")
    person_image = resize_and_crop(person_image, (args.width, args.height))
    cloth_image = resize_and_padding(cloth_image, (args.width, args.height))

    # Use the drawn mask when present; otherwise generate one automatically.
    if mask is not None:
        mask = resize_and_crop(mask, (args.width, args.height))
    else:
        mask = automasker(
            person_image,
            cloth_type
        )['mask']
    mask = mask_processor.blur(mask, blur_factor=9)

    # Inference (dead commented-out try/except removed; errors propagate).
    result_image = pipeline(
        image=person_image,
        condition_image=cloth_image,
        mask=mask,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=generator
    )[0]

    # Post-process: persist a contact sheet, then build the display image.
    masked_person = vis_mask(person_image, mask)
    save_result_image = image_grid([person_image, masked_person, cloth_image, result_image], 1, 4)
    save_result_image.save(result_save_path)
    if show_type == "result only":
        return result_image
    else:
        width, height = person_image.size
        if show_type == "input & result":
            condition_width = width // 2
            conditions = image_grid([person_image, cloth_image], 2, 1)
        else:
            condition_width = width // 3
            conditions = image_grid([person_image, masked_person, cloth_image], 3, 1)
        conditions = conditions.resize((condition_width, height), Image.NEAREST)
        new_result_image = Image.new("RGB", (width + condition_width + 5, height))
        new_result_image.paste(conditions, (0, 0))
        new_result_image.paste(result_image, (condition_width + 5, 0))
        return new_result_image
| 241 |
+
|
@spaces.GPU(duration=120)
def submit_function_p2p(
    person_image,
    cloth_image,
    num_inference_steps,
    guidance_scale,
    seed):
    """Run mask-free try-on (instruct-pix2pix CatVTON pipeline).

    Args:
        person_image: Gradio ImageEditor payload; only ``["background"]``
            (the person photo path) is used — no mask is needed.
        cloth_image: File path of the garment image.
        num_inference_steps: Diffusion denoising steps.
        guidance_scale: Classifier-free guidance scale.
        seed: RNG seed; -1 means non-deterministic.

    Returns:
        PIL.Image: the try-on result.

    Raises:
        gr.Error: if the pipeline fails, surfaced to the Gradio UI.
    """
    person_image = person_image["background"]

    # Results are saved under <output_dir>/YYYYMMDD/HHMMSS.png.
    date_str = datetime.now().strftime("%Y%m%d%H%M%S")
    day_folder = os.path.join(args.output_dir, date_str[:8])
    # Fix: exist_ok=True replaces the racy exists()-then-makedirs() check.
    os.makedirs(day_folder, exist_ok=True)
    result_save_path = os.path.join(day_folder, date_str[8:] + ".png")

    generator = None
    if seed != -1:
        generator = torch.Generator(device='cuda').manual_seed(seed)

    person_image = Image.open(person_image).convert("RGB")
    cloth_image = Image.open(cloth_image).convert("RGB")
    person_image = resize_and_crop(person_image, (args.width, args.height))
    cloth_image = resize_and_padding(cloth_image, (args.width, args.height))

    # Inference: wrap so UI users see a friendly error instead of a traceback.
    try:
        result_image = pipeline_p2p(
            image=person_image,
            condition_image=cloth_image,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=generator
        )[0]
    except Exception as e:
        raise gr.Error(
            "An error occurred. Please try again later: {}".format(e)
        )

    # Post-process: persist a contact sheet of inputs + result.
    save_result_image = image_grid([person_image, cloth_image, result_image], 1, 3)
    save_result_image.save(result_save_path)
    return result_image
| 284 |
+
|
@spaces.GPU(duration=120)
def submit_function_flux(
    person_image,
    cloth_image,
    cloth_type,
    num_inference_steps,
    guidance_scale,
    seed,
    show_type
):
    """Run FLUX-based try-on for the Gradio demo.

    Same contract as ``submit_function`` but drives the FLUX.1-Fill pipeline
    and does not persist the result to disk.
    """
    # Unpack the editor payload: background photo + optional drawn mask layer.
    person_image, mask = person_image["background"], person_image["layers"][0]
    mask = Image.open(mask).convert("L")
    # A single gray level in the layer means the user drew nothing.
    if len(np.unique(np.array(mask))) == 1:
        mask = None
    else:
        mask_arr = np.array(mask)
        mask_arr[mask_arr > 0] = 255  # binarize drawn strokes
        mask = Image.fromarray(mask_arr)

    # Fixed seed -> deterministic generator; -1 leaves sampling random.
    generator = torch.Generator(device='cuda').manual_seed(seed) if seed != -1 else None

    # Load and normalize both inputs to the working resolution.
    person_image = resize_and_crop(
        Image.open(person_image).convert("RGB"), (args.width, args.height)
    )
    cloth_image = resize_and_padding(
        Image.open(cloth_image).convert("RGB"), (args.width, args.height)
    )

    # Drawn mask wins; otherwise derive one from the requested cloth type.
    if mask is None:
        mask = automasker(
            person_image,
            cloth_type
        )['mask']
    else:
        mask = resize_and_crop(mask, (args.width, args.height))
    mask = mask_processor.blur(mask, blur_factor=9)

    # Inference
    result_image = pipeline_flux(
        image=person_image,
        condition_image=cloth_image,
        mask_image=mask,
        width=args.width,
        height=args.height,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=generator
    ).images[0]

    # Post-processing: visualization of the mask over the person photo.
    masked_person = vis_mask(person_image, mask)

    # Compose the display image according to the requested show type.
    if show_type == "result only":
        return result_image

    width, height = person_image.size
    if show_type == "input & result":
        condition_width = width // 2
        conditions = image_grid([person_image, cloth_image], 2, 1)
    else:
        condition_width = width // 3
        conditions = image_grid([person_image, masked_person, cloth_image], 3, 1)

    conditions = conditions.resize((condition_width, height), Image.NEAREST)
    canvas = Image.new("RGB", (width + condition_width + 5, height))
    canvas.paste(conditions, (0, 0))
    canvas.paste(result_image, (condition_width + 5, 0))
    return canvas
| 361 |
+
|
| 362 |
+
|
def person_example_fn(image_path):
    """Forward the selected example's path into the hidden image component."""
    return image_path
| 365 |
+
|
| 366 |
+
|
# HTML/Markdown banner rendered at the top of the Gradio demo page:
# title, badge links (paper, weights, repo, demos, project page, license)
# and usage notes. Rendered via gr.Markdown in app_gradio().
HEADER = """
<h1 style="text-align: center;"> 🐈 CatVTON: Concatenation Is All You Need for Virtual Try-On with Diffusion Models </h1>
<div style="display: flex; justify-content: center; align-items: center;">
<a href="http://arxiv.org/abs/2407.15886" style="margin: 0 2px;">
<img src='https://img.shields.io/badge/arXiv-2407.15886-red?style=flat&logo=arXiv&logoColor=red' alt='arxiv'>
</a>
<a href='https://huggingface.co/zhengchong/CatVTON' style="margin: 0 2px;">
<img src='https://img.shields.io/badge/Hugging Face-ckpts-orange?style=flat&logo=HuggingFace&logoColor=orange' alt='huggingface'>
</a>
<a href="https://github.com/Zheng-Chong/CatVTON" style="margin: 0 2px;">
<img src='https://img.shields.io/badge/GitHub-Repo-blue?style=flat&logo=GitHub' alt='GitHub'>
</a>
<a href="http://120.76.142.206:8888" style="margin: 0 2px;">
<img src='https://img.shields.io/badge/Demo-Gradio-gold?style=flat&logo=Gradio&logoColor=red' alt='Demo'>
</a>
<a href="https://huggingface.co/spaces/zhengchong/CatVTON" style="margin: 0 2px;">
<img src='https://img.shields.io/badge/Space-ZeroGPU-orange?style=flat&logo=Gradio&logoColor=red' alt='Demo'>
</a>
<a href='https://zheng-chong.github.io/CatVTON/' style="margin: 0 2px;">
<img src='https://img.shields.io/badge/Webpage-Project-silver?style=flat&logo=&logoColor=orange' alt='webpage'>
</a>
<a href="https://github.com/Zheng-Chong/CatVTON/LICENCE" style="margin: 0 2px;">
<img src='https://img.shields.io/badge/License-CC BY--NC--SA--4.0-lightgreen?style=flat&logo=Lisence' alt='License'>
</a>
</div>
<br>
· This demo and our weights are only for Non-commercial Use. <br>
· Thanks to <a href="https://huggingface.co/zero-gpu-explorers">ZeroGPU</a> for providing A100 for our <a href="https://huggingface.co/spaces/zhengchong/CatVTON">HuggingFace Space</a>. <br>
· SafetyChecker is set to filter NSFW content, but it may block normal results too. Please adjust the <span>`seed`</span> for normal outcomes.<br>
"""
| 397 |
+
|
| 398 |
+
def app_gradio():
|
| 399 |
+
with gr.Blocks(title="CatVTON") as demo:
|
| 400 |
+
gr.Markdown(HEADER)
|
| 401 |
+
with gr.Tab("Mask-based & SD1.5"):
|
| 402 |
+
with gr.Row():
|
| 403 |
+
with gr.Column(scale=1, min_width=350):
|
| 404 |
+
with gr.Row():
|
| 405 |
+
image_path = gr.Image(
|
| 406 |
+
type="filepath",
|
| 407 |
+
interactive=True,
|
| 408 |
+
visible=False,
|
| 409 |
+
)
|
| 410 |
+
person_image = gr.ImageEditor(
|
| 411 |
+
interactive=True, label="Person Image", type="filepath"
|
| 412 |
+
)
|
| 413 |
+
|
| 414 |
+
with gr.Row():
|
| 415 |
+
with gr.Column(scale=1, min_width=230):
|
| 416 |
+
cloth_image = gr.Image(
|
| 417 |
+
interactive=True, label="Condition Image", type="filepath"
|
| 418 |
+
)
|
| 419 |
+
with gr.Column(scale=1, min_width=120):
|
| 420 |
+
gr.Markdown(
|
| 421 |
+
'<span style="color: #808080; font-size: small;">Two ways to provide Mask:<br>1. Upload the person image and use the `🖌️` above to draw the Mask (higher priority)<br>2. Select the `Try-On Cloth Type` to generate automatically </span>'
|
| 422 |
+
)
|
| 423 |
+
cloth_type = gr.Radio(
|
| 424 |
+
label="Try-On Cloth Type",
|
| 425 |
+
choices=["upper", "lower", "overall"],
|
| 426 |
+
value="upper",
|
| 427 |
+
)
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
submit = gr.Button("Submit")
|
| 431 |
+
gr.Markdown(
|
| 432 |
+
'<center><span style="color: #FF0000">!!! Click only Once, Wait for Delay !!!</span></center>'
|
| 433 |
+
)
|
| 434 |
+
|
| 435 |
+
gr.Markdown(
|
| 436 |
+
'<span style="color: #808080; font-size: small;">Advanced options can adjust details:<br>1. `Inference Step` may enhance details;<br>2. `CFG` is highly correlated with saturation;<br>3. `Random seed` may improve pseudo-shadow.</span>'
|
| 437 |
+
)
|
| 438 |
+
with gr.Accordion("Advanced Options", open=False):
|
| 439 |
+
num_inference_steps = gr.Slider(
|
| 440 |
+
label="Inference Step", minimum=10, maximum=100, step=5, value=50
|
| 441 |
+
)
|
| 442 |
+
# Guidence Scale
|
| 443 |
+
guidance_scale = gr.Slider(
|
| 444 |
+
label="CFG Strenth", minimum=0.0, maximum=7.5, step=0.5, value=2.5
|
| 445 |
+
)
|
| 446 |
+
# Random Seed
|
| 447 |
+
seed = gr.Slider(
|
| 448 |
+
label="Seed", minimum=-1, maximum=10000, step=1, value=42
|
| 449 |
+
)
|
| 450 |
+
show_type = gr.Radio(
|
| 451 |
+
label="Show Type",
|
| 452 |
+
choices=["result only", "input & result", "input & mask & result"],
|
| 453 |
+
value="input & mask & result",
|
| 454 |
+
)
|
| 455 |
+
|
| 456 |
+
with gr.Column(scale=2, min_width=500):
|
| 457 |
+
result_image = gr.Image(interactive=False, label="Result")
|
| 458 |
+
with gr.Row():
|
| 459 |
+
# Photo Examples
|
| 460 |
+
root_path = "resource/demo/example"
|
| 461 |
+
with gr.Column():
|
| 462 |
+
men_exm = gr.Examples(
|
| 463 |
+
examples=[
|
| 464 |
+
os.path.join(root_path, "person", "men", _)
|
| 465 |
+
for _ in os.listdir(os.path.join(root_path, "person", "men"))
|
| 466 |
+
],
|
| 467 |
+
examples_per_page=4,
|
| 468 |
+
inputs=image_path,
|
| 469 |
+
label="Person Examples ①",
|
| 470 |
+
)
|
| 471 |
+
women_exm = gr.Examples(
|
| 472 |
+
examples=[
|
| 473 |
+
os.path.join(root_path, "person", "women", _)
|
| 474 |
+
for _ in os.listdir(os.path.join(root_path, "person", "women"))
|
| 475 |
+
],
|
| 476 |
+
examples_per_page=4,
|
| 477 |
+
inputs=image_path,
|
| 478 |
+
label="Person Examples ②",
|
| 479 |
+
)
|
| 480 |
+
gr.Markdown(
|
| 481 |
+
'<span style="color: #808080; font-size: small;">*Person examples come from the demos of <a href="https://huggingface.co/spaces/levihsu/OOTDiffusion">OOTDiffusion</a> and <a href="https://www.outfitanyone.org">OutfitAnyone</a>. </span>'
|
| 482 |
+
)
|
| 483 |
+
with gr.Column():
|
| 484 |
+
condition_upper_exm = gr.Examples(
|
| 485 |
+
examples=[
|
| 486 |
+
os.path.join(root_path, "condition", "upper", _)
|
| 487 |
+
for _ in os.listdir(os.path.join(root_path, "condition", "upper"))
|
| 488 |
+
],
|
| 489 |
+
examples_per_page=4,
|
| 490 |
+
inputs=cloth_image,
|
| 491 |
+
label="Condition Upper Examples",
|
| 492 |
+
)
|
| 493 |
+
condition_overall_exm = gr.Examples(
|
| 494 |
+
examples=[
|
| 495 |
+
os.path.join(root_path, "condition", "overall", _)
|
| 496 |
+
for _ in os.listdir(os.path.join(root_path, "condition", "overall"))
|
| 497 |
+
],
|
| 498 |
+
examples_per_page=4,
|
| 499 |
+
inputs=cloth_image,
|
| 500 |
+
label="Condition Overall Examples",
|
| 501 |
+
)
|
| 502 |
+
condition_person_exm = gr.Examples(
|
| 503 |
+
examples=[
|
| 504 |
+
os.path.join(root_path, "condition", "person", _)
|
| 505 |
+
for _ in os.listdir(os.path.join(root_path, "condition", "person"))
|
| 506 |
+
],
|
| 507 |
+
examples_per_page=4,
|
| 508 |
+
inputs=cloth_image,
|
| 509 |
+
label="Condition Reference Person Examples",
|
| 510 |
+
)
|
| 511 |
+
gr.Markdown(
|
| 512 |
+
'<span style="color: #808080; font-size: small;">*Condition examples come from the Internet. </span>'
|
| 513 |
+
)
|
| 514 |
+
|
| 515 |
+
image_path.change(
|
| 516 |
+
person_example_fn, inputs=image_path, outputs=person_image
|
| 517 |
+
)
|
| 518 |
+
|
| 519 |
+
submit.click(
|
| 520 |
+
submit_function,
|
| 521 |
+
[
|
| 522 |
+
person_image,
|
| 523 |
+
cloth_image,
|
| 524 |
+
cloth_type,
|
| 525 |
+
num_inference_steps,
|
| 526 |
+
guidance_scale,
|
| 527 |
+
seed,
|
| 528 |
+
show_type,
|
| 529 |
+
],
|
| 530 |
+
result_image,
|
| 531 |
+
)
|
| 532 |
+
|
| 533 |
+
with gr.Tab("Mask-based & Flux.1 Fill Dev"):
|
| 534 |
+
with gr.Row():
|
| 535 |
+
with gr.Column(scale=1, min_width=350):
|
| 536 |
+
with gr.Row():
|
| 537 |
+
image_path_flux = gr.Image(
|
| 538 |
+
type="filepath",
|
| 539 |
+
interactive=True,
|
| 540 |
+
visible=False,
|
| 541 |
+
)
|
| 542 |
+
person_image_flux = gr.ImageEditor(
|
| 543 |
+
interactive=True, label="Person Image", type="filepath"
|
| 544 |
+
)
|
| 545 |
+
|
| 546 |
+
with gr.Row():
|
| 547 |
+
with gr.Column(scale=1, min_width=230):
|
| 548 |
+
cloth_image_flux = gr.Image(
|
| 549 |
+
interactive=True, label="Condition Image", type="filepath"
|
| 550 |
+
)
|
| 551 |
+
with gr.Column(scale=1, min_width=120):
|
| 552 |
+
gr.Markdown(
|
| 553 |
+
'<span style="color: #808080; font-size: small;">Two ways to provide Mask:<br>1. Upload the person image and use the `🖌️` above to draw the Mask (higher priority)<br>2. Select the `Try-On Cloth Type` to generate automatically </span>'
|
| 554 |
+
)
|
| 555 |
+
cloth_type = gr.Radio(
|
| 556 |
+
label="Try-On Cloth Type",
|
| 557 |
+
choices=["upper", "lower", "overall"],
|
| 558 |
+
value="upper",
|
| 559 |
+
)
|
| 560 |
+
|
| 561 |
+
submit_flux = gr.Button("Submit")
|
| 562 |
+
gr.Markdown(
|
| 563 |
+
'<center><span style="color: #FF0000">!!! Click only Once, Wait for Delay !!!</span></center>'
|
| 564 |
+
)
|
| 565 |
+
|
| 566 |
+
with gr.Accordion("Advanced Options", open=False):
|
| 567 |
+
num_inference_steps_flux = gr.Slider(
|
| 568 |
+
label="Inference Step", minimum=10, maximum=100, step=5, value=50
|
| 569 |
+
)
|
| 570 |
+
# Guidence Scale
|
| 571 |
+
guidance_scale_flux = gr.Slider(
|
| 572 |
+
label="CFG Strenth", minimum=0.0, maximum=50, step=0.5, value=30
|
| 573 |
+
)
|
| 574 |
+
# Random Seed
|
| 575 |
+
seed_flux = gr.Slider(
|
| 576 |
+
label="Seed", minimum=-1, maximum=10000, step=1, value=42
|
| 577 |
+
)
|
| 578 |
+
show_type = gr.Radio(
|
| 579 |
+
label="Show Type",
|
| 580 |
+
choices=["result only", "input & result", "input & mask & result"],
|
| 581 |
+
value="input & mask & result",
|
| 582 |
+
)
|
| 583 |
+
|
| 584 |
+
with gr.Column(scale=2, min_width=500):
|
| 585 |
+
result_image_flux = gr.Image(interactive=False, label="Result")
|
| 586 |
+
with gr.Row():
|
| 587 |
+
# Photo Examples
|
| 588 |
+
root_path = "resource/demo/example"
|
| 589 |
+
with gr.Column():
|
| 590 |
+
gr.Examples(
|
| 591 |
+
examples=[
|
| 592 |
+
os.path.join(root_path, "person", "men", _)
|
| 593 |
+
for _ in os.listdir(os.path.join(root_path, "person", "men"))
|
| 594 |
+
],
|
| 595 |
+
examples_per_page=4,
|
| 596 |
+
inputs=image_path_flux,
|
| 597 |
+
label="Person Examples ①",
|
| 598 |
+
)
|
| 599 |
+
gr.Examples(
|
| 600 |
+
examples=[
|
| 601 |
+
os.path.join(root_path, "person", "women", _)
|
| 602 |
+
for _ in os.listdir(os.path.join(root_path, "person", "women"))
|
| 603 |
+
],
|
| 604 |
+
examples_per_page=4,
|
| 605 |
+
inputs=image_path_flux,
|
| 606 |
+
label="Person Examples ②",
|
| 607 |
+
)
|
| 608 |
+
gr.Markdown(
|
| 609 |
+
'<span style="color: #808080; font-size: small;">*Person examples come from the demos of <a href="https://huggingface.co/spaces/levihsu/OOTDiffusion">OOTDiffusion</a> and <a href="https://www.outfitanyone.org">OutfitAnyone</a>. </span>'
|
| 610 |
+
)
|
| 611 |
+
with gr.Column():
|
| 612 |
+
gr.Examples(
|
| 613 |
+
examples=[
|
| 614 |
+
os.path.join(root_path, "condition", "upper", _)
|
| 615 |
+
for _ in os.listdir(os.path.join(root_path, "condition", "upper"))
|
| 616 |
+
],
|
| 617 |
+
examples_per_page=4,
|
| 618 |
+
inputs=cloth_image_flux,
|
| 619 |
+
label="Condition Upper Examples",
|
| 620 |
+
)
|
| 621 |
+
gr.Examples(
|
| 622 |
+
examples=[
|
| 623 |
+
os.path.join(root_path, "condition", "overall", _)
|
| 624 |
+
for _ in os.listdir(os.path.join(root_path, "condition", "overall"))
|
| 625 |
+
],
|
| 626 |
+
examples_per_page=4,
|
| 627 |
+
inputs=cloth_image_flux,
|
| 628 |
+
label="Condition Overall Examples",
|
| 629 |
+
)
|
| 630 |
+
condition_person_exm = gr.Examples(
|
| 631 |
+
examples=[
|
| 632 |
+
os.path.join(root_path, "condition", "person", _)
|
| 633 |
+
for _ in os.listdir(os.path.join(root_path, "condition", "person"))
|
| 634 |
+
],
|
| 635 |
+
examples_per_page=4,
|
| 636 |
+
inputs=cloth_image_flux,
|
| 637 |
+
label="Condition Reference Person Examples",
|
| 638 |
+
)
|
| 639 |
+
gr.Markdown(
|
| 640 |
+
'<span style="color: #808080; font-size: small;">*Condition examples come from the Internet. </span>'
|
| 641 |
+
)
|
| 642 |
+
|
| 643 |
+
|
| 644 |
+
image_path_flux.change(
|
| 645 |
+
person_example_fn, inputs=image_path_flux, outputs=person_image_flux
|
| 646 |
+
)
|
| 647 |
+
|
| 648 |
+
submit_flux.click(
|
| 649 |
+
submit_function_flux,
|
| 650 |
+
[person_image_flux, cloth_image_flux, cloth_type, num_inference_steps_flux, guidance_scale_flux, seed_flux, show_type],
|
| 651 |
+
result_image_flux,
|
| 652 |
+
)
|
| 653 |
+
|
| 654 |
+
|
| 655 |
+
with gr.Tab("Mask-free & SD1.5"):
|
| 656 |
+
with gr.Row():
|
| 657 |
+
with gr.Column(scale=1, min_width=350):
|
| 658 |
+
with gr.Row():
|
| 659 |
+
image_path_p2p = gr.Image(
|
| 660 |
+
type="filepath",
|
| 661 |
+
interactive=True,
|
| 662 |
+
visible=False,
|
| 663 |
+
)
|
| 664 |
+
person_image_p2p = gr.ImageEditor(
|
| 665 |
+
interactive=True, label="Person Image", type="filepath"
|
| 666 |
+
)
|
| 667 |
+
|
| 668 |
+
with gr.Row():
|
| 669 |
+
with gr.Column(scale=1, min_width=230):
|
| 670 |
+
cloth_image_p2p = gr.Image(
|
| 671 |
+
interactive=True, label="Condition Image", type="filepath"
|
| 672 |
+
)
|
| 673 |
+
|
| 674 |
+
submit_p2p = gr.Button("Submit")
|
| 675 |
+
gr.Markdown(
|
| 676 |
+
'<center><span style="color: #FF0000">!!! Click only Once, Wait for Delay !!!</span></center>'
|
| 677 |
+
)
|
| 678 |
+
|
| 679 |
+
gr.Markdown(
|
| 680 |
+
'<span style="color: #808080; font-size: small;">Advanced options can adjust details:<br>1. `Inference Step` may enhance details;<br>2. `CFG` is highly correlated with saturation;<br>3. `Random seed` may improve pseudo-shadow.</span>'
|
| 681 |
+
)
|
| 682 |
+
with gr.Accordion("Advanced Options", open=False):
|
| 683 |
+
num_inference_steps_p2p = gr.Slider(
|
| 684 |
+
label="Inference Step", minimum=10, maximum=100, step=5, value=50
|
| 685 |
+
)
|
| 686 |
+
# Guidence Scale
|
| 687 |
+
guidance_scale_p2p = gr.Slider(
|
| 688 |
+
label="CFG Strenth", minimum=0.0, maximum=7.5, step=0.5, value=2.5
|
| 689 |
+
)
|
| 690 |
+
# Random Seed
|
| 691 |
+
seed_p2p = gr.Slider(
|
| 692 |
+
label="Seed", minimum=-1, maximum=10000, step=1, value=42
|
| 693 |
+
)
|
| 694 |
+
# show_type = gr.Radio(
|
| 695 |
+
# label="Show Type",
|
| 696 |
+
# choices=["result only", "input & result", "input & mask & result"],
|
| 697 |
+
# value="input & mask & result",
|
| 698 |
+
# )
|
| 699 |
+
|
| 700 |
+
with gr.Column(scale=2, min_width=500):
|
| 701 |
+
result_image_p2p = gr.Image(interactive=False, label="Result")
|
| 702 |
+
with gr.Row():
|
| 703 |
+
# Photo Examples
|
| 704 |
+
root_path = "resource/demo/example"
|
| 705 |
+
with gr.Column():
|
| 706 |
+
gr.Examples(
|
| 707 |
+
examples=[
|
| 708 |
+
os.path.join(root_path, "person", "men", _)
|
| 709 |
+
for _ in os.listdir(os.path.join(root_path, "person", "men"))
|
| 710 |
+
],
|
| 711 |
+
examples_per_page=4,
|
| 712 |
+
inputs=image_path_p2p,
|
| 713 |
+
label="Person Examples ①",
|
| 714 |
+
)
|
| 715 |
+
gr.Examples(
|
| 716 |
+
examples=[
|
| 717 |
+
os.path.join(root_path, "person", "women", _)
|
| 718 |
+
for _ in os.listdir(os.path.join(root_path, "person", "women"))
|
| 719 |
+
],
|
| 720 |
+
examples_per_page=4,
|
| 721 |
+
inputs=image_path_p2p,
|
| 722 |
+
label="Person Examples ②",
|
| 723 |
+
)
|
| 724 |
+
gr.Markdown(
|
| 725 |
+
'<span style="color: #808080; font-size: small;">*Person examples come from the demos of <a href="https://huggingface.co/spaces/levihsu/OOTDiffusion">OOTDiffusion</a> and <a href="https://www.outfitanyone.org">OutfitAnyone</a>. </span>'
|
| 726 |
+
)
|
| 727 |
+
with gr.Column():
|
| 728 |
+
gr.Examples(
|
| 729 |
+
examples=[
|
| 730 |
+
os.path.join(root_path, "condition", "upper", _)
|
| 731 |
+
for _ in os.listdir(os.path.join(root_path, "condition", "upper"))
|
| 732 |
+
],
|
| 733 |
+
examples_per_page=4,
|
| 734 |
+
inputs=cloth_image_p2p,
|
| 735 |
+
label="Condition Upper Examples",
|
| 736 |
+
)
|
| 737 |
+
gr.Examples(
|
| 738 |
+
examples=[
|
| 739 |
+
os.path.join(root_path, "condition", "overall", _)
|
| 740 |
+
for _ in os.listdir(os.path.join(root_path, "condition", "overall"))
|
| 741 |
+
],
|
| 742 |
+
examples_per_page=4,
|
| 743 |
+
inputs=cloth_image_p2p,
|
| 744 |
+
label="Condition Overall Examples",
|
| 745 |
+
)
|
| 746 |
+
condition_person_exm = gr.Examples(
|
| 747 |
+
examples=[
|
| 748 |
+
os.path.join(root_path, "condition", "person", _)
|
| 749 |
+
for _ in os.listdir(os.path.join(root_path, "condition", "person"))
|
| 750 |
+
],
|
| 751 |
+
examples_per_page=4,
|
| 752 |
+
inputs=cloth_image_p2p,
|
| 753 |
+
label="Condition Reference Person Examples",
|
| 754 |
+
)
|
| 755 |
+
gr.Markdown(
|
| 756 |
+
'<span style="color: #808080; font-size: small;">*Condition examples come from the Internet. </span>'
|
| 757 |
+
)
|
| 758 |
+
|
| 759 |
+
image_path_p2p.change(
|
| 760 |
+
person_example_fn, inputs=image_path_p2p, outputs=person_image_p2p
|
| 761 |
+
)
|
| 762 |
+
|
| 763 |
+
submit_p2p.click(
|
| 764 |
+
submit_function_p2p,
|
| 765 |
+
[
|
| 766 |
+
person_image_p2p,
|
| 767 |
+
cloth_image_p2p,
|
| 768 |
+
num_inference_steps_p2p,
|
| 769 |
+
guidance_scale_p2p,
|
| 770 |
+
seed_p2p],
|
| 771 |
+
result_image_p2p,
|
| 772 |
+
)
|
| 773 |
+
|
| 774 |
+
demo.queue().launch(share=True, show_error=True)
|
| 775 |
+
|
| 776 |
+
|
| 777 |
+
if __name__ == "__main__":
|
| 778 |
+
app_gradio()
|
CatVTON/densepose/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
from .data.datasets import builtin # just to register data
|
| 5 |
+
from .converters import builtin as builtin_converters # register converters
|
| 6 |
+
from .config import (
|
| 7 |
+
add_densepose_config,
|
| 8 |
+
add_densepose_head_config,
|
| 9 |
+
add_hrnet_config,
|
| 10 |
+
add_dataset_category_config,
|
| 11 |
+
add_bootstrap_config,
|
| 12 |
+
load_bootstrap_config,
|
| 13 |
+
)
|
| 14 |
+
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
|
| 15 |
+
from .evaluation import DensePoseCOCOEvaluator
|
| 16 |
+
from .modeling.roi_heads import DensePoseROIHeads
|
| 17 |
+
from .modeling.test_time_augmentation import (
|
| 18 |
+
DensePoseGeneralizedRCNNWithTTA,
|
| 19 |
+
DensePoseDatasetMapperTTA,
|
| 20 |
+
)
|
| 21 |
+
from .utils.transform import load_from_cfg
|
| 22 |
+
from .modeling.hrfpn import build_hrfpn_backbone
|
CatVTON/densepose/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (925 Bytes). View file
|
|
|
CatVTON/densepose/__pycache__/config.cpython-39.pyc
ADDED
|
Binary file (5.82 kB). View file
|
|
|
CatVTON/densepose/config.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding = utf-8 -*-
|
| 2 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 3 |
+
# pyre-ignore-all-errors
|
| 4 |
+
|
| 5 |
+
from detectron2.config import CfgNode as CN
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def add_dataset_category_config(cfg: CN) -> None:
|
| 9 |
+
"""
|
| 10 |
+
Add config for additional category-related dataset options
|
| 11 |
+
- category whitelisting
|
| 12 |
+
- category mapping
|
| 13 |
+
"""
|
| 14 |
+
_C = cfg
|
| 15 |
+
_C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
|
| 16 |
+
_C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
|
| 17 |
+
# class to mesh mapping
|
| 18 |
+
_C.DATASETS.CLASS_TO_MESH_NAME_MAPPING = CN(new_allowed=True)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def add_evaluation_config(cfg: CN) -> None:
|
| 22 |
+
_C = cfg
|
| 23 |
+
_C.DENSEPOSE_EVALUATION = CN()
|
| 24 |
+
# evaluator type, possible values:
|
| 25 |
+
# - "iou": evaluator for models that produce iou data
|
| 26 |
+
# - "cse": evaluator for models that produce cse data
|
| 27 |
+
_C.DENSEPOSE_EVALUATION.TYPE = "iou"
|
| 28 |
+
# storage for DensePose results, possible values:
|
| 29 |
+
# - "none": no explicit storage, all the results are stored in the
|
| 30 |
+
# dictionary with predictions, memory intensive;
|
| 31 |
+
# historically the default storage type
|
| 32 |
+
# - "ram": RAM storage, uses per-process RAM storage, which is
|
| 33 |
+
# reduced to a single process storage on later stages,
|
| 34 |
+
# less memory intensive
|
| 35 |
+
# - "file": file storage, uses per-process file-based storage,
|
| 36 |
+
# the least memory intensive, but may create bottlenecks
|
| 37 |
+
# on file system accesses
|
| 38 |
+
_C.DENSEPOSE_EVALUATION.STORAGE = "none"
|
| 39 |
+
# minimum threshold for IOU values: the lower its values is,
|
| 40 |
+
# the more matches are produced (and the higher the AP score)
|
| 41 |
+
_C.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD = 0.5
|
| 42 |
+
# Non-distributed inference is slower (at inference time) but can avoid RAM OOM
|
| 43 |
+
_C.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE = True
|
| 44 |
+
# evaluate mesh alignment based on vertex embeddings, only makes sense in CSE context
|
| 45 |
+
_C.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT = False
|
| 46 |
+
# meshes to compute mesh alignment for
|
| 47 |
+
_C.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES = []
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def add_bootstrap_config(cfg: CN) -> None:
|
| 51 |
+
""" """
|
| 52 |
+
_C = cfg
|
| 53 |
+
_C.BOOTSTRAP_DATASETS = []
|
| 54 |
+
_C.BOOTSTRAP_MODEL = CN()
|
| 55 |
+
_C.BOOTSTRAP_MODEL.WEIGHTS = ""
|
| 56 |
+
_C.BOOTSTRAP_MODEL.DEVICE = "cuda"
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def get_bootstrap_dataset_config() -> CN:
|
| 60 |
+
_C = CN()
|
| 61 |
+
_C.DATASET = ""
|
| 62 |
+
# ratio used to mix data loaders
|
| 63 |
+
_C.RATIO = 0.1
|
| 64 |
+
# image loader
|
| 65 |
+
_C.IMAGE_LOADER = CN(new_allowed=True)
|
| 66 |
+
_C.IMAGE_LOADER.TYPE = ""
|
| 67 |
+
_C.IMAGE_LOADER.BATCH_SIZE = 4
|
| 68 |
+
_C.IMAGE_LOADER.NUM_WORKERS = 4
|
| 69 |
+
_C.IMAGE_LOADER.CATEGORIES = []
|
| 70 |
+
_C.IMAGE_LOADER.MAX_COUNT_PER_CATEGORY = 1_000_000
|
| 71 |
+
_C.IMAGE_LOADER.CATEGORY_TO_CLASS_MAPPING = CN(new_allowed=True)
|
| 72 |
+
# inference
|
| 73 |
+
_C.INFERENCE = CN()
|
| 74 |
+
# batch size for model inputs
|
| 75 |
+
_C.INFERENCE.INPUT_BATCH_SIZE = 4
|
| 76 |
+
# batch size to group model outputs
|
| 77 |
+
_C.INFERENCE.OUTPUT_BATCH_SIZE = 2
|
| 78 |
+
# sampled data
|
| 79 |
+
_C.DATA_SAMPLER = CN(new_allowed=True)
|
| 80 |
+
_C.DATA_SAMPLER.TYPE = ""
|
| 81 |
+
_C.DATA_SAMPLER.USE_GROUND_TRUTH_CATEGORIES = False
|
| 82 |
+
# filter
|
| 83 |
+
_C.FILTER = CN(new_allowed=True)
|
| 84 |
+
_C.FILTER.TYPE = ""
|
| 85 |
+
return _C
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def load_bootstrap_config(cfg: CN) -> None:
|
| 89 |
+
"""
|
| 90 |
+
Bootstrap datasets are given as a list of `dict` that are not automatically
|
| 91 |
+
converted into CfgNode. This method processes all bootstrap dataset entries
|
| 92 |
+
and ensures that they are in CfgNode format and comply with the specification
|
| 93 |
+
"""
|
| 94 |
+
if not cfg.BOOTSTRAP_DATASETS:
|
| 95 |
+
return
|
| 96 |
+
|
| 97 |
+
bootstrap_datasets_cfgnodes = []
|
| 98 |
+
for dataset_cfg in cfg.BOOTSTRAP_DATASETS:
|
| 99 |
+
_C = get_bootstrap_dataset_config().clone()
|
| 100 |
+
_C.merge_from_other_cfg(CN(dataset_cfg))
|
| 101 |
+
bootstrap_datasets_cfgnodes.append(_C)
|
| 102 |
+
cfg.BOOTSTRAP_DATASETS = bootstrap_datasets_cfgnodes
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def add_densepose_head_cse_config(cfg: CN) -> None:
|
| 106 |
+
"""
|
| 107 |
+
Add configuration options for Continuous Surface Embeddings (CSE)
|
| 108 |
+
"""
|
| 109 |
+
_C = cfg
|
| 110 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE = CN()
|
| 111 |
+
# Dimensionality D of the embedding space
|
| 112 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE = 16
|
| 113 |
+
# Embedder specifications for various mesh IDs
|
| 114 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS = CN(new_allowed=True)
|
| 115 |
+
# normalization coefficient for embedding distances
|
| 116 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA = 0.01
|
| 117 |
+
# normalization coefficient for geodesic distances
|
| 118 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA = 0.01
|
| 119 |
+
# embedding loss weight
|
| 120 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT = 0.6
|
| 121 |
+
# embedding loss name, currently the following options are supported:
|
| 122 |
+
# - EmbeddingLoss: cross-entropy on vertex labels
|
| 123 |
+
# - SoftEmbeddingLoss: cross-entropy on vertex label combined with
|
| 124 |
+
# Gaussian penalty on distance between vertices
|
| 125 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME = "EmbeddingLoss"
|
| 126 |
+
# optimizer hyperparameters
|
| 127 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR = 1.0
|
| 128 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR = 1.0
|
| 129 |
+
# Shape to shape cycle consistency loss parameters:
|
| 130 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
|
| 131 |
+
# shape to shape cycle consistency loss weight
|
| 132 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.025
|
| 133 |
+
# norm type used for loss computation
|
| 134 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P = 2
|
| 135 |
+
# normalization term for embedding similarity matrices
|
| 136 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE = 0.05
|
| 137 |
+
# maximum number of vertices to include into shape to shape cycle loss
|
| 138 |
+
# if negative or zero, all vertices are considered
|
| 139 |
+
# if positive, random subset of vertices of given size is considered
|
| 140 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES = 4936
|
| 141 |
+
# Pixel to shape cycle consistency loss parameters:
|
| 142 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
|
| 143 |
+
# pixel to shape cycle consistency loss weight
|
| 144 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.0001
|
| 145 |
+
# norm type used for loss computation
|
| 146 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P = 2
|
| 147 |
+
# map images to all meshes and back (if false, use only gt meshes from the batch)
|
| 148 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY = False
|
| 149 |
+
# Randomly select at most this number of pixels from every instance
|
| 150 |
+
# if negative or zero, all vertices are considered
|
| 151 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE = 100
|
| 152 |
+
# normalization factor for pixel to pixel distances (higher value = smoother distribution)
|
| 153 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA = 5.0
|
| 154 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX = 0.05
|
| 155 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL = 0.05
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def add_densepose_head_config(cfg: CN) -> None:
|
| 159 |
+
"""
|
| 160 |
+
Add config for densepose head.
|
| 161 |
+
"""
|
| 162 |
+
_C = cfg
|
| 163 |
+
|
| 164 |
+
_C.MODEL.DENSEPOSE_ON = True
|
| 165 |
+
|
| 166 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD = CN()
|
| 167 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
|
| 168 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
|
| 169 |
+
# Number of parts used for point labels
|
| 170 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
|
| 171 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
|
| 172 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
|
| 173 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
|
| 174 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
|
| 175 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
|
| 176 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
|
| 177 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
|
| 178 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
|
| 179 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2
|
| 180 |
+
# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
|
| 181 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
|
| 182 |
+
# Loss weights for annotation masks.(14 Parts)
|
| 183 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
|
| 184 |
+
# Loss weights for surface parts. (24 Parts)
|
| 185 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
|
| 186 |
+
# Loss weights for UV regression.
|
| 187 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
|
| 188 |
+
# Coarse segmentation is trained using instance segmentation task data
|
| 189 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = False
|
| 190 |
+
# For Decoder
|
| 191 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
|
| 192 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
|
| 193 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
|
| 194 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
|
| 195 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
|
| 196 |
+
# For DeepLab head
|
| 197 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
|
| 198 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
|
| 199 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
|
| 200 |
+
# Predictor class name, must be registered in DENSEPOSE_PREDICTOR_REGISTRY
|
| 201 |
+
# Some registered predictors:
|
| 202 |
+
# "DensePoseChartPredictor": predicts segmentation and UV coordinates for predefined charts
|
| 203 |
+
# "DensePoseChartWithConfidencePredictor": predicts segmentation, UV coordinates
|
| 204 |
+
# and associated confidences for predefined charts (default)
|
| 205 |
+
# "DensePoseEmbeddingWithConfidencePredictor": predicts segmentation, embeddings
|
| 206 |
+
# and associated confidences for CSE
|
| 207 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME = "DensePoseChartWithConfidencePredictor"
|
| 208 |
+
# Loss class name, must be registered in DENSEPOSE_LOSS_REGISTRY
|
| 209 |
+
# Some registered losses:
|
| 210 |
+
# "DensePoseChartLoss": loss for chart-based models that estimate
|
| 211 |
+
# segmentation and UV coordinates
|
| 212 |
+
# "DensePoseChartWithConfidenceLoss": loss for chart-based models that estimate
|
| 213 |
+
# segmentation, UV coordinates and the corresponding confidences (default)
|
| 214 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME = "DensePoseChartWithConfidenceLoss"
|
| 215 |
+
# Confidences
|
| 216 |
+
# Enable learning UV confidences (variances) along with the actual values
|
| 217 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
|
| 218 |
+
# UV confidence lower bound
|
| 219 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
|
| 220 |
+
# Enable learning segmentation confidences (variances) along with the actual values
|
| 221 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE = CN({"ENABLED": False})
|
| 222 |
+
# Segmentation confidence lower bound
|
| 223 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON = 0.01
|
| 224 |
+
# Statistical model type for confidence learning, possible values:
|
| 225 |
+
# - "iid_iso": statistically independent identically distributed residuals
|
| 226 |
+
# with isotropic covariance
|
| 227 |
+
# - "indep_aniso": statistically independent residuals with anisotropic
|
| 228 |
+
# covariances
|
| 229 |
+
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
|
| 230 |
+
# List of angles for rotation in data augmentation during training
|
| 231 |
+
_C.INPUT.ROTATION_ANGLES = [0]
|
| 232 |
+
_C.TEST.AUG.ROTATION_ANGLES = () # Rotation TTA
|
| 233 |
+
|
| 234 |
+
add_densepose_head_cse_config(cfg)
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def add_hrnet_config(cfg: CN) -> None:
|
| 238 |
+
"""
|
| 239 |
+
Add config for HRNet backbone.
|
| 240 |
+
"""
|
| 241 |
+
_C = cfg
|
| 242 |
+
|
| 243 |
+
# For HigherHRNet w32
|
| 244 |
+
_C.MODEL.HRNET = CN()
|
| 245 |
+
_C.MODEL.HRNET.STEM_INPLANES = 64
|
| 246 |
+
_C.MODEL.HRNET.STAGE2 = CN()
|
| 247 |
+
_C.MODEL.HRNET.STAGE2.NUM_MODULES = 1
|
| 248 |
+
_C.MODEL.HRNET.STAGE2.NUM_BRANCHES = 2
|
| 249 |
+
_C.MODEL.HRNET.STAGE2.BLOCK = "BASIC"
|
| 250 |
+
_C.MODEL.HRNET.STAGE2.NUM_BLOCKS = [4, 4]
|
| 251 |
+
_C.MODEL.HRNET.STAGE2.NUM_CHANNELS = [32, 64]
|
| 252 |
+
_C.MODEL.HRNET.STAGE2.FUSE_METHOD = "SUM"
|
| 253 |
+
_C.MODEL.HRNET.STAGE3 = CN()
|
| 254 |
+
_C.MODEL.HRNET.STAGE3.NUM_MODULES = 4
|
| 255 |
+
_C.MODEL.HRNET.STAGE3.NUM_BRANCHES = 3
|
| 256 |
+
_C.MODEL.HRNET.STAGE3.BLOCK = "BASIC"
|
| 257 |
+
_C.MODEL.HRNET.STAGE3.NUM_BLOCKS = [4, 4, 4]
|
| 258 |
+
_C.MODEL.HRNET.STAGE3.NUM_CHANNELS = [32, 64, 128]
|
| 259 |
+
_C.MODEL.HRNET.STAGE3.FUSE_METHOD = "SUM"
|
| 260 |
+
_C.MODEL.HRNET.STAGE4 = CN()
|
| 261 |
+
_C.MODEL.HRNET.STAGE4.NUM_MODULES = 3
|
| 262 |
+
_C.MODEL.HRNET.STAGE4.NUM_BRANCHES = 4
|
| 263 |
+
_C.MODEL.HRNET.STAGE4.BLOCK = "BASIC"
|
| 264 |
+
_C.MODEL.HRNET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
|
| 265 |
+
_C.MODEL.HRNET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
|
| 266 |
+
_C.MODEL.HRNET.STAGE4.FUSE_METHOD = "SUM"
|
| 267 |
+
|
| 268 |
+
_C.MODEL.HRNET.HRFPN = CN()
|
| 269 |
+
_C.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def add_densepose_config(cfg: CN) -> None:
|
| 273 |
+
add_densepose_head_config(cfg)
|
| 274 |
+
add_hrnet_config(cfg)
|
| 275 |
+
add_bootstrap_config(cfg)
|
| 276 |
+
add_dataset_category_config(cfg)
|
| 277 |
+
add_evaluation_config(cfg)
|
CatVTON/densepose/converters/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from .hflip import HFlipConverter
|
| 6 |
+
from .to_mask import ToMaskConverter
|
| 7 |
+
from .to_chart_result import ToChartResultConverter, ToChartResultConverterWithConfidences
|
| 8 |
+
from .segm_to_mask import (
|
| 9 |
+
predictor_output_with_fine_and_coarse_segm_to_mask,
|
| 10 |
+
predictor_output_with_coarse_segm_to_mask,
|
| 11 |
+
resample_fine_and_coarse_segm_to_bbox,
|
| 12 |
+
)
|
| 13 |
+
from .chart_output_to_chart_result import (
|
| 14 |
+
densepose_chart_predictor_output_to_result,
|
| 15 |
+
densepose_chart_predictor_output_to_result_with_confidences,
|
| 16 |
+
)
|
| 17 |
+
from .chart_output_hflip import densepose_chart_predictor_output_hflip
|
CatVTON/densepose/converters/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (799 Bytes). View file
|
|
|
CatVTON/densepose/converters/__pycache__/base.cpython-39.pyc
ADDED
|
Binary file (3.68 kB). View file
|
|
|
CatVTON/densepose/converters/__pycache__/builtin.cpython-39.pyc
ADDED
|
Binary file (804 Bytes). View file
|
|
|
CatVTON/densepose/converters/__pycache__/chart_output_hflip.cpython-39.pyc
ADDED
|
Binary file (1.95 kB). View file
|
|
|
CatVTON/densepose/converters/__pycache__/chart_output_to_chart_result.cpython-39.pyc
ADDED
|
Binary file (6.03 kB). View file
|
|
|
CatVTON/densepose/converters/__pycache__/hflip.cpython-39.pyc
ADDED
|
Binary file (1.35 kB). View file
|
|
|
CatVTON/densepose/converters/__pycache__/segm_to_mask.cpython-39.pyc
ADDED
|
Binary file (5.75 kB). View file
|
|
|
CatVTON/densepose/converters/__pycache__/to_chart_result.cpython-39.pyc
ADDED
|
Binary file (2.74 kB). View file
|
|
|
CatVTON/densepose/converters/__pycache__/to_mask.cpython-39.pyc
ADDED
|
Binary file (1.76 kB). View file
|
|
|
CatVTON/densepose/converters/base.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from typing import Any, Tuple, Type
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class BaseConverter:
|
| 10 |
+
"""
|
| 11 |
+
Converter base class to be reused by various converters.
|
| 12 |
+
Converter allows one to convert data from various source types to a particular
|
| 13 |
+
destination type. Each source type needs to register its converter. The
|
| 14 |
+
registration for each source type is valid for all descendants of that type.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
@classmethod
|
| 18 |
+
def register(cls, from_type: Type, converter: Any = None):
|
| 19 |
+
"""
|
| 20 |
+
Registers a converter for the specified type.
|
| 21 |
+
Can be used as a decorator (if converter is None), or called as a method.
|
| 22 |
+
|
| 23 |
+
Args:
|
| 24 |
+
from_type (type): type to register the converter for;
|
| 25 |
+
all instances of this type will use the same converter
|
| 26 |
+
converter (callable): converter to be registered for the given
|
| 27 |
+
type; if None, this method is assumed to be a decorator for the converter
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
if converter is not None:
|
| 31 |
+
cls._do_register(from_type, converter)
|
| 32 |
+
|
| 33 |
+
def wrapper(converter: Any) -> Any:
|
| 34 |
+
cls._do_register(from_type, converter)
|
| 35 |
+
return converter
|
| 36 |
+
|
| 37 |
+
return wrapper
|
| 38 |
+
|
| 39 |
+
@classmethod
|
| 40 |
+
def _do_register(cls, from_type: Type, converter: Any):
|
| 41 |
+
cls.registry[from_type] = converter # pyre-ignore[16]
|
| 42 |
+
|
| 43 |
+
@classmethod
|
| 44 |
+
def _lookup_converter(cls, from_type: Type) -> Any:
|
| 45 |
+
"""
|
| 46 |
+
Perform recursive lookup for the given type
|
| 47 |
+
to find registered converter. If a converter was found for some base
|
| 48 |
+
class, it gets registered for this class to save on further lookups.
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
from_type: type for which to find a converter
|
| 52 |
+
Return:
|
| 53 |
+
callable or None - registered converter or None
|
| 54 |
+
if no suitable entry was found in the registry
|
| 55 |
+
"""
|
| 56 |
+
if from_type in cls.registry: # pyre-ignore[16]
|
| 57 |
+
return cls.registry[from_type]
|
| 58 |
+
for base in from_type.__bases__:
|
| 59 |
+
converter = cls._lookup_converter(base)
|
| 60 |
+
if converter is not None:
|
| 61 |
+
cls._do_register(from_type, converter)
|
| 62 |
+
return converter
|
| 63 |
+
return None
|
| 64 |
+
|
| 65 |
+
@classmethod
|
| 66 |
+
def convert(cls, instance: Any, *args, **kwargs):
|
| 67 |
+
"""
|
| 68 |
+
Convert an instance to the destination type using some registered
|
| 69 |
+
converter. Does recursive lookup for base classes, so there's no need
|
| 70 |
+
for explicit registration for derived classes.
|
| 71 |
+
|
| 72 |
+
Args:
|
| 73 |
+
instance: source instance to convert to the destination type
|
| 74 |
+
Return:
|
| 75 |
+
An instance of the destination type obtained from the source instance
|
| 76 |
+
Raises KeyError, if no suitable converter found
|
| 77 |
+
"""
|
| 78 |
+
instance_type = type(instance)
|
| 79 |
+
converter = cls._lookup_converter(instance_type)
|
| 80 |
+
if converter is None:
|
| 81 |
+
if cls.dst_type is None: # pyre-ignore[16]
|
| 82 |
+
output_type_str = "itself"
|
| 83 |
+
else:
|
| 84 |
+
output_type_str = cls.dst_type
|
| 85 |
+
raise KeyError(f"Could not find converter from {instance_type} to {output_type_str}")
|
| 86 |
+
return converter(instance, *args, **kwargs)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
IntTupleBox = Tuple[int, int, int, int]
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def make_int_box(box: torch.Tensor) -> IntTupleBox:
|
| 93 |
+
int_box = [0, 0, 0, 0]
|
| 94 |
+
int_box[0], int_box[1], int_box[2], int_box[3] = tuple(box.long().tolist())
|
| 95 |
+
return int_box[0], int_box[1], int_box[2], int_box[3]
|
CatVTON/densepose/converters/builtin.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from ..structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput
|
| 6 |
+
from . import (
|
| 7 |
+
HFlipConverter,
|
| 8 |
+
ToChartResultConverter,
|
| 9 |
+
ToChartResultConverterWithConfidences,
|
| 10 |
+
ToMaskConverter,
|
| 11 |
+
densepose_chart_predictor_output_hflip,
|
| 12 |
+
densepose_chart_predictor_output_to_result,
|
| 13 |
+
densepose_chart_predictor_output_to_result_with_confidences,
|
| 14 |
+
predictor_output_with_coarse_segm_to_mask,
|
| 15 |
+
predictor_output_with_fine_and_coarse_segm_to_mask,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
ToMaskConverter.register(
|
| 19 |
+
DensePoseChartPredictorOutput, predictor_output_with_fine_and_coarse_segm_to_mask
|
| 20 |
+
)
|
| 21 |
+
ToMaskConverter.register(
|
| 22 |
+
DensePoseEmbeddingPredictorOutput, predictor_output_with_coarse_segm_to_mask
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
ToChartResultConverter.register(
|
| 26 |
+
DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
ToChartResultConverterWithConfidences.register(
|
| 30 |
+
DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result_with_confidences
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
HFlipConverter.register(DensePoseChartPredictorOutput, densepose_chart_predictor_output_hflip)
|
CatVTON/densepose/converters/chart_output_hflip.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
from dataclasses import fields
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
from densepose.structures import DensePoseChartPredictorOutput, DensePoseTransformData
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def densepose_chart_predictor_output_hflip(
|
| 11 |
+
densepose_predictor_output: DensePoseChartPredictorOutput,
|
| 12 |
+
transform_data: DensePoseTransformData,
|
| 13 |
+
) -> DensePoseChartPredictorOutput:
|
| 14 |
+
"""
|
| 15 |
+
Change to take into account a Horizontal flip.
|
| 16 |
+
"""
|
| 17 |
+
if len(densepose_predictor_output) > 0:
|
| 18 |
+
|
| 19 |
+
PredictorOutput = type(densepose_predictor_output)
|
| 20 |
+
output_dict = {}
|
| 21 |
+
|
| 22 |
+
for field in fields(densepose_predictor_output):
|
| 23 |
+
field_value = getattr(densepose_predictor_output, field.name)
|
| 24 |
+
# flip tensors
|
| 25 |
+
if isinstance(field_value, torch.Tensor):
|
| 26 |
+
setattr(densepose_predictor_output, field.name, torch.flip(field_value, [3]))
|
| 27 |
+
|
| 28 |
+
densepose_predictor_output = _flip_iuv_semantics_tensor(
|
| 29 |
+
densepose_predictor_output, transform_data
|
| 30 |
+
)
|
| 31 |
+
densepose_predictor_output = _flip_segm_semantics_tensor(
|
| 32 |
+
densepose_predictor_output, transform_data
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
for field in fields(densepose_predictor_output):
|
| 36 |
+
output_dict[field.name] = getattr(densepose_predictor_output, field.name)
|
| 37 |
+
|
| 38 |
+
return PredictorOutput(**output_dict)
|
| 39 |
+
else:
|
| 40 |
+
return densepose_predictor_output
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _flip_iuv_semantics_tensor(
|
| 44 |
+
densepose_predictor_output: DensePoseChartPredictorOutput,
|
| 45 |
+
dp_transform_data: DensePoseTransformData,
|
| 46 |
+
) -> DensePoseChartPredictorOutput:
|
| 47 |
+
point_label_symmetries = dp_transform_data.point_label_symmetries
|
| 48 |
+
uv_symmetries = dp_transform_data.uv_symmetries
|
| 49 |
+
|
| 50 |
+
N, C, H, W = densepose_predictor_output.u.shape
|
| 51 |
+
u_loc = (densepose_predictor_output.u[:, 1:, :, :].clamp(0, 1) * 255).long()
|
| 52 |
+
v_loc = (densepose_predictor_output.v[:, 1:, :, :].clamp(0, 1) * 255).long()
|
| 53 |
+
Iindex = torch.arange(C - 1, device=densepose_predictor_output.u.device)[
|
| 54 |
+
None, :, None, None
|
| 55 |
+
].expand(N, C - 1, H, W)
|
| 56 |
+
densepose_predictor_output.u[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
|
| 57 |
+
densepose_predictor_output.v[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]
|
| 58 |
+
|
| 59 |
+
for el in ["fine_segm", "u", "v"]:
|
| 60 |
+
densepose_predictor_output.__dict__[el] = densepose_predictor_output.__dict__[el][
|
| 61 |
+
:, point_label_symmetries, :, :
|
| 62 |
+
]
|
| 63 |
+
return densepose_predictor_output
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _flip_segm_semantics_tensor(
|
| 67 |
+
densepose_predictor_output: DensePoseChartPredictorOutput, dp_transform_data
|
| 68 |
+
):
|
| 69 |
+
if densepose_predictor_output.coarse_segm.shape[1] > 2:
|
| 70 |
+
densepose_predictor_output.coarse_segm = densepose_predictor_output.coarse_segm[
|
| 71 |
+
:, dp_transform_data.mask_label_symmetries, :, :
|
| 72 |
+
]
|
| 73 |
+
return densepose_predictor_output
|
CatVTON/densepose/converters/chart_output_to_chart_result.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from typing import Dict
|
| 6 |
+
import torch
|
| 7 |
+
from torch.nn import functional as F
|
| 8 |
+
|
| 9 |
+
from detectron2.structures.boxes import Boxes, BoxMode
|
| 10 |
+
|
| 11 |
+
from ..structures import (
|
| 12 |
+
DensePoseChartPredictorOutput,
|
| 13 |
+
DensePoseChartResult,
|
| 14 |
+
DensePoseChartResultWithConfidences,
|
| 15 |
+
)
|
| 16 |
+
from . import resample_fine_and_coarse_segm_to_bbox
|
| 17 |
+
from .base import IntTupleBox, make_int_box
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def resample_uv_tensors_to_bbox(
|
| 21 |
+
u: torch.Tensor,
|
| 22 |
+
v: torch.Tensor,
|
| 23 |
+
labels: torch.Tensor,
|
| 24 |
+
box_xywh_abs: IntTupleBox,
|
| 25 |
+
) -> torch.Tensor:
|
| 26 |
+
"""
|
| 27 |
+
Resamples U and V coordinate estimates for the given bounding box
|
| 28 |
+
|
| 29 |
+
Args:
|
| 30 |
+
u (tensor [1, C, H, W] of float): U coordinates
|
| 31 |
+
v (tensor [1, C, H, W] of float): V coordinates
|
| 32 |
+
labels (tensor [H, W] of long): labels obtained by resampling segmentation
|
| 33 |
+
outputs for the given bounding box
|
| 34 |
+
box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
|
| 35 |
+
Return:
|
| 36 |
+
Resampled U and V coordinates - a tensor [2, H, W] of float
|
| 37 |
+
"""
|
| 38 |
+
x, y, w, h = box_xywh_abs
|
| 39 |
+
w = max(int(w), 1)
|
| 40 |
+
h = max(int(h), 1)
|
| 41 |
+
u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
|
| 42 |
+
v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
|
| 43 |
+
uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
|
| 44 |
+
for part_id in range(1, u_bbox.size(1)):
|
| 45 |
+
uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id]
|
| 46 |
+
uv[1][labels == part_id] = v_bbox[0, part_id][labels == part_id]
|
| 47 |
+
return uv
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def resample_uv_to_bbox(
|
| 51 |
+
predictor_output: DensePoseChartPredictorOutput,
|
| 52 |
+
labels: torch.Tensor,
|
| 53 |
+
box_xywh_abs: IntTupleBox,
|
| 54 |
+
) -> torch.Tensor:
|
| 55 |
+
"""
|
| 56 |
+
Resamples U and V coordinate estimates for the given bounding box
|
| 57 |
+
|
| 58 |
+
Args:
|
| 59 |
+
predictor_output (DensePoseChartPredictorOutput): DensePose predictor
|
| 60 |
+
output to be resampled
|
| 61 |
+
labels (tensor [H, W] of long): labels obtained by resampling segmentation
|
| 62 |
+
outputs for the given bounding box
|
| 63 |
+
box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
|
| 64 |
+
Return:
|
| 65 |
+
Resampled U and V coordinates - a tensor [2, H, W] of float
|
| 66 |
+
"""
|
| 67 |
+
return resample_uv_tensors_to_bbox(
|
| 68 |
+
predictor_output.u,
|
| 69 |
+
predictor_output.v,
|
| 70 |
+
labels,
|
| 71 |
+
box_xywh_abs,
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def densepose_chart_predictor_output_to_result(
|
| 76 |
+
predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
|
| 77 |
+
) -> DensePoseChartResult:
|
| 78 |
+
"""
|
| 79 |
+
Convert densepose chart predictor outputs to results
|
| 80 |
+
|
| 81 |
+
Args:
|
| 82 |
+
predictor_output (DensePoseChartPredictorOutput): DensePose predictor
|
| 83 |
+
output to be converted to results, must contain only 1 output
|
| 84 |
+
boxes (Boxes): bounding box that corresponds to the predictor output,
|
| 85 |
+
must contain only 1 bounding box
|
| 86 |
+
Return:
|
| 87 |
+
DensePose chart-based result (DensePoseChartResult)
|
| 88 |
+
"""
|
| 89 |
+
assert len(predictor_output) == 1 and len(boxes) == 1, (
|
| 90 |
+
f"Predictor output to result conversion can operate only single outputs"
|
| 91 |
+
f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
boxes_xyxy_abs = boxes.tensor.clone()
|
| 95 |
+
boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
|
| 96 |
+
box_xywh = make_int_box(boxes_xywh_abs[0])
|
| 97 |
+
|
| 98 |
+
labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
|
| 99 |
+
uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
|
| 100 |
+
return DensePoseChartResult(labels=labels, uv=uv)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def resample_confidences_to_bbox(
|
| 104 |
+
predictor_output: DensePoseChartPredictorOutput,
|
| 105 |
+
labels: torch.Tensor,
|
| 106 |
+
box_xywh_abs: IntTupleBox,
|
| 107 |
+
) -> Dict[str, torch.Tensor]:
|
| 108 |
+
"""
|
| 109 |
+
Resamples confidences for the given bounding box
|
| 110 |
+
|
| 111 |
+
Args:
|
| 112 |
+
predictor_output (DensePoseChartPredictorOutput): DensePose predictor
|
| 113 |
+
output to be resampled
|
| 114 |
+
labels (tensor [H, W] of long): labels obtained by resampling segmentation
|
| 115 |
+
outputs for the given bounding box
|
| 116 |
+
box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
|
| 117 |
+
Return:
|
| 118 |
+
Resampled confidences - a dict of [H, W] tensors of float
|
| 119 |
+
"""
|
| 120 |
+
|
| 121 |
+
x, y, w, h = box_xywh_abs
|
| 122 |
+
w = max(int(w), 1)
|
| 123 |
+
h = max(int(h), 1)
|
| 124 |
+
|
| 125 |
+
confidence_names = [
|
| 126 |
+
"sigma_1",
|
| 127 |
+
"sigma_2",
|
| 128 |
+
"kappa_u",
|
| 129 |
+
"kappa_v",
|
| 130 |
+
"fine_segm_confidence",
|
| 131 |
+
"coarse_segm_confidence",
|
| 132 |
+
]
|
| 133 |
+
confidence_results = {key: None for key in confidence_names}
|
| 134 |
+
confidence_names = [
|
| 135 |
+
key for key in confidence_names if getattr(predictor_output, key) is not None
|
| 136 |
+
]
|
| 137 |
+
confidence_base = torch.zeros([h, w], dtype=torch.float32, device=predictor_output.u.device)
|
| 138 |
+
|
| 139 |
+
# assign data from channels that correspond to the labels
|
| 140 |
+
for key in confidence_names:
|
| 141 |
+
resampled_confidence = F.interpolate(
|
| 142 |
+
getattr(predictor_output, key),
|
| 143 |
+
(h, w),
|
| 144 |
+
mode="bilinear",
|
| 145 |
+
align_corners=False,
|
| 146 |
+
)
|
| 147 |
+
result = confidence_base.clone()
|
| 148 |
+
for part_id in range(1, predictor_output.u.size(1)):
|
| 149 |
+
if resampled_confidence.size(1) != predictor_output.u.size(1):
|
| 150 |
+
# confidence is not part-based, don't try to fill it part by part
|
| 151 |
+
continue
|
| 152 |
+
result[labels == part_id] = resampled_confidence[0, part_id][labels == part_id]
|
| 153 |
+
|
| 154 |
+
if resampled_confidence.size(1) != predictor_output.u.size(1):
|
| 155 |
+
# confidence is not part-based, fill the data with the first channel
|
| 156 |
+
# (targeted for segmentation confidences that have only 1 channel)
|
| 157 |
+
result = resampled_confidence[0, 0]
|
| 158 |
+
|
| 159 |
+
confidence_results[key] = result
|
| 160 |
+
|
| 161 |
+
return confidence_results # pyre-ignore[7]
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def densepose_chart_predictor_output_to_result_with_confidences(
|
| 165 |
+
predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
|
| 166 |
+
) -> DensePoseChartResultWithConfidences:
|
| 167 |
+
"""
|
| 168 |
+
Convert densepose chart predictor outputs to results
|
| 169 |
+
|
| 170 |
+
Args:
|
| 171 |
+
predictor_output (DensePoseChartPredictorOutput): DensePose predictor
|
| 172 |
+
output with confidences to be converted to results, must contain only 1 output
|
| 173 |
+
boxes (Boxes): bounding box that corresponds to the predictor output,
|
| 174 |
+
must contain only 1 bounding box
|
| 175 |
+
Return:
|
| 176 |
+
DensePose chart-based result with confidences (DensePoseChartResultWithConfidences)
|
| 177 |
+
"""
|
| 178 |
+
assert len(predictor_output) == 1 and len(boxes) == 1, (
|
| 179 |
+
f"Predictor output to result conversion can operate only single outputs"
|
| 180 |
+
f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
boxes_xyxy_abs = boxes.tensor.clone()
|
| 184 |
+
boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
|
| 185 |
+
box_xywh = make_int_box(boxes_xywh_abs[0])
|
| 186 |
+
|
| 187 |
+
labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
|
| 188 |
+
uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
|
| 189 |
+
confidences = resample_confidences_to_bbox(predictor_output, labels, box_xywh)
|
| 190 |
+
return DensePoseChartResultWithConfidences(labels=labels, uv=uv, **confidences)
|
CatVTON/densepose/converters/hflip.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
from .base import BaseConverter
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class HFlipConverter(BaseConverter):
|
| 11 |
+
"""
|
| 12 |
+
Converts various DensePose predictor outputs to DensePose results.
|
| 13 |
+
Each DensePose predictor output type has to register its convertion strategy.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
registry = {}
|
| 17 |
+
dst_type = None
|
| 18 |
+
|
| 19 |
+
@classmethod
|
| 20 |
+
# pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
|
| 21 |
+
# inconsistently.
|
| 22 |
+
def convert(cls, predictor_outputs: Any, transform_data: Any, *args, **kwargs):
|
| 23 |
+
"""
|
| 24 |
+
Performs an horizontal flip on DensePose predictor outputs.
|
| 25 |
+
Does recursive lookup for base classes, so there's no need
|
| 26 |
+
for explicit registration for derived classes.
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
predictor_outputs: DensePose predictor output to be converted to BitMasks
|
| 30 |
+
transform_data: Anything useful for the flip
|
| 31 |
+
Return:
|
| 32 |
+
An instance of the same type as predictor_outputs
|
| 33 |
+
"""
|
| 34 |
+
return super(HFlipConverter, cls).convert(
|
| 35 |
+
predictor_outputs, transform_data, *args, **kwargs
|
| 36 |
+
)
|
CatVTON/densepose/converters/segm_to_mask.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from typing import Any
|
| 6 |
+
import torch
|
| 7 |
+
from torch.nn import functional as F
|
| 8 |
+
|
| 9 |
+
from detectron2.structures import BitMasks, Boxes, BoxMode
|
| 10 |
+
|
| 11 |
+
from .base import IntTupleBox, make_int_box
|
| 12 |
+
from .to_mask import ImageSizeType
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
|
| 16 |
+
"""
|
| 17 |
+
Resample coarse segmentation tensor to the given
|
| 18 |
+
bounding box and derive labels for each pixel of the bounding box
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
coarse_segm: float tensor of shape [1, K, Hout, Wout]
|
| 22 |
+
box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
|
| 23 |
+
corner coordinates, width (W) and height (H)
|
| 24 |
+
Return:
|
| 25 |
+
Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
|
| 26 |
+
"""
|
| 27 |
+
x, y, w, h = box_xywh_abs
|
| 28 |
+
w = max(int(w), 1)
|
| 29 |
+
h = max(int(h), 1)
|
| 30 |
+
labels = F.interpolate(coarse_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
|
| 31 |
+
return labels
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def resample_fine_and_coarse_segm_tensors_to_bbox(
|
| 35 |
+
fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
|
| 36 |
+
):
|
| 37 |
+
"""
|
| 38 |
+
Resample fine and coarse segmentation tensors to the given
|
| 39 |
+
bounding box and derive labels for each pixel of the bounding box
|
| 40 |
+
|
| 41 |
+
Args:
|
| 42 |
+
fine_segm: float tensor of shape [1, C, Hout, Wout]
|
| 43 |
+
coarse_segm: float tensor of shape [1, K, Hout, Wout]
|
| 44 |
+
box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
|
| 45 |
+
corner coordinates, width (W) and height (H)
|
| 46 |
+
Return:
|
| 47 |
+
Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
|
| 48 |
+
"""
|
| 49 |
+
x, y, w, h = box_xywh_abs
|
| 50 |
+
w = max(int(w), 1)
|
| 51 |
+
h = max(int(h), 1)
|
| 52 |
+
# coarse segmentation
|
| 53 |
+
coarse_segm_bbox = F.interpolate(
|
| 54 |
+
coarse_segm,
|
| 55 |
+
(h, w),
|
| 56 |
+
mode="bilinear",
|
| 57 |
+
align_corners=False,
|
| 58 |
+
).argmax(dim=1)
|
| 59 |
+
# combined coarse and fine segmentation
|
| 60 |
+
labels = (
|
| 61 |
+
F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
|
| 62 |
+
* (coarse_segm_bbox > 0).long()
|
| 63 |
+
)
|
| 64 |
+
return labels
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
|
| 68 |
+
"""
|
| 69 |
+
Resample fine and coarse segmentation outputs from a predictor to the given
|
| 70 |
+
bounding box and derive labels for each pixel of the bounding box
|
| 71 |
+
|
| 72 |
+
Args:
|
| 73 |
+
predictor_output: DensePose predictor output that contains segmentation
|
| 74 |
+
results to be resampled
|
| 75 |
+
box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
|
| 76 |
+
corner coordinates, width (W) and height (H)
|
| 77 |
+
Return:
|
| 78 |
+
Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
|
| 79 |
+
"""
|
| 80 |
+
return resample_fine_and_coarse_segm_tensors_to_bbox(
|
| 81 |
+
predictor_output.fine_segm,
|
| 82 |
+
predictor_output.coarse_segm,
|
| 83 |
+
box_xywh_abs,
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def predictor_output_with_coarse_segm_to_mask(
|
| 88 |
+
predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
|
| 89 |
+
) -> BitMasks:
|
| 90 |
+
"""
|
| 91 |
+
Convert predictor output with coarse and fine segmentation to a mask.
|
| 92 |
+
Assumes that predictor output has the following attributes:
|
| 93 |
+
- coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
|
| 94 |
+
unnormalized scores for N instances; D is the number of coarse
|
| 95 |
+
segmentation labels, H and W is the resolution of the estimate
|
| 96 |
+
|
| 97 |
+
Args:
|
| 98 |
+
predictor_output: DensePose predictor output to be converted to mask
|
| 99 |
+
boxes (Boxes): bounding boxes that correspond to the DensePose
|
| 100 |
+
predictor outputs
|
| 101 |
+
image_size_hw (tuple [int, int]): image height Himg and width Wimg
|
| 102 |
+
Return:
|
| 103 |
+
BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
|
| 104 |
+
a mask of the size of the image for each instance
|
| 105 |
+
"""
|
| 106 |
+
H, W = image_size_hw
|
| 107 |
+
boxes_xyxy_abs = boxes.tensor.clone()
|
| 108 |
+
boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
|
| 109 |
+
N = len(boxes_xywh_abs)
|
| 110 |
+
masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
|
| 111 |
+
for i in range(len(boxes_xywh_abs)):
|
| 112 |
+
box_xywh = make_int_box(boxes_xywh_abs[i])
|
| 113 |
+
box_mask = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh)
|
| 114 |
+
x, y, w, h = box_xywh
|
| 115 |
+
masks[i, y : y + h, x : x + w] = box_mask
|
| 116 |
+
|
| 117 |
+
return BitMasks(masks)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def predictor_output_with_fine_and_coarse_segm_to_mask(
|
| 121 |
+
predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
|
| 122 |
+
) -> BitMasks:
|
| 123 |
+
"""
|
| 124 |
+
Convert predictor output with coarse and fine segmentation to a mask.
|
| 125 |
+
Assumes that predictor output has the following attributes:
|
| 126 |
+
- coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
|
| 127 |
+
unnormalized scores for N instances; D is the number of coarse
|
| 128 |
+
segmentation labels, H and W is the resolution of the estimate
|
| 129 |
+
- fine_segm (tensor of size [N, C, H, W]): fine segmentation
|
| 130 |
+
unnormalized scores for N instances; C is the number of fine
|
| 131 |
+
segmentation labels, H and W is the resolution of the estimate
|
| 132 |
+
|
| 133 |
+
Args:
|
| 134 |
+
predictor_output: DensePose predictor output to be converted to mask
|
| 135 |
+
boxes (Boxes): bounding boxes that correspond to the DensePose
|
| 136 |
+
predictor outputs
|
| 137 |
+
image_size_hw (tuple [int, int]): image height Himg and width Wimg
|
| 138 |
+
Return:
|
| 139 |
+
BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
|
| 140 |
+
a mask of the size of the image for each instance
|
| 141 |
+
"""
|
| 142 |
+
H, W = image_size_hw
|
| 143 |
+
boxes_xyxy_abs = boxes.tensor.clone()
|
| 144 |
+
boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
|
| 145 |
+
N = len(boxes_xywh_abs)
|
| 146 |
+
masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
|
| 147 |
+
for i in range(len(boxes_xywh_abs)):
|
| 148 |
+
box_xywh = make_int_box(boxes_xywh_abs[i])
|
| 149 |
+
labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
|
| 150 |
+
x, y, w, h = box_xywh
|
| 151 |
+
masks[i, y : y + h, x : x + w] = labels_i > 0
|
| 152 |
+
return BitMasks(masks)
|
CatVTON/densepose/converters/to_chart_result.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
from detectron2.structures import Boxes
|
| 8 |
+
|
| 9 |
+
from ..structures import DensePoseChartResult, DensePoseChartResultWithConfidences
|
| 10 |
+
from .base import BaseConverter
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class ToChartResultConverter(BaseConverter):
    """
    Dispatches conversion of DensePose predictor outputs to
    `DensePoseChartResult`. Each predictor output type registers its own
    conversion strategy in `registry`.
    """

    registry = {}
    dst_type = DensePoseChartResult

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs) -> DensePoseChartResult:
        """
        Convert DensePose predictor outputs to a `DensePoseChartResult` via a
        registered converter. Lookup walks the output type's base classes, so
        derived types need no explicit registration.

        Args:
            predictor_outputs: DensePose predictor output to be converted
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of DensePoseChartResult. If no suitable converter was
            found, raises KeyError
        """
        return super().convert(predictor_outputs, boxes, *args, **kwargs)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class ToChartResultConverterWithConfidences(BaseConverter):
    """
    Dispatches conversion of DensePose predictor outputs to
    `DensePoseChartResultWithConfidences`. Each predictor output type
    registers its own conversion strategy in `registry`.
    """

    registry = {}
    dst_type = DensePoseChartResultWithConfidences

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(
        cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs
    ) -> DensePoseChartResultWithConfidences:
        """
        Convert DensePose predictor outputs to a
        `DensePoseChartResultWithConfidences` via a registered converter.
        Lookup walks the output type's base classes, so derived types need no
        explicit registration.

        Args:
            predictor_outputs: DensePose predictor output with confidences
                to be converted
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
        Return:
            An instance of DensePoseChartResultWithConfidences. If no suitable
            converter was found, raises KeyError
        """
        return super().convert(predictor_outputs, boxes, *args, **kwargs)
|
CatVTON/densepose/converters/to_mask.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from typing import Any, Tuple
|
| 6 |
+
|
| 7 |
+
from detectron2.structures import BitMasks, Boxes
|
| 8 |
+
|
| 9 |
+
from .base import BaseConverter
|
| 10 |
+
|
| 11 |
+
ImageSizeType = Tuple[int, int]
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ToMaskConverter(BaseConverter):
    """
    Dispatches conversion of DensePose predictor outputs to bit masks
    (see `BitMasks`). Each predictor output type registers its own
    conversion strategy in `registry`.
    """

    registry = {}
    dst_type = BitMasks

    @classmethod
    # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
    #  inconsistently.
    def convert(
        cls,
        densepose_predictor_outputs: Any,
        boxes: Boxes,
        image_size_hw: ImageSizeType,
        *args,
        **kwargs
    ) -> BitMasks:
        """
        Convert DensePose predictor outputs to `BitMasks` via a registered
        converter. Lookup walks the output type's base classes, so derived
        types need no explicit registration.

        Args:
            densepose_predictor_outputs: DensePose predictor output to be
                converted to BitMasks
            boxes (Boxes): bounding boxes that correspond to the DensePose
                predictor outputs
            image_size_hw (tuple [int, int]): image height and width
        Return:
            An instance of `BitMasks`. If no suitable converter was found,
            raises KeyError
        """
        return super().convert(
            densepose_predictor_outputs, boxes, image_size_hw, *args, **kwargs
        )
|
CatVTON/densepose/engine/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from .trainer import Trainer
|
CatVTON/densepose/engine/trainer.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
import os
|
| 7 |
+
from collections import OrderedDict
|
| 8 |
+
from typing import List, Optional, Union
|
| 9 |
+
import torch
|
| 10 |
+
from torch import nn
|
| 11 |
+
|
| 12 |
+
from detectron2.checkpoint import DetectionCheckpointer
|
| 13 |
+
from detectron2.config import CfgNode
|
| 14 |
+
from detectron2.engine import DefaultTrainer
|
| 15 |
+
from detectron2.evaluation import (
|
| 16 |
+
DatasetEvaluator,
|
| 17 |
+
DatasetEvaluators,
|
| 18 |
+
inference_on_dataset,
|
| 19 |
+
print_csv_format,
|
| 20 |
+
)
|
| 21 |
+
from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
|
| 22 |
+
from detectron2.utils import comm
|
| 23 |
+
from detectron2.utils.events import EventWriter, get_event_storage
|
| 24 |
+
|
| 25 |
+
from densepose import DensePoseDatasetMapperTTA, DensePoseGeneralizedRCNNWithTTA, load_from_cfg
|
| 26 |
+
from densepose.data import (
|
| 27 |
+
DatasetMapper,
|
| 28 |
+
build_combined_loader,
|
| 29 |
+
build_detection_test_loader,
|
| 30 |
+
build_detection_train_loader,
|
| 31 |
+
build_inference_based_loaders,
|
| 32 |
+
has_inference_based_loaders,
|
| 33 |
+
)
|
| 34 |
+
from densepose.evaluation.d2_evaluator_adapter import Detectron2COCOEvaluatorAdapter
|
| 35 |
+
from densepose.evaluation.evaluator import DensePoseCOCOEvaluator, build_densepose_evaluator_storage
|
| 36 |
+
from densepose.modeling.cse import Embedder
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class SampleCountingLoader:
    """
    Wraps a data loader; while iterating, logs the number of instances seen
    per dataset in each batch to the detectron2 event storage under
    "batch/<dataset_name>".
    """

    def __init__(self, loader):
        self.loader = loader

    def __iter__(self):
        storage = get_event_storage()
        for batch in self.loader:
            # tally instances per dataset for this batch
            counts = {}
            for sample in batch:
                dataset_name = sample["dataset"]
                counts[dataset_name] = counts.get(dataset_name, 0) + len(sample["instances"])
            for dataset_name, count in counts.items():
                storage.put_scalar(f"batch/{dataset_name}", count)
            yield batch
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class SampleCountMetricPrinter(EventWriter):
    """
    Event writer that logs rolling averages (window of 20) of the
    "batch/<dataset_name>" sample-count scalars recorded by
    `SampleCountingLoader`.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def write(self):
        storage = get_event_storage()
        batch_stats_strs = [
            f"{key} {buf.avg(20)}"
            for key, buf in storage.histories().items()
            if key.startswith("batch/")
        ]
        self.logger.info(", ".join(batch_stats_strs))
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class Trainer(DefaultTrainer):
    """
    DensePose trainer. Extends detectron2's `DefaultTrainer` with:
    - DensePose-specific evaluators (COCO adapter + DensePose COCO evaluator),
    - passing the CSE embedder from the model to the evaluator,
    - bootstrapped (inference-based) training data loaders with per-dataset
      sample counting,
    - test-time-augmentation evaluation.
    """

    @classmethod
    def extract_embedder_from_model(cls, model: nn.Module) -> Optional[Embedder]:
        """
        Return the CSE embedder attached to the model's ROI heads, if any.
        Unwraps `DistributedDataParallel` before looking for
        `roi_heads.embedder`; returns None when the model has no embedder.
        """
        if isinstance(model, nn.parallel.DistributedDataParallel):
            model = model.module
        if hasattr(model, "roi_heads") and hasattr(model.roi_heads, "embedder"):
            return model.roi_heads.embedder
        return None

    # TODO: the only reason to copy the base class code here is to pass the embedder from
    # the model to the evaluator; that should be refactored to avoid unnecessary copy-pasting
    @classmethod
    def test(
        cls,
        cfg: CfgNode,
        model: nn.Module,
        evaluators: Optional[Union[DatasetEvaluator, List[DatasetEvaluator]]] = None,
    ):
        """
        Run evaluation on all datasets in ``cfg.DATASETS.TEST``.

        Args:
            cfg (CfgNode):
            model (nn.Module):
            evaluators (DatasetEvaluator, list[DatasetEvaluator] or None): if None, will call
                :meth:`build_evaluator`. Otherwise, must have the same length as
                ``cfg.DATASETS.TEST``.

        Returns:
            dict: a dict of result metrics
        """
        logger = logging.getLogger(__name__)
        if isinstance(evaluators, DatasetEvaluator):
            evaluators = [evaluators]
        if evaluators is not None:
            assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
                len(cfg.DATASETS.TEST), len(evaluators)
            )

        results = OrderedDict()
        for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
            data_loader = cls.build_test_loader(cfg, dataset_name)
            # When evaluators are passed in as arguments,
            # implicitly assume that evaluators can be created before data_loader.
            if evaluators is not None:
                evaluator = evaluators[idx]
            else:
                try:
                    # pass the embedder along so CSE metrics can be computed
                    embedder = cls.extract_embedder_from_model(model)
                    evaluator = cls.build_evaluator(cfg, dataset_name, embedder=embedder)
                except NotImplementedError:
                    # fix: `Logger.warn` is a deprecated alias of `Logger.warning`
                    logger.warning(
                        "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
                        "or implement its `build_evaluator` method."
                    )
                    results[dataset_name] = {}
                    continue
            # non-main processes skip inference unless distributed inference is on
            if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE or comm.is_main_process():
                results_i = inference_on_dataset(model, data_loader, evaluator)
            else:
                results_i = {}
            results[dataset_name] = results_i
            if comm.is_main_process():
                assert isinstance(
                    results_i, dict
                ), "Evaluator must return a dict on the main process. Got {} instead.".format(
                    results_i
                )
                logger.info("Evaluation results for {} in csv format:".format(dataset_name))
                print_csv_format(results_i)

        if len(results) == 1:
            results = list(results.values())[0]
        return results

    @classmethod
    def build_evaluator(
        cls,
        cfg: CfgNode,
        dataset_name: str,
        output_folder: Optional[str] = None,
        embedder: Optional[Embedder] = None,
    ) -> DatasetEvaluators:
        """
        Build the evaluator stack for ``dataset_name``: a COCO bbox evaluator
        adapter plus, when DensePose is enabled, a `DensePoseCOCOEvaluator`.

        Args:
            cfg (CfgNode): configuration options
            dataset_name (str): registered dataset name
            output_folder (str or None): where to store evaluation artifacts;
                defaults to ``<OUTPUT_DIR>/inference``
            embedder (Embedder or None): CSE embedder used for mesh-alignment metrics
        Return:
            A `DatasetEvaluators` combining all applicable evaluators
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluators = []
        distributed = cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE
        # Note: we currently use COCO evaluator for both COCO and LVIS datasets
        # to have compatible metrics. LVIS bbox evaluator could also be used
        # with an adapter to properly handle filtered / mapped categories
        # evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        # if evaluator_type == "coco":
        #     evaluators.append(COCOEvaluator(dataset_name, output_dir=output_folder))
        # elif evaluator_type == "lvis":
        #     evaluators.append(LVISEvaluator(dataset_name, output_dir=output_folder))
        evaluators.append(
            Detectron2COCOEvaluatorAdapter(
                dataset_name, output_dir=output_folder, distributed=distributed
            )
        )
        if cfg.MODEL.DENSEPOSE_ON:
            storage = build_densepose_evaluator_storage(cfg, output_folder)
            evaluators.append(
                DensePoseCOCOEvaluator(
                    dataset_name,
                    distributed,
                    output_folder,
                    evaluator_type=cfg.DENSEPOSE_EVALUATION.TYPE,
                    min_iou_threshold=cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD,
                    storage=storage,
                    embedder=embedder,
                    should_evaluate_mesh_alignment=cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT,
                    mesh_alignment_mesh_names=cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES,
                )
            )
        return DatasetEvaluators(evaluators)

    @classmethod
    def build_optimizer(cls, cfg: CfgNode, model: nn.Module):
        """
        Build an SGD optimizer with per-parameter-group learning-rate overrides
        for CSE "features" and "embeddings" parameters, with optional gradient
        clipping per ``cfg.SOLVER``.
        """
        params = get_default_optimizer_params(
            model,
            base_lr=cfg.SOLVER.BASE_LR,
            weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
            bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
            weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
            overrides={
                "features": {
                    "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR,
                },
                "embeddings": {
                    "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR,
                },
            },
        )
        optimizer = torch.optim.SGD(
            params,
            cfg.SOLVER.BASE_LR,
            momentum=cfg.SOLVER.MOMENTUM,
            nesterov=cfg.SOLVER.NESTEROV,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY,
        )
        # pyre-fixme[6]: For 2nd param expected `Type[Optimizer]` but got `SGD`.
        return maybe_add_gradient_clipping(cfg, optimizer)

    @classmethod
    def build_test_loader(cls, cfg: CfgNode, dataset_name):
        """Build a test loader with the DensePose `DatasetMapper` (inference mode)."""
        return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))

    @classmethod
    def build_train_loader(cls, cfg: CfgNode):
        """
        Build the training loader. When inference-based (bootstrap) loaders are
        configured, combine them with the regular loader using the configured
        mixing ratios and wrap the result in a `SampleCountingLoader` so that
        per-dataset sample counts get logged.
        """
        data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
        if not has_inference_based_loaders(cfg):
            return data_loader
        # a separate (frozen) model generates pseudo-labels for bootstrap loaders
        model = cls.build_model(cfg)
        model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
        DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
        inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
        loaders = [data_loader] + inference_based_loaders
        ratios = [1.0] + ratios
        combined_data_loader = build_combined_loader(cfg, loaders, ratios)
        sample_counting_loader = SampleCountingLoader(combined_data_loader)
        return sample_counting_loader

    def build_writers(self):
        """Extend the default writers with the per-dataset sample-count printer."""
        writers = super().build_writers()
        writers.append(SampleCountMetricPrinter())
        return writers

    @classmethod
    def test_with_TTA(cls, cfg: CfgNode, model):
        """
        Evaluate ``model`` with test-time augmentation; result keys get a
        ``_TTA`` suffix. Only some R-CNN models are supported.
        """
        logger = logging.getLogger("detectron2.trainer")
        # In the end of training, run an evaluation with TTA
        # Only support some R-CNN models.
        logger.info("Running inference with test-time augmentation ...")
        transform_data = load_from_cfg(cfg)
        model = DensePoseGeneralizedRCNNWithTTA(
            cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
        )
        evaluators = [
            cls.build_evaluator(
                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            )
            for name in cfg.DATASETS.TEST
        ]
        res = cls.test(cfg, model, evaluators)  # pyre-ignore[6]
        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
        return res
|
CatVTON/densepose/modeling/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from .confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
|
| 6 |
+
from .filter import DensePoseDataFilter
|
| 7 |
+
from .inference import densepose_inference
|
| 8 |
+
from .utils import initialize_module_params
|
| 9 |
+
from .build import (
|
| 10 |
+
build_densepose_data_filter,
|
| 11 |
+
build_densepose_embedder,
|
| 12 |
+
build_densepose_head,
|
| 13 |
+
build_densepose_losses,
|
| 14 |
+
build_densepose_predictor,
|
| 15 |
+
)
|
CatVTON/densepose/modeling/build.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from typing import Optional
|
| 6 |
+
from torch import nn
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
|
| 10 |
+
from .cse.embedder import Embedder
|
| 11 |
+
from .filter import DensePoseDataFilter
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def build_densepose_predictor(cfg: CfgNode, input_channels: int):
    """
    Create an instance of DensePose predictor based on configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose predictor
    """
    # imported lazily to avoid a circular dependency at module import time
    from .predictors import DENSEPOSE_PREDICTOR_REGISTRY

    predictor_cls = DENSEPOSE_PREDICTOR_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME)
    return predictor_cls(cfg, input_channels)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def build_densepose_data_filter(cfg: CfgNode):
    """
    Build DensePose data filter which selects data for training

    Args:
        cfg (CfgNode): configuration options

    Return:
        Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
        An instance of DensePose filter, which takes feature tensors and proposals
        as an input and returns filtered features and proposals
    """
    return DensePoseDataFilter(cfg)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def build_densepose_head(cfg: CfgNode, input_channels: int):
    """
    Build DensePose head based on configuration options.

    Args:
        cfg (CfgNode): configuration options
        input_channels (int): input tensor size along the channel dimension
    Return:
        An instance of DensePose head
    """
    # imported lazily to avoid a circular dependency at module import time
    from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY

    head_cls = ROI_DENSEPOSE_HEAD_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME)
    return head_cls(cfg, input_channels)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def build_densepose_losses(cfg: CfgNode):
    """
    Build DensePose loss based on configuration options.

    Args:
        cfg (CfgNode): configuration options
    Return:
        An instance of DensePose loss
    """
    # imported lazily to avoid a circular dependency at module import time
    from .losses import DENSEPOSE_LOSS_REGISTRY

    loss_cls = DENSEPOSE_LOSS_REGISTRY.get(cfg.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME)
    return loss_cls(cfg)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def build_densepose_embedder(cfg: CfgNode) -> Optional[nn.Module]:
    """
    Build embedder used to embed mesh vertices into an embedding space.
    Embedder contains sub-embedders, one for each mesh ID.

    Args:
        cfg (cfgNode): configuration options
    Return:
        Embedding module, or None when no CSE embedders are configured
    """
    return Embedder(cfg) if cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS else None
|
CatVTON/densepose/modeling/confidence.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
from enum import Enum
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class DensePoseUVConfidenceType(Enum):
    """
    Statistical model type used for UV confidence learning. Possible values:
    - "iid_iso": statistically independent identically distributed residuals
    - "indep_aniso": statistically independent residuals with anisotropic
      covariances
    For details, see:
    N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
    Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
    """

    IID_ISO = "iid_iso"
    INDEP_ANISO = "indep_aniso"
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class DensePoseUVConfidenceConfig:
    """
    Configuration options for confidence on UV data.
    """

    # whether UV confidence estimation is enabled
    enabled: bool = False
    # lower bound on UV confidences
    epsilon: float = 0.01
    # statistical model used for the UV confidences
    type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@dataclass
class DensePoseSegmConfidenceConfig:
    """
    Configuration options for confidence on segmentation.
    """

    # whether segmentation confidence estimation is enabled
    enabled: bool = False
    # lower bound on confidence values
    epsilon: float = 0.01
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
@dataclass
class DensePoseConfidenceModelConfig:
    """
    Configuration options for confidence models.
    """

    # confidence for U and V values
    uv_confidence: DensePoseUVConfidenceConfig
    # segmentation confidence
    segm_confidence: DensePoseSegmConfidenceConfig

    @staticmethod
    def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
        """Assemble the confidence configuration from detectron2 config options."""
        head = cfg.MODEL.ROI_DENSEPOSE_HEAD
        uv = DensePoseUVConfidenceConfig(
            enabled=head.UV_CONFIDENCE.ENABLED,
            epsilon=head.UV_CONFIDENCE.EPSILON,
            type=DensePoseUVConfidenceType(head.UV_CONFIDENCE.TYPE),
        )
        segm = DensePoseSegmConfidenceConfig(
            enabled=head.SEGM_CONFIDENCE.ENABLED,
            epsilon=head.SEGM_CONFIDENCE.EPSILON,
        )
        return DensePoseConfidenceModelConfig(uv_confidence=uv, segm_confidence=segm)
|
CatVTON/densepose/modeling/densepose_checkpoint.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
from collections import OrderedDict
|
| 5 |
+
|
| 6 |
+
from detectron2.checkpoint import DetectionCheckpointer
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def _rename_HRNet_weights(weights):
|
| 10 |
+
# We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
|
| 11 |
+
# common to all HRNet pretrained weights, and should be enough to accurately identify them
|
| 12 |
+
if (
|
| 13 |
+
len(weights["model"].keys()) == 1956
|
| 14 |
+
and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
|
| 15 |
+
):
|
| 16 |
+
hrnet_weights = OrderedDict()
|
| 17 |
+
for k in weights["model"].keys():
|
| 18 |
+
hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
|
| 19 |
+
return {"model": hrnet_weights}
|
| 20 |
+
else:
|
| 21 |
+
return weights
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class DensePoseCheckpointer(DetectionCheckpointer):
    """
    Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights:
    checkpoint files loaded through :meth:`_load_file` are passed through
    `_rename_HRNet_weights`, which prefixes HRNet keys so they match the
    DensePose backbone layout.
    """

    def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
        super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)

    def _load_file(self, filename: str) -> object:
        """
        Load a checkpoint via the base class, then rename HRNet keys if the
        file is detected to contain HRNet-pretrained weights.
        """
        weights = super()._load_file(filename)
        return _rename_HRNet_weights(weights)
|
CatVTON/densepose/modeling/filter.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from typing import List
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
from detectron2.config import CfgNode
|
| 9 |
+
from detectron2.structures import Instances
|
| 10 |
+
from detectron2.structures.boxes import matched_pairwise_iou
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class DensePoseDataFilter:
    """
    Selects proposals suitable for DensePose training: keeps only proposals
    whose IoU with the matched GT box exceeds a configured threshold and which
    carry DensePose (or, optionally, mask) ground-truth annotations.
    """

    def __init__(self, cfg: CfgNode):
        # minimum IoU between a proposal and its GT box for the proposal to be kept
        self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
        # when True, proposals with only mask GT (no DensePose GT) are also kept
        self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS

    @torch.no_grad()
    def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
        """
        Filters proposals with targets to keep only the ones relevant for
        DensePose training

        Args:
            features (list[Tensor]): input data as a list of features,
                each feature is a tensor. Axis 0 represents the number of
                images `N` in the input data; axes 1-3 are channels,
                height, and width, which may vary between features
                (e.g., if a feature pyramid is used).
            proposals_with_targets (list[Instances]): length `N` list of
                `Instances`. The i-th `Instances` contains instances
                (proposals, GT) for the i-th input image,
        Returns:
            list[Tensor]: filtered features (currently returned unchanged,
                see the TODO note below)
            list[Instances]: filtered proposals
        """
        proposals_filtered = []
        # TODO: the commented out code was supposed to correctly deal with situations
        # where no valid DensePose GT is available for certain images. The corresponding
        # image features were sliced and proposals were filtered. This led to performance
        # deterioration, both in terms of runtime and in terms of evaluation results.
        #
        # feature_mask = torch.ones(
        #    len(proposals_with_targets),
        #    dtype=torch.bool,
        #    device=features[0].device if len(features) > 0 else torch.device("cpu"),
        # )
        for i, proposals_per_image in enumerate(proposals_with_targets):
            # skip images that carry neither DensePose GT nor (if enabled) mask GT
            if not proposals_per_image.has("gt_densepose") and (
                not proposals_per_image.has("gt_masks") or not self.keep_masks
            ):
                # feature_mask[i] = 0
                continue
            gt_boxes = proposals_per_image.gt_boxes
            est_boxes = proposals_per_image.proposal_boxes
            # apply match threshold for densepose head
            iou = matched_pairwise_iou(gt_boxes, est_boxes)
            iou_select = iou > self.iou_threshold
            proposals_per_image = proposals_per_image[iou_select]  # pyre-ignore[6]

            N_gt_boxes = len(proposals_per_image.gt_boxes)
            assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
                f"The number of GT boxes {N_gt_boxes} is different from the "
                f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
            )
            # filter out any target without suitable annotation
            if self.keep_masks:
                gt_masks = (
                    proposals_per_image.gt_masks
                    if hasattr(proposals_per_image, "gt_masks")
                    else [None] * N_gt_boxes
                )
            else:
                gt_masks = [None] * N_gt_boxes
            gt_densepose = (
                proposals_per_image.gt_densepose
                if hasattr(proposals_per_image, "gt_densepose")
                else [None] * N_gt_boxes
            )
            assert len(gt_masks) == N_gt_boxes
            assert len(gt_densepose) == N_gt_boxes
            # keep proposals that have at least one of: DensePose GT, mask GT
            selected_indices = [
                i
                for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
                if (dp_target is not None) or (mask_target is not None)
            ]
            # if not len(selected_indices):
            #     feature_mask[i] = 0
            #     continue
            if len(selected_indices) != N_gt_boxes:
                proposals_per_image = proposals_per_image[selected_indices]  # pyre-ignore[6]
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
            proposals_filtered.append(proposals_per_image)
        # features_filtered = [feature[feature_mask] for feature in features]
        # return features_filtered, proposals_filtered
        return features, proposals_filtered
|
CatVTON/densepose/modeling/hrfpn.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
"""
|
| 5 |
+
MIT License
|
| 6 |
+
Copyright (c) 2019 Microsoft
|
| 7 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 8 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 9 |
+
in the Software without restriction, including without limitation the rights
|
| 10 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 11 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 12 |
+
furnished to do so, subject to the following conditions:
|
| 13 |
+
The above copyright notice and this permission notice shall be included in all
|
| 14 |
+
copies or substantial portions of the Software.
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
import torch
|
| 25 |
+
import torch.nn as nn
|
| 26 |
+
import torch.nn.functional as F
|
| 27 |
+
|
| 28 |
+
from detectron2.layers import ShapeSpec
|
| 29 |
+
from detectron2.modeling.backbone import BACKBONE_REGISTRY
|
| 30 |
+
from detectron2.modeling.backbone.backbone import Backbone
|
| 31 |
+
|
| 32 |
+
from .hrnet import build_pose_hrnet_backbone
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class HRFPN(Backbone):
    """HRFPN (High Resolution Feature Pyramids).

    Transforms outputs of an HRNet backbone so they are suitable for the ROI heads.
    arXiv: https://arxiv.org/abs/1904.04514
    Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py

    Args:
        bottom_up: HRNet backbone module; its forward returns a dict of feature maps
        in_features (list): names of the input features (outputs of HRNet)
        n_out_features (int): number of output pyramid stages
        in_channels (list): number of channels for each branch
        out_channels (int): output channels of the feature pyramids
        pooling (str): pooling mode for feature pyramids, "MAX" or "AVG".
            NOTE(review): `self.pooling` is assigned but never used in `forward`
            (pooling was replaced by strided reduction convs) — kept for
            interface compatibility; confirm before removing.
        share_conv (bool): if True, a single output conv is shared by all
            pyramid levels; otherwise one conv per level
    """

    def __init__(
        self,
        bottom_up,
        in_features,
        n_out_features,
        in_channels,
        out_channels,
        pooling="AVG",
        share_conv=False,
    ):
        super(HRFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.bottom_up = bottom_up
        self.in_features = in_features
        self.n_out_features = n_out_features
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.share_conv = share_conv

        if self.share_conv:
            # One 3x3 conv applied to every pyramid level.
            self.fpn_conv = nn.Conv2d(
                in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
            )
        else:
            # One 3x3 conv per pyramid level.
            self.fpn_conv = nn.ModuleList()
            for _ in range(self.n_out_features):
                self.fpn_conv.append(
                    nn.Conv2d(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=3,
                        padding=1,
                    )
                )

        # Custom change: learned upsampling (deconv + BN + ReLU) replaces a
        # simple bilinear interpolation; branch i is upsampled by stride 2**i.
        self.interp_conv = nn.ModuleList()
        for i in range(len(self.in_features)):
            self.interp_conv.append(
                nn.Sequential(
                    nn.ConvTranspose2d(
                        in_channels=in_channels[i],
                        out_channels=in_channels[i],
                        kernel_size=4,
                        stride=2**i,
                        padding=0,
                        output_padding=0,
                        bias=False,
                    ),
                    nn.BatchNorm2d(in_channels[i], momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        # Custom change: a single strided conv replaces the original
        # (reduction conv + pooling) pair; output i is downsampled by 2**i.
        self.reduction_pooling_conv = nn.ModuleList()
        for i in range(self.n_out_features):
            self.reduction_pooling_conv.append(
                nn.Sequential(
                    nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**i, stride=2**i),
                    nn.BatchNorm2d(out_channels, momentum=0.1),
                    nn.ReLU(inplace=True),
                )
            )

        if pooling == "MAX":
            self.pooling = F.max_pool2d
        else:
            self.pooling = F.avg_pool2d

        # Detectron2 Backbone metadata describing the outputs ("p1", "p2", ...).
        self._out_features = []
        self._out_feature_channels = {}
        self._out_feature_strides = {}

        for i in range(self.n_out_features):
            self._out_features.append("p%d" % (i + 1))
            self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
            self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})

    # default init_weights for conv (msra) and norm in ConvModule
    def init_weights(self):
        """Kaiming-initialize all Conv2d weights; zero their biases when present."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, a=1)
                # Fix: the original called nn.init.constant_(m.bias, 0)
                # unconditionally, which raises if a bias-free Conv2d is present.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, inputs):
        """Run HRNet, fuse its branches, and return a dict of pyramid features.

        Args:
            inputs: image tensor fed to the bottom-up HRNet backbone.
        Returns:
            dict: maps "p1".."pK" to feature tensors of `out_channels` channels.
        """
        bottom_up_features = self.bottom_up(inputs)
        assert len(bottom_up_features) == len(self.in_features)
        inputs = [bottom_up_features[f] for f in self.in_features]

        # Upsample every branch towards the highest resolution.
        outs = []
        for i in range(len(inputs)):
            outs.append(self.interp_conv[i](inputs[i]))
        # Deconv output sizes can differ by a few pixels; crop to the smallest.
        shape_2 = min(o.shape[2] for o in outs)
        shape_3 = min(o.shape[3] for o in outs)
        out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
        # Build the pyramid with strided reduction convs.
        outs = []
        for i in range(self.n_out_features):
            outs.append(self.reduction_pooling_conv[i](out))
        for i in range(len(outs)):  # Make shapes consistent
            outs[-1 - i] = outs[-1 - i][
                :, :, : outs[-1].shape[2] * 2**i, : outs[-1].shape[3] * 2**i
            ]
        outputs = []
        for i in range(len(outs)):
            if self.share_conv:
                outputs.append(self.fpn_conv(outs[i]))
            else:
                outputs.append(self.fpn_conv[i](outs[i]))

        assert len(self._out_features) == len(outputs)
        return dict(zip(self._out_features, outputs))
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
@BACKBONE_REGISTRY.register()
def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN:
    """Build an HRNet bottom-up backbone wrapped in an HRFPN neck from config."""
    stage4 = cfg.MODEL.HRNET.STAGE4
    bottom_up = build_pose_hrnet_backbone(cfg, input_shape)
    return HRFPN(
        bottom_up,
        ["p%d" % (i + 1) for i in range(stage4.NUM_BRANCHES)],
        len(cfg.MODEL.ROI_HEADS.IN_FEATURES),
        stage4.NUM_CHANNELS,
        cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS,
        pooling="AVG",
        share_conv=False,
    )
|
CatVTON/densepose/modeling/hrnet.py
ADDED
|
@@ -0,0 +1,476 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
# ------------------------------------------------------------------------------
|
| 3 |
+
# Copyright (c) Microsoft
|
| 4 |
+
# Licensed under the MIT License.
|
| 5 |
+
# Written by Bin Xiao (leoxiaobin@gmail.com)
|
| 6 |
+
# Modified by Bowen Cheng (bcheng9@illinois.edu)
|
| 7 |
+
# Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa
|
| 8 |
+
# ------------------------------------------------------------------------------
|
| 9 |
+
|
| 10 |
+
# pyre-unsafe
|
| 11 |
+
|
| 12 |
+
from __future__ import absolute_import, division, print_function
|
| 13 |
+
import logging
|
| 14 |
+
import torch.nn as nn
|
| 15 |
+
|
| 16 |
+
from detectron2.layers import ShapeSpec
|
| 17 |
+
from detectron2.modeling.backbone import BACKBONE_REGISTRY
|
| 18 |
+
from detectron2.modeling.backbone.backbone import Backbone
|
| 19 |
+
|
| 20 |
+
# Default BatchNorm momentum shared by all HRNet blocks in this module.
BN_MOMENTUM = 0.1
# Module-level logger; used to report config mismatches before raising.
logger = logging.getLogger(__name__)

# Public API of this module.
__all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding=1 (BatchNorm follows it)."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class BasicBlock(nn.Module):
    """ResNet basic residual block: two 3x3 conv+BN stages plus a skip connection."""

    expansion = 1  # output channels == planes * expansion

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """
        Args:
            inplanes: number of input channels
            planes: number of internal/output channels
            stride: stride of the first conv (spatial downsampling)
            downsample: optional module projecting the identity branch to the
                output shape; required whenever shape or channels change
        """
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """conv-bn-relu, conv-bn, add identity, relu."""
        identity = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out = out + identity
        return self.relu(out)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 reduce, 3x3, 1x1 expand (x4), plus a skip."""

    expansion = 4  # output channels == planes * expansion

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """
        Args:
            inplanes: number of input channels
            planes: internal (reduced) channel count; output is planes * 4
            stride: stride of the middle 3x3 conv
            downsample: optional module projecting the identity branch to the
                output shape; required whenever shape or channels change
        """
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """1x1-bn-relu, 3x3-bn-relu, 1x1-bn, add identity, relu."""
        identity = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out = out + identity
        return self.relu(out)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class HighResolutionModule(nn.Module):
    """HighResolutionModule.

    Building block of PoseHigherResolutionNet (arXiv: https://arxiv.org/abs/1908.10357):
    runs one residual branch per resolution, then fuses every branch into every
    requested output resolution.

    Args:
        num_branches (int): number of parallel resolution branches
        blocks: residual block class used by every branch
        num_blocks (list): number of blocks per branch
        num_inchannels (list): input channels per branch (mutated in place to
            reflect block expansion)
        num_channels (list): internal channels per branch
        multi_scale_output (bool): when False only the highest-resolution
            output is produced (used by the last module of the network)
    """

    def __init__(
        self,
        num_branches,
        blocks,
        num_blocks,
        num_inchannels,
        num_channels,
        multi_scale_output=True,
    ):
        super(HighResolutionModule, self).__init__()
        self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels)

        self.num_inchannels = num_inchannels
        self.num_branches = num_branches

        self.multi_scale_output = multi_scale_output

        self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
        self.fuse_layers = self._make_fuse_layers()
        self.relu = nn.ReLU(True)

    def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels):
        """Log and raise when a per-branch config list disagrees with num_branches."""
        for label, values in (
            ("NUM_BLOCKS", num_blocks),
            ("NUM_CHANNELS", num_channels),
            ("NUM_INCHANNELS", num_inchannels),
        ):
            if num_branches != len(values):
                error_msg = "NUM_BRANCHES({}) <> {}({})".format(num_branches, label, len(values))
                logger.error(error_msg)
                raise ValueError(error_msg)

    def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
        """Build one branch: a chain of residual blocks at a fixed resolution."""
        out_channels = num_channels[branch_index] * block.expansion
        downsample = None
        if stride != 1 or self.num_inchannels[branch_index] != out_channels:
            # Project the identity path when shape or channels change.
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.num_inchannels[branch_index],
                    out_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM),
            )

        layers = [
            block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)
        ]
        # From here on the branch carries the expanded channel count.
        self.num_inchannels[branch_index] = out_channels
        for _ in range(1, num_blocks[branch_index]):
            layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))

        return nn.Sequential(*layers)

    def _make_branches(self, num_branches, block, num_blocks, num_channels):
        """Build every resolution branch."""
        return nn.ModuleList(
            [self._make_one_branch(i, block, num_blocks, num_channels) for i in range(num_branches)]
        )

    def _make_fuse_layers(self):
        """Build the cross-resolution fusion layers.

        Entry [i][j] maps branch j's output to branch i's resolution/channels:
        a 1x1 conv + nearest upsample for j > i, identity (None) for j == i,
        and a chain of stride-2 3x3 convs for j < i.
        """
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        num_inchannels = self.num_inchannels
        fuse_layers = []
        n_outputs = num_branches if self.multi_scale_output else 1
        for i in range(n_outputs):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    # Lower resolution -> higher: project channels, then upsample.
                    fuse_layer.append(
                        nn.Sequential(
                            nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
                            nn.BatchNorm2d(num_inchannels[i]),
                            nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"),
                        )
                    )
                elif j == i:
                    fuse_layer.append(None)
                else:
                    # Higher resolution -> lower: repeated stride-2 3x3 convs.
                    # Only the final step switches to branch i's channel count,
                    # and it omits the ReLU (applied after summation instead).
                    steps = []
                    for k in range(i - j):
                        last = k == i - j - 1
                        step_channels = num_inchannels[i] if last else num_inchannels[j]
                        modules = [
                            nn.Conv2d(
                                num_inchannels[j],
                                step_channels,
                                3,
                                2,
                                1,
                                bias=False,
                            ),
                            nn.BatchNorm2d(step_channels),
                        ]
                        if not last:
                            modules.append(nn.ReLU(True))
                        steps.append(nn.Sequential(*modules))
                    fuse_layer.append(nn.Sequential(*steps))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)

    def get_num_inchannels(self):
        """Return the (expansion-adjusted) per-branch channel counts."""
        return self.num_inchannels

    def forward(self, x):
        """Run each branch on its input, then fuse across resolutions.

        Args:
            x (list): one tensor per branch, highest resolution first.
        Returns:
            list: fused per-branch outputs (a single-element list when there is
            only one branch or multi_scale_output is False).
        """
        if self.num_branches == 1:
            return [self.branches[0](x[0])]

        for i, branch in enumerate(self.branches):
            x[i] = branch(x[i])

        x_fuse = []

        for i, fuse_layer in enumerate(self.fuse_layers):
            y = x[0] if i == 0 else fuse_layer[0](x[0])
            for j in range(1, self.num_branches):
                if i == j:
                    y = y + x[j]
                else:
                    # Crop to y's spatial size: upsampled maps may overshoot by a pixel.
                    z = fuse_layer[j](x[j])[:, :, : y.shape[2], : y.shape[3]]
                    y = y + z
            x_fuse.append(self.relu(y))

        return x_fuse
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
# Map config BLOCK names to residual block classes.
blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
class PoseHigherResolutionNet(Backbone):
    """PoseHigherResolutionNet.

    Composed of several HighResolutionModule instances tied together with ConvNets.
    Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure.
    arXiv: https://arxiv.org/abs/1908.10357
    """

    def __init__(self, cfg, **kwargs):
        # `inplanes` must exist before _make_layer runs below (it is mutated there).
        self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES
        super(PoseHigherResolutionNet, self).__init__()

        # Stem: two stride-2 3x3 convs (overall stride 4), then a bottleneck layer.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(Bottleneck, 64, 4)

        # Stages 2-4: each transition layer grows a lower-resolution branch.
        self.stage2_cfg = cfg.MODEL.HRNET.STAGE2
        num_channels = self.stage2_cfg.NUM_CHANNELS
        block = blocks_dict[self.stage2_cfg.BLOCK]
        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition1 = self._make_transition_layer([256], num_channels)
        self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)

        self.stage3_cfg = cfg.MODEL.HRNET.STAGE3
        num_channels = self.stage3_cfg.NUM_CHANNELS
        block = blocks_dict[self.stage3_cfg.BLOCK]
        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
        self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)

        self.stage4_cfg = cfg.MODEL.HRNET.STAGE4
        num_channels = self.stage4_cfg.NUM_CHANNELS
        block = blocks_dict[self.stage4_cfg.BLOCK]
        num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
        self.stage4, pre_stage_channels = self._make_stage(
            self.stage4_cfg, num_channels, multi_scale_output=True
        )

        # Detectron2 Backbone metadata: one output per stage-4 branch.
        self._out_features = []
        self._out_feature_channels = {}
        self._out_feature_strides = {}

        for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES):
            self._out_features.append("p%d" % (i + 1))
            self._out_feature_channels.update(
                {self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]}
            )
            # NOTE(review): stride is reported as 1 for every branch even though
            # the branches have different resolutions — confirm downstream
            # consumers (HRFPN) do not rely on real strides here.
            self._out_feature_strides.update({self._out_features[-1]: 1})

    def _get_deconv_cfg(self, deconv_kernel):
        """Return (kernel, padding, output_padding) for a supported deconv kernel.

        Fix: the original left `padding`/`output_padding` unbound for any kernel
        other than 2/3/4, which raised an opaque UnboundLocalError; raise a
        clear ValueError instead.

        Raises:
            ValueError: if deconv_kernel is not one of 2, 3, 4.
        """
        if deconv_kernel == 4:
            padding = 1
            output_padding = 0
        elif deconv_kernel == 3:
            padding = 1
            output_padding = 1
        elif deconv_kernel == 2:
            padding = 0
            output_padding = 0
        else:
            raise ValueError(f"Unsupported deconv kernel size: {deconv_kernel}")

        return deconv_kernel, padding, output_padding

    def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
        """Build per-branch transition modules between two stages.

        Existing branches get a 3x3 conv only when their channel count changes
        (None otherwise); each new branch is produced from the last previous
        branch by a chain of stride-2 convs.
        """
        num_branches_cur = len(num_channels_cur_layer)
        num_branches_pre = len(num_channels_pre_layer)

        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(
                        nn.Sequential(
                            nn.Conv2d(
                                num_channels_pre_layer[i],
                                num_channels_cur_layer[i],
                                3,
                                1,
                                1,
                                bias=False,
                            ),
                            nn.BatchNorm2d(num_channels_cur_layer[i]),
                            nn.ReLU(inplace=True),
                        )
                    )
                else:
                    transition_layers.append(None)
            else:
                # New (lower-resolution) branch: downsample from the last
                # previous branch with stride-2 convs.
                conv3x3s = []
                for j in range(i + 1 - num_branches_pre):
                    inchannels = num_channels_pre_layer[-1]
                    outchannels = (
                        num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
                    )
                    conv3x3s.append(
                        nn.Sequential(
                            nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
                            nn.BatchNorm2d(outchannels),
                            nn.ReLU(inplace=True),
                        )
                    )
                transition_layers.append(nn.Sequential(*conv3x3s))

        return nn.ModuleList(transition_layers)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build a chain of `blocks` residual blocks, updating self.inplanes."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the identity path when shape or channels change.
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
        """Build one stage as a sequence of HighResolutionModules.

        Returns:
            (nn.Sequential, list): the stage and its output channel counts.
        """
        num_modules = layer_config["NUM_MODULES"]
        num_branches = layer_config["NUM_BRANCHES"]
        num_blocks = layer_config["NUM_BLOCKS"]
        num_channels = layer_config["NUM_CHANNELS"]
        block = blocks_dict[layer_config["BLOCK"]]

        modules = []
        for i in range(num_modules):
            # multi_scale_output is only relevant for the last module.
            if not multi_scale_output and i == num_modules - 1:
                reset_multi_scale_output = False
            else:
                reset_multi_scale_output = True

            modules.append(
                HighResolutionModule(
                    num_branches,
                    block,
                    num_blocks,
                    num_inchannels,
                    num_channels,
                    reset_multi_scale_output,
                )
            )
            num_inchannels = modules[-1].get_num_inchannels()

        return nn.Sequential(*modules), num_inchannels

    def forward(self, x):
        """Run the stem and the three multi-resolution stages.

        Returns:
            dict: maps "p1".."pK" to the K stage-4 branch outputs
            (highest resolution first).
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.layer1(x)

        x_list = []
        for i in range(self.stage2_cfg.NUM_BRANCHES):
            if self.transition1[i] is not None:
                x_list.append(self.transition1[i](x))
            else:
                x_list.append(x)
        y_list = self.stage2(x_list)

        # NOTE(review): non-None transitions are fed y_list[-1] (the lowest
        # resolution), not y_list[i]; with the usual configs only the newly
        # grown branch has a non-None transition, so this matches upstream —
        # confirm if configs with mid-stage channel changes are ever used.
        x_list = []
        for i in range(self.stage3_cfg.NUM_BRANCHES):
            if self.transition2[i] is not None:
                x_list.append(self.transition2[i](y_list[-1]))
            else:
                x_list.append(y_list[i])
        y_list = self.stage3(x_list)

        x_list = []
        for i in range(self.stage4_cfg.NUM_BRANCHES):
            if self.transition3[i] is not None:
                x_list.append(self.transition3[i](y_list[-1]))
            else:
                x_list.append(y_list[i])
        y_list = self.stage4(x_list)

        assert len(self._out_features) == len(y_list)
        return dict(zip(self._out_features, y_list))  # final_outputs
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
@BACKBONE_REGISTRY.register()
def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec):
    """Construct a PoseHigherResolutionNet backbone from config (input_shape unused)."""
    return PoseHigherResolutionNet(cfg)
|
CatVTON/densepose/modeling/inference.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
from dataclasses import fields
|
| 5 |
+
from typing import Any, List
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
from detectron2.structures import Instances
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def densepose_inference(densepose_predictor_output: Any, detections: "List[Instances]") -> None:
    """
    Splits DensePose predictor outputs into chunks, each chunk corresponds to
    detections on one image. Predictor output chunks are stored in `pred_densepose`
    attribute of the corresponding `Instances` object.

    Args:
        densepose_predictor_output: a dataclass instance (can be of different types,
            depending on predictor used for inference). Each field can be `None`
            (if the corresponding output was not inferred) or a tensor of size
            [N, ...], where N = N_1 + N_2 + .. + N_k is a total number of
            detections on all images, N_1 is the number of detections on image 1,
            N_2 is the number of detections on image 2, etc.
        detections: a list of objects of type `Instance`, k-th object corresponds
            to detections on k-th image.
    """
    if densepose_predictor_output is None:
        # Nothing was inferred: leave `pred_densepose` unset on every Instances.
        # (Hoisted out of the loop — the original re-checked per detection.)
        return

    # All chunks share the predictor-output dataclass type.
    PredictorOutput = type(densepose_predictor_output)
    k = 0
    for detection_i in detections:
        n_i = len(detection_i)

        output_i_dict = {}
        # We assume here that `densepose_predictor_output` is a dataclass object.
        for field in fields(densepose_predictor_output):
            field_value = getattr(densepose_predictor_output, field.name)
            if isinstance(field_value, torch.Tensor):
                # Slice tensors: rows [k, k + n_i) belong to image i.
                output_i_dict[field.name] = field_value[k : k + n_i]
            else:
                # Leave non-tensor fields (including None) as is.
                output_i_dict[field.name] = field_value
        detection_i.pred_densepose = PredictorOutput(**output_i_dict)
        k += n_i
|
CatVTON/densepose/modeling/test_time_augmentation.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
import copy
|
| 5 |
+
import numpy as np
|
| 6 |
+
import torch
|
| 7 |
+
from fvcore.transforms import HFlipTransform, TransformList
|
| 8 |
+
from torch.nn import functional as F
|
| 9 |
+
|
| 10 |
+
from detectron2.data.transforms import RandomRotation, RotationTransform, apply_transform_gens
|
| 11 |
+
from detectron2.modeling.postprocessing import detector_postprocess
|
| 12 |
+
from detectron2.modeling.test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
|
| 13 |
+
|
| 14 |
+
from ..converters import HFlipConverter
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class DensePoseDatasetMapperTTA(DatasetMapperTTA):
    """
    Test-time-augmentation dataset mapper that extends the standard detectron2
    TTA mapper with additional rotated copies of the input image.
    """

    def __init__(self, cfg):
        super().__init__(cfg=cfg)
        # Extra rotation angles (degrees) to augment with, taken from the config.
        self.angles = cfg.TEST.AUG.ROTATION_ANGLES

    def __call__(self, dataset_dict):
        ret = super().__call__(dataset_dict=dataset_dict)
        hwc_image = dataset_dict["image"].permute(1, 2, 0).numpy()
        for rotation_angle in self.angles:
            rotation_gen = RandomRotation(angle=rotation_angle, expand=True)
            rotated_image, applied_tfms = apply_transform_gens(
                [rotation_gen], np.copy(hwc_image)
            )
            chw_tensor = torch.from_numpy(
                np.ascontiguousarray(rotated_image.transpose(2, 0, 1))
            )
            augmented_dict = copy.deepcopy(dataset_dict)
            # In DatasetMapperTTA, there is a pre_tfm transform (resize or no-op)
            # that is added at the beginning of each TransformList. That's
            # '.transforms[0]'.
            augmented_dict["transforms"] = TransformList(
                [ret[-1]["transforms"].transforms[0]] + applied_tfms.transforms
            )
            augmented_dict["image"] = chw_tensor
            ret.append(augmented_dict)
        return ret
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
    """
    Test-time-augmentation wrapper for a DensePose GeneralizedRCNN model.

    Extends the detectron2 TTA wrapper so that DensePose predictions from
    augmented views (rotation, horizontal flip) are mapped back to the
    original image space and averaged across views.
    """

    def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
        """
        Args:
            cfg (CfgNode):
            model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
            transform_data (DensePoseTransformData): contains symmetry label
                transforms used for horizontal flip
            tta_mapper (callable): takes a dataset dict and returns a list of
                augmented versions of the dataset dict. Defaults to
                `DatasetMapperTTA(cfg)`.
            batch_size (int): batch the augmented images into this batch size for inference.
        """
        # Move the flip symmetry data to the model's device before the parent
        # constructor takes over.
        self._transform_data = transform_data.to(model.device)
        super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)

    # the implementation follows closely the one from detectron2/modeling
    def _inference_one_image(self, input):
        """
        Args:
            input (dict): one dataset dict with "image" field being a CHW tensor

        Returns:
            dict: one output dict
        """
        orig_shape = (input["height"], input["width"])
        # For some reason, resize with uint8 slightly increases box AP but decreases densepose AP
        input["image"] = input["image"].to(torch.uint8)
        augmented_inputs, tfms = self._get_augmented_inputs(input)
        # Detect boxes from all augmented versions
        with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
            # temporarily disable roi heads
            all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms)
        merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape)

        if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
            # Use the detected boxes to obtain new fields
            augmented_instances = self._rescale_detected_boxes(
                augmented_inputs, merged_instances, tfms
            )
            # run forward on the detected boxes
            outputs = self._batch_inference(augmented_inputs, augmented_instances)
            # Delete now useless variables to avoid being out of memory
            del augmented_inputs, augmented_instances
            # average the predictions
            if self.cfg.MODEL.MASK_ON:
                merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms)
            if self.cfg.MODEL.DENSEPOSE_ON:
                merged_instances.pred_densepose = self._reduce_pred_densepose(outputs, tfms)
            # postprocess
            merged_instances = detector_postprocess(merged_instances, *orig_shape)
            return {"instances": merged_instances}
        else:
            # NOTE(review): no postprocess here — `_merge_detections` was given
            # `orig_shape`, so the merged boxes appear to already be in the
            # original image space; confirm against the detectron2 base class.
            return {"instances": merged_instances}

    def _get_augmented_boxes(self, augmented_inputs, tfms):
        # Heavily based on detectron2/modeling/test_time_augmentation.py
        # Only difference is that RotationTransform is excluded from bbox computation
        # 1: forward with all augmented images
        outputs = self._batch_inference(augmented_inputs)
        # 2: union the results
        all_boxes = []
        all_scores = []
        all_classes = []
        for output, tfm in zip(outputs, tfms):
            # Need to inverse the transforms on boxes, to obtain results on original image
            if not any(isinstance(t, RotationTransform) for t in tfm.transforms):
                # Some transforms can't compute bbox correctly
                pred_boxes = output.pred_boxes.tensor
                original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy())
                all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device))
                all_scores.extend(output.scores)
                all_classes.extend(output.pred_classes)
        all_boxes = torch.cat(all_boxes, dim=0)
        return all_boxes, all_scores, all_classes

    def _reduce_pred_densepose(self, outputs, tfms):
        # Should apply inverse transforms on densepose preds.
        # We assume only rotation, resize & flip are used. pred_masks is a scale-invariant
        # representation, so we handle the other ones specially
        for idx, (output, tfm) in enumerate(zip(outputs, tfms)):
            # Undo every rotation applied to this view (`_inverse_rotation` is a
            # no-op for non-rotation transforms).
            for t in tfm.transforms:
                for attr in ["coarse_segm", "fine_segm", "u", "v"]:
                    setattr(
                        output.pred_densepose,
                        attr,
                        _inverse_rotation(
                            getattr(output.pred_densepose, attr), output.pred_boxes.tensor, t
                        ),
                    )
            if any(isinstance(t, HFlipTransform) for t in tfm.transforms):
                output.pred_densepose = HFlipConverter.convert(
                    output.pred_densepose, self._transform_data
                )
            # Fold this view's predictions into the running average kept on
            # outputs[0] (mutated in place by _incremental_avg_dp).
            self._incremental_avg_dp(outputs[0].pred_densepose, output.pred_densepose, idx)
        return outputs[0].pred_densepose

    # incrementally computed average: u_(n + 1) = u_n + (x_(n+1) - u_n) / (n + 1).
    def _incremental_avg_dp(self, avg, new_el, idx):
        """
        Update `avg` in place with the `idx`-th new element of a running mean.
        After use, tensor fields of `new_el` (for idx > 0) are set to None to
        release GPU memory.
        """
        for attr in ["coarse_segm", "fine_segm", "u", "v"]:
            setattr(avg, attr, (getattr(avg, attr) * idx + getattr(new_el, attr)) / (idx + 1))
            if idx:
                # Deletion of the > 0 index intermediary values to prevent GPU OOM
                setattr(new_el, attr, None)
        return avg
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def _inverse_rotation(densepose_attrs, boxes, transform):
    """
    Map DensePose predictions made on a rotated image back to the original
    (unrotated) image space.

    Args:
        densepose_attrs: N x C x H x W tensor of per-detection predictions
            (the 4-D indexing, `grid_sample` and `[:, 0]` channel access
            require this layout).
        boxes: N x 4 tensor of boxes [x0, y0, x1, y1] in the rotated image.
        transform: the transform applied to this view; this function is a
            no-op unless it is a RotationTransform.

    Returns:
        `densepose_attrs`, with rows for valid boxes updated in place.
    """
    # resample outputs to image size and rotate back the densepose preds
    # on the rotated images to the space of the original image
    if len(boxes) == 0 or not isinstance(transform, RotationTransform):
        return densepose_attrs
    boxes = boxes.int().cpu().numpy()
    wh_boxes = boxes[:, 2:] - boxes[:, :2]  # bboxes in the rotated space
    inv_boxes = rotate_box_inverse(transform, boxes).astype(int)  # bboxes in original image
    wh_diff = (inv_boxes[:, 2:] - inv_boxes[:, :2] - wh_boxes) // 2  # diff between new/old bboxes
    # Rotation matrix of the image, with the translation column zeroed out so
    # grid_sample only rotates (the crop below handles the offset).
    rotation_matrix = torch.tensor([transform.rm_image]).to(device=densepose_attrs.device).float()
    rotation_matrix[:, :, -1] = 0
    # To apply grid_sample for rotation, we need to have enough space to fit the original and
    # rotated bboxes. l_bds and r_bds are the left/right bounds that will be used to
    # crop the difference once the rotation is done
    l_bds = np.maximum(0, -wh_diff)
    for i in range(len(densepose_attrs)):
        if min(wh_boxes[i]) <= 0:
            # degenerate (zero-area) box: leave this row untouched
            continue
        densepose_attr = densepose_attrs[[i]].clone()
        # 1. Interpolate densepose attribute to size of the rotated bbox
        densepose_attr = F.interpolate(densepose_attr, wh_boxes[i].tolist()[::-1], mode="bilinear")
        # 2. Pad the interpolated attribute so it has room for the original + rotated bbox
        densepose_attr = F.pad(densepose_attr, tuple(np.repeat(np.maximum(0, wh_diff[i]), 2)))
        # 3. Compute rotation grid and transform
        grid = F.affine_grid(rotation_matrix, size=densepose_attr.shape)
        densepose_attr = F.grid_sample(densepose_attr, grid)
        # 4. Compute right bounds and crop the densepose_attr to the size of the original bbox
        r_bds = densepose_attr.shape[2:][::-1] - l_bds[i]
        densepose_attr = densepose_attr[:, :, l_bds[i][1] : r_bds[1], l_bds[i][0] : r_bds[0]]
        if min(densepose_attr.shape) > 0:
            # Interpolate back to the original size of the densepose attribute
            densepose_attr = F.interpolate(
                densepose_attr, densepose_attrs.shape[-2:], mode="bilinear"
            )
            # Adding a very small probability to the background class to fill padded zones
            densepose_attr[:, 0] += 1e-10
            densepose_attrs[i] = densepose_attr
    return densepose_attrs
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def rotate_box_inverse(rot_tfm, rotated_box):
    """
    Invert a rotation transform on bounding boxes and shrink them back to
    their pre-rotation size.

    `rotated_box` is an N x 4 array of [x0, y0, x1, y1] boxes. Rotating a box
    enlarges it (the axis-aligned result must enclose the tilted box), so a
    plain inverse rotation yields boxes bigger than the originals; this
    function additionally rescales each inverse-rotated box to its original
    extent.
    """
    # 1. Inverse-rotate the boxes (still enlarged at this point).
    inverted = rot_tfm.inverse().apply_box(rotated_box)
    rot_h = rotated_box[:, 3] - rotated_box[:, 1]
    rot_w = rotated_box[:, 2] - rotated_box[:, 0]
    inv_h = inverted[:, 3] - inverted[:, 1]
    inv_w = inverted[:, 2] - inverted[:, 0]
    assert 2 * rot_tfm.abs_sin**2 != 1, "45 degrees angle can't be inverted"
    # 2. Invert the size computation performed by the rotation transform to
    #    recover the original height/width of the boxes.
    denom = 1 - 2 * rot_tfm.abs_sin**2
    orig_h = (rot_h * rot_tfm.abs_cos - rot_w * rot_tfm.abs_sin) / denom
    orig_w = (rot_w * rot_tfm.abs_cos - rot_h * rot_tfm.abs_sin) / denom
    # 3. Shrink each inverse-rotated box symmetrically down to its original size.
    inverted[:, 0] += (inv_w - orig_w) / 2
    inverted[:, 1] += (inv_h - orig_h) / 2
    inverted[:, 2] -= (inv_w - orig_w) / 2
    inverted[:, 3] -= (inv_h - orig_h) / 2

    return inverted
|
CatVTON/densepose/modeling/utils.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
|
| 5 |
+
from torch import nn
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def initialize_module_params(module: nn.Module) -> None:
    """
    Initialize the parameters of `module` in place: parameters whose name
    contains "bias" are zeroed, and those whose name contains "weight" get
    Kaiming-normal initialization (fan_out, ReLU nonlinearity). Any other
    parameter is left untouched.
    """
    for param_name, tensor in module.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
        elif "weight" in param_name:
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
|
CatVTON/densepose/utils/__init__.py
ADDED
|
File without changes
|
CatVTON/densepose/utils/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (160 Bytes). View file
|
|
|
CatVTON/densepose/utils/__pycache__/transform.cpython-39.pyc
ADDED
|
Binary file (733 Bytes). View file
|
|
|
CatVTON/densepose/utils/dbhelper.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
from typing import Any, Dict, Optional, Tuple
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class EntrySelector:
    """
    Base class for entry selectors.

    Concrete selectors are callables taking an entry and returning whether
    it should be kept.
    """

    @staticmethod
    def from_string(spec: str) -> "EntrySelector":
        """
        Build a selector from its string specification: "*" selects every
        entry, anything else is parsed as a field specifier.
        """
        return AllEntrySelector() if spec == "*" else FieldEntrySelector(spec)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class AllEntrySelector(EntrySelector):
    """
    Selector that accepts every entry unconditionally.
    """

    # spec string that denotes this selector (see EntrySelector.from_string)
    SPECIFIER = "*"

    def __call__(self, entry):
        # Every entry matches, regardless of its contents.
        return True
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class FieldEntrySelector(EntrySelector):
    """
    Selector that accepts only entries that match provided field
    specifier(s). Only a limited set of specifiers is supported for now:
      <specifiers>::=<specifier>[<comma><specifiers>]
      <specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
      <field_name> is a valid identifier
      <type> ::= "int" | "str"
      <equal> ::= "="
      <comma> ::= ","
      <type_delim> ::= ":"
      <value_or_range> ::= <value> | <range>
      <range> ::= <value><range_delim><value>
      <range_delim> ::= "-"
      <value> is a string without spaces and special symbols
      (e.g. <comma>, <equal>, <type_delim>, <range_delim>)
    """

    _SPEC_DELIM = ","
    _TYPE_DELIM = ":"
    _RANGE_DELIM = "-"
    _EQUAL = "="
    _ERROR_PREFIX = "Invalid field selector specifier"

    class _FieldEntryValuePredicate:
        """
        Predicate that checks strict equality for the specified entry field
        """

        def __init__(self, name: str, typespec: Optional[str], value: str):
            import builtins

            self.name = name
            # Resolve "int"/"str" to the corresponding builtin; default to str.
            self.type = getattr(builtins, typespec) if typespec is not None else str
            self.value = value

        def __call__(self, entry):
            return entry[self.name] == self.type(self.value)

    class _FieldEntryRangePredicate:
        """
        Predicate that checks whether an entry field falls into the specified range
        (inclusive on both ends)
        """

        def __init__(self, name: str, typespec: Optional[str], vmin: str, vmax: str):
            import builtins

            self.name = name
            self.type = getattr(builtins, typespec) if typespec is not None else str
            self.vmin = vmin
            self.vmax = vmax

        def __call__(self, entry):
            return (entry[self.name] >= self.type(self.vmin)) and (
                entry[self.name] <= self.type(self.vmax)
            )

    def __init__(self, spec: str):
        self._predicates = self._parse_specifier_into_predicates(spec)

    def __call__(self, entry: Dict[str, Any]):
        """Return True iff the entry satisfies every parsed predicate."""
        for predicate in self._predicates:
            if not predicate(entry):
                return False
        return True

    def _parse_specifier_into_predicates(self, spec: str):
        """Split the comma-separated spec and build one predicate per field."""
        predicates = []
        specs = spec.split(self._SPEC_DELIM)
        for subspec in specs:
            eq_idx = subspec.find(self._EQUAL)
            if eq_idx > 0:
                field_name_with_type = subspec[:eq_idx]
                field_name, field_type = self._parse_field_name_type(field_name_with_type)
                field_value_or_range = subspec[eq_idx + 1 :]
                if self._is_range_spec(field_value_or_range):
                    vmin, vmax = self._get_range_spec(field_value_or_range)
                    predicate = FieldEntrySelector._FieldEntryRangePredicate(
                        field_name, field_type, vmin, vmax
                    )
                else:
                    predicate = FieldEntrySelector._FieldEntryValuePredicate(
                        field_name, field_type, field_value_or_range
                    )
                predicates.append(predicate)
            elif eq_idx == 0:
                self._parse_error(f'"{subspec}", field name is empty!')
            else:
                self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
        return predicates

    def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
        """Split "name[:type]" into (name, type-or-None)."""
        type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
        if type_delim_idx > 0:
            field_name = field_name_with_type[:type_delim_idx]
            field_type = field_name_with_type[type_delim_idx + 1 :]
        elif type_delim_idx == 0:
            self._parse_error(f'"{field_name_with_type}", field name is empty!')
        else:
            field_name = field_name_with_type
            field_type = None
        # pyre-fixme[61]: `field_name` may not be initialized here.
        # pyre-fixme[61]: `field_type` may not be initialized here.
        return field_name, field_type

    def _is_range_spec(self, field_value_or_range):
        # A range delimiter strictly past position 0 marks a range spec
        # (a leading "-" is treated as part of a plain value).
        delim_idx = field_value_or_range.find(self._RANGE_DELIM)
        return delim_idx > 0

    def _get_range_spec(self, field_value_or_range):
        """Split "vmin-vmax" into its two bounds; raise if it is not a range."""
        if self._is_range_spec(field_value_or_range):
            delim_idx = field_value_or_range.find(self._RANGE_DELIM)
            vmin = field_value_or_range[:delim_idx]
            vmax = field_value_or_range[delim_idx + 1 :]
            return vmin, vmax
        else:
            # BUGFIX: this message previously lacked the f-prefix, so the
            # literal text "field_value_or_range" was reported instead of the
            # offending value (every sibling _parse_error call interpolates).
            self._parse_error(f'"{field_value_or_range}", range of values expected!')

    def _parse_error(self, msg):
        raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
|
CatVTON/densepose/utils/logger.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def verbosity_to_level(verbosity) -> int:
    """
    Map a CLI-style verbosity count to a `logging` level.

    None or 0 (or any other unrecognized value) -> WARNING, 1 -> INFO,
    2 or more -> DEBUG.
    """
    if verbosity is None:
        return logging.WARNING
    if verbosity >= 2:
        return logging.DEBUG
    if verbosity == 1:
        return logging.INFO
    # 0, negative, or anything else: default level
    return logging.WARNING
|
CatVTON/densepose/utils/transform.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
# pyre-unsafe
|
| 4 |
+
from detectron2.data import MetadataCatalog
|
| 5 |
+
from detectron2.utils.file_io import PathManager
|
| 6 |
+
|
| 7 |
+
from densepose import DensePoseTransformData
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def load_for_dataset(dataset_name):
    """
    Load the DensePose transform (symmetry) data registered for the given
    dataset in the MetadataCatalog.
    """
    transform_src = MetadataCatalog.get(dataset_name).densepose_transform_src
    # Resolve the (possibly remote) source to a local file before loading.
    local_fpath = PathManager.get_local_path(transform_src)
    return DensePoseTransformData.load(local_fpath)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def load_from_cfg(cfg):
    """Load DensePose transform data for the first test dataset in the config."""
    first_test_dataset = cfg.DATASETS.TEST[0]
    return load_for_dataset(first_test_dataset)
|
CatVTON/model/DensePose/__init__.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import glob
|
| 3 |
+
import os
|
| 4 |
+
from random import randint
|
| 5 |
+
import shutil
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch
|
| 11 |
+
from PIL import Image
|
| 12 |
+
from densepose import add_densepose_config
|
| 13 |
+
from densepose.vis.base import CompoundVisualizer
|
| 14 |
+
from densepose.vis.densepose_results import DensePoseResultsFineSegmentationVisualizer
|
| 15 |
+
from densepose.vis.extractor import create_extractor, CompoundExtractor
|
| 16 |
+
from detectron2.config import get_cfg
|
| 17 |
+
from detectron2.data.detection_utils import read_image
|
| 18 |
+
from detectron2.engine.defaults import DefaultPredictor
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class DensePose:
    """
    DensePose used in this project is from Detectron2 (https://github.com/facebookresearch/detectron2).
    These codes are modified from https://github.com/facebookresearch/detectron2/tree/main/projects/DensePose.
    The checkpoint is downloaded from https://github.com/facebookresearch/detectron2/blob/main/projects/DensePose/doc/DENSEPOSE_IUV.md#ModelZoo.

    We use the model R_50_FPN_s1x with id 165712039, but other models should also work.
    The config file is downloaded from https://github.com/facebookresearch/detectron2/tree/main/projects/DensePose/configs.
    Noted that the config file should match the model checkpoint and Base-DensePose-RCNN-FPN.yaml is also needed.
    """

    def __init__(self, model_path="./checkpoints/densepose_", device="cuda"):
        self.device = device
        # model_path is a directory containing both the yaml config and the weights
        self.config_path = os.path.join(model_path, 'densepose_rcnn_R_50_FPN_s1x.yaml')
        self.model_path = os.path.join(model_path, 'model_final_162be9.pkl')
        # only the fine-segmentation visualization is used to produce the output map
        self.visualizations = ["dp_segm"]
        self.VISUALIZERS = {"dp_segm": DensePoseResultsFineSegmentationVisualizer}
        # detection score threshold fed to ROI_HEADS.SCORE_THRESH_TEST in setup_config
        self.min_score = 0.8

        self.cfg = self.setup_config()
        self.predictor = DefaultPredictor(self.cfg)
        self.predictor.model.to(self.device)

    def setup_config(self):
        """Build the frozen detectron2 config: densepose additions, yaml file,
        score threshold, and checkpoint path."""
        opts = ["MODEL.ROI_HEADS.SCORE_THRESH_TEST", str(self.min_score)]
        cfg = get_cfg()
        add_densepose_config(cfg)
        cfg.merge_from_file(self.config_path)
        cfg.merge_from_list(opts)
        cfg.MODEL.WEIGHTS = self.model_path
        cfg.freeze()
        return cfg

    @staticmethod
    def _get_input_file_list(input_spec: str):
        """Expand a path spec (directory, single file, or glob pattern) into a
        list of input file paths."""
        if os.path.isdir(input_spec):
            file_list = [os.path.join(input_spec, fname) for fname in os.listdir(input_spec)
                         if os.path.isfile(os.path.join(input_spec, fname))]
        elif os.path.isfile(input_spec):
            file_list = [input_spec]
        else:
            file_list = glob.glob(input_spec)
        return file_list

    def create_context(self, cfg, output_path):
        """Build the visualizer/extractor pipeline and the shared context dict
        used by execute_on_outputs."""
        vis_specs = self.visualizations
        visualizers = []
        extractors = []
        for vis_spec in vis_specs:
            texture_atlas = texture_atlases_dict = None
            vis = self.VISUALIZERS[vis_spec](
                cfg=cfg,
                texture_atlas=texture_atlas,
                texture_atlases_dict=texture_atlases_dict,
                alpha=1.0
            )
            visualizers.append(vis)
            extractor = create_extractor(vis)
            extractors.append(extractor)
        visualizer = CompoundVisualizer(visualizers)
        extractor = CompoundExtractor(extractors)
        context = {
            "extractor": extractor,
            "visualizer": visualizer,
            "out_fname": output_path,
            "entry_idx": 0,
        }
        return context

    def execute_on_outputs(self, context, entry, outputs):
        """Extract the DensePose segmentation from predictor outputs and save
        it as a grayscale label image at context["out_fname"]."""
        extractor = context["extractor"]

        data = extractor(outputs)

        H, W, _ = entry["image"].shape
        result = np.zeros((H, W), dtype=np.uint8)

        # only the first extractor's result and the first detected box are used
        data, box = data[0]
        x, y, w, h = [int(_) for _ in box[0].cpu().numpy()]
        i_array = data[0].labels[None].cpu().numpy()[0]
        # paste per-part labels into the box region; background stays 0
        result[y:y + h, x:x + w] = i_array
        result = Image.fromarray(result)
        result.save(context["out_fname"])

    def __call__(self, image_or_path, resize=512) -> Image.Image:
        """
        :param image_or_path: Path of the input image.
        :param resize: Resize the input image if its max size is larger than this value.
        :return: Dense pose image.
        """
        # fixed tmp directory; uniqueness comes from the per-call file name below
        tmp_path = f"./densepose_/tmp/"
        if not os.path.exists(tmp_path):
            os.makedirs(tmp_path)

        # unique-ish name: timestamp + device + random suffix
        image_path = os.path.join(tmp_path, f"{int(time.time())}-{self.device}-{randint(0, 100000)}.png")
        if isinstance(image_or_path, str):
            assert image_or_path.split(".")[-1] in ["jpg", "png"], "Only support jpg and png images."
            shutil.copy(image_or_path, image_path)
        elif isinstance(image_or_path, Image.Image):
            image_or_path.save(image_path)
        else:
            shutil.rmtree(tmp_path)
            raise TypeError("image_path must be str or PIL.Image.Image")

        output_path = image_path.replace(".png", "_dense.png").replace(".jpg", "_dense.png")
        # original size, used to resize the result back at the end
        w, h = Image.open(image_path).size

        file_list = self._get_input_file_list(image_path)
        assert len(file_list), "No input images found!"
        context = self.create_context(self.cfg, output_path)
        for file_name in file_list:
            img = read_image(file_name, format="BGR")  # predictor expects BGR image.
            # downscale so the longest side is at most `resize`
            if (_ := max(img.shape)) > resize:
                scale = resize / _
                img = cv2.resize(img, (int(img.shape[1] * scale), int(img.shape[0] * scale)))

            with torch.no_grad():
                outputs = self.predictor(img)["instances"]
            try:
                self.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs)
            except Exception as e:
                # NOTE(review): extraction failures (e.g. no detections) are
                # swallowed and a 1x1 placeholder is written instead; consider
                # logging `e` so silent failures are diagnosable.
                null_gray = Image.new('L', (1, 1))
                null_gray.save(output_path)

        dense_gray = Image.open(output_path).convert("L")
        # NEAREST keeps the discrete part labels intact when scaling back up
        dense_gray = dense_gray.resize((w, h), Image.NEAREST)
        # remove the temporary input and output files
        os.remove(image_path)
        os.remove(output_path)


        return dense_gray
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
if __name__ == '__main__':
    # No standalone CLI behavior: this module is intended to be imported.
    pass
|
CatVTON/model/DensePose/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (5.85 kB). View file
|
|
|
CatVTON/model/DensePose/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (8.91 kB). View file
|
|
|
CatVTON/model/DensePose/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (5.83 kB). View file
|
|
|
CatVTON/model/SCHP/__init__.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from model.SCHP import networks
|
| 2 |
+
from model.SCHP.utils.transforms import get_affine_transform, transform_logits
|
| 3 |
+
|
| 4 |
+
from collections import OrderedDict
|
| 5 |
+
import torch
|
| 6 |
+
import numpy as np
|
| 7 |
+
import cv2
|
| 8 |
+
from PIL import Image
|
| 9 |
+
from torchvision import transforms
|
| 10 |
+
|
| 11 |
+
def get_palette(num_cls):
    """Build a flat RGB palette for visualizing a segmentation mask.

    Uses the standard PASCAL-VOC scheme: each class index is expanded
    bit-by-bit, bit k of the index contributing to bit (7 - k//3) of the
    R, G or B channel.

    Args:
        num_cls: Number of classes.

    Returns:
        A flat list of length ``num_cls * 3`` with R, G, B values per class.
    """
    palette = [0] * (num_cls * 3)
    for idx in range(num_cls):
        value = idx
        shift = 7
        while value:
            # Distribute the three lowest bits of `value` over R, G, B.
            for channel in range(3):
                palette[idx * 3 + channel] |= ((value >> channel) & 1) << shift
            value >>= 3
            shift -= 1
    return palette
| 33 |
+
|
| 34 |
+
# Per-dataset parsing configuration: network input resolution, number of
# segmentation classes, and the human-readable label of each class index.
dataset_settings = {
    # LIP: Look Into Person, 20-class human parsing.
    'lip': {
        'input_size': [473, 473],
        'num_classes': 20,
        'label': ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat',
                  'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm',
                  'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe']
    },
    # ATR: 18-class clothing/person parsing.
    'atr': {
        'input_size': [512, 512],
        'num_classes': 18,
        'label': ['Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt',
                  'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf']
    },
    # Pascal-Person-Part: coarse 7-part body parsing.
    'pascal': {
        'input_size': [512, 512],
        'num_classes': 7,
        'label': ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'],
    }
}
|
| 54 |
+
|
| 55 |
+
class SCHP:
    """Self-Correction Human Parsing (SCHP) inference wrapper.

    Infers the dataset type ('lip', 'atr' or 'pascal') from the checkpoint
    path, loads the matching resnet101 parsing network, and exposes a
    callable interface mapping an image (path or PIL.Image, or a list of
    them) to a paletted 'P'-mode PIL segmentation mask.
    """

    def __init__(self, ckpt_path, device):
        """
        Args:
            ckpt_path: Path to the SCHP checkpoint; must contain one of the
                substrings 'lip', 'atr' or 'pascal' so the dataset settings
                can be selected.
            device: Torch device the model runs on.
        """
        dataset_type = None
        if 'lip' in ckpt_path:
            dataset_type = 'lip'
        elif 'atr' in ckpt_path:
            dataset_type = 'atr'
        elif 'pascal' in ckpt_path:
            dataset_type = 'pascal'
        assert dataset_type is not None, 'Dataset type not found in checkpoint path'
        self.device = device
        self.num_classes = dataset_settings[dataset_type]['num_classes']
        self.input_size = dataset_settings[dataset_type]['input_size']
        # Width/height ratio used to pad boxes to the network aspect ratio.
        self.aspect_ratio = self.input_size[1] * 1.0 / self.input_size[0]
        self.palette = get_palette(self.num_classes)

        self.label = dataset_settings[dataset_type]['label']
        self.model = networks.init_model('resnet101', num_classes=self.num_classes, pretrained=None).to(device)
        self.load_ckpt(ckpt_path)
        self.model.eval()

        # NOTE(review): mean/std are the ImageNet statistics in reversed
        # (BGR) channel order, which matches cv2.imread output; PIL inputs
        # are passed through as RGB in preprocess() — confirm the channel
        # order mismatch between the two input paths is intended.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])
        ])
        self.upsample = torch.nn.Upsample(size=self.input_size, mode='bilinear', align_corners=True)

    def load_ckpt(self, ckpt_path):
        """Load a DataParallel-saved checkpoint, remapping a few layer keys.

        The stored layer indices differ from the current network definition
        for some decoder/fusion layers, so those keys are renamed before a
        non-strict load (unmatched keys are silently ignored).
        """
        rename_map = {
            "decoder.conv3.2.weight": "decoder.conv3.3.weight",
            "decoder.conv3.3.weight": "decoder.conv3.4.weight",
            "decoder.conv3.3.bias": "decoder.conv3.4.bias",
            "decoder.conv3.3.running_mean": "decoder.conv3.4.running_mean",
            "decoder.conv3.3.running_var": "decoder.conv3.4.running_var",
            "fushion.3.weight": "fushion.4.weight",
            "fushion.3.bias": "fushion.4.bias",
        }
        state_dict = torch.load(ckpt_path, map_location='cpu')['state_dict']
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # strip the 'module.' prefix added by DataParallel
            new_state_dict[name] = v
        # Apply the rename map; keys not in the map are kept unchanged.
        new_state_dict_ = OrderedDict()
        for k, v in list(new_state_dict.items()):
            new_state_dict_[rename_map.get(k, k)] = v
        self.model.load_state_dict(new_state_dict_, strict=False)

    def _box2cs(self, box):
        """Convert an (x, y, w, h) box to a (center, scale) pair."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Compute the box center and a square-ish scale padded to the
        network aspect ratio (the smaller dimension is enlarged)."""
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w, h], dtype=np.float32)
        return center, scale

    def preprocess(self, image):
        """Warp an image to the network input size.

        Args:
            image: Filesystem path (read as BGR via cv2) or a PIL.Image
                (kept in PIL's native RGB order).

        Returns:
            (input, meta): a normalized 1xCxHxW tensor on ``self.device``
            and a dict of the affine parameters needed to map logits back
            to the original resolution.

        Raises:
            TypeError: If ``image`` is neither a path nor a PIL.Image.
        """
        if isinstance(image, str):
            img = cv2.imread(image, cv2.IMREAD_COLOR)
        elif isinstance(image, Image.Image):
            # to cv2 format (channel order stays RGB here)
            img = np.array(image)
        else:
            # Previously an unsupported type fell through to a NameError on
            # `img`; fail fast with an explicit error instead.
            raise TypeError(f'Unsupported image type: {type(image)!r}')

        h, w, _ = img.shape
        # Get person center and scale covering the whole frame.
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.input_size)
        input = cv2.warpAffine(
            img,
            trans,
            (int(self.input_size[1]), int(self.input_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        input = self.transform(input).to(self.device).unsqueeze(0)
        meta = {
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }
        return input, meta

    def __call__(self, image_or_path):
        """Run parsing on one image or a batch (list) of images.

        Args:
            image_or_path: A path / PIL.Image, or a list of them.

        Returns:
            A paletted PIL.Image mask, or a list of masks for a multi-item
            list input (a single-element list returns the bare mask).
        """
        if isinstance(image_or_path, list):
            image_list = []
            meta_list = []
            for image in image_or_path:
                image, meta = self.preprocess(image)
                image_list.append(image)
                meta_list.append(meta)
            image = torch.cat(image_list, dim=0)
        else:
            image, meta = self.preprocess(image_or_path)
            meta_list = [meta]

        # Pure inference: disable autograd bookkeeping to save memory/time.
        with torch.no_grad():
            output = self.model(image)
            # upsample_outputs = self.upsample(output[0][-1])
            upsample_outputs = self.upsample(output)
        upsample_outputs = upsample_outputs.permute(0, 2, 3, 1)  # BCHW -> BHWC

        output_img_list = []
        for upsample_output, meta in zip(upsample_outputs, meta_list):
            c, s, w, h = meta['center'], meta['scale'], meta['width'], meta['height']
            # Undo the input affine warp so logits align with the original image.
            logits_result = transform_logits(upsample_output.data.cpu().numpy(), c, s, w, h, input_size=self.input_size)
            parsing_result = np.argmax(logits_result, axis=2)
            output_img = Image.fromarray(np.asarray(parsing_result, dtype=np.uint8))
            output_img.putpalette(self.palette)
            output_img_list.append(output_img)

        return output_img_list[0] if len(output_img_list) == 1 else output_img_list
|