Add VITON implementation with UI
Browse files — this view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -0
- .gitignore +2 -0
- README.md +28 -8
- app.py +150 -0
- configs/VITONHD.yaml +32 -0
- lib/caption.py +19 -0
- lib/mask.py +64 -0
- lib/pose.py +36 -0
- preprocess/__init__.py +0 -0
- preprocess/humanparsing/__init__.py +0 -0
- preprocess/humanparsing/datasets/__init__.py +0 -0
- preprocess/humanparsing/datasets/datasets.py +201 -0
- preprocess/humanparsing/datasets/simple_extractor_dataset.py +89 -0
- preprocess/humanparsing/datasets/target_generation.py +40 -0
- preprocess/humanparsing/modules/__init__.py +5 -0
- preprocess/humanparsing/modules/bn.py +132 -0
- preprocess/humanparsing/modules/deeplab.py +84 -0
- preprocess/humanparsing/modules/dense.py +42 -0
- preprocess/humanparsing/modules/functions.py +245 -0
- preprocess/humanparsing/modules/misc.py +21 -0
- preprocess/humanparsing/modules/residual.py +182 -0
- preprocess/humanparsing/modules/src/checks.h +15 -0
- preprocess/humanparsing/modules/src/inplace_abn.cpp +95 -0
- preprocess/humanparsing/modules/src/inplace_abn.h +88 -0
- preprocess/humanparsing/modules/src/inplace_abn_cpu.cpp +119 -0
- preprocess/humanparsing/modules/src/inplace_abn_cuda.cu +333 -0
- preprocess/humanparsing/modules/src/inplace_abn_cuda_half.cu +275 -0
- preprocess/humanparsing/modules/src/utils/checks.h +15 -0
- preprocess/humanparsing/modules/src/utils/common.h +49 -0
- preprocess/humanparsing/modules/src/utils/cuda.cuh +71 -0
- preprocess/humanparsing/networks/AugmentCE2P.py +388 -0
- preprocess/humanparsing/networks/__init__.py +12 -0
- preprocess/humanparsing/networks/backbone/mobilenetv2.py +156 -0
- preprocess/humanparsing/networks/backbone/resnet.py +205 -0
- preprocess/humanparsing/networks/backbone/resnext.py +149 -0
- preprocess/humanparsing/networks/context_encoding/aspp.py +64 -0
- preprocess/humanparsing/networks/context_encoding/ocnet.py +226 -0
- preprocess/humanparsing/networks/context_encoding/psp.py +48 -0
- preprocess/humanparsing/parsing_api.py +191 -0
- preprocess/humanparsing/run_parsing.py +44 -0
- preprocess/humanparsing/utils/__init__.py +0 -0
- preprocess/humanparsing/utils/consistency_loss.py +33 -0
- preprocess/humanparsing/utils/criterion.py +142 -0
- preprocess/humanparsing/utils/encoding.py +187 -0
- preprocess/humanparsing/utils/kl_loss.py +43 -0
- preprocess/humanparsing/utils/lovasz_softmax.py +279 -0
- preprocess/humanparsing/utils/miou.py +155 -0
- preprocess/humanparsing/utils/schp.py +80 -0
- preprocess/humanparsing/utils/soft_dice_loss.py +111 -0
- preprocess/humanparsing/utils/transforms.py +167 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__
|
| 2 |
+
checkpoints
|
README.md
CHANGED
|
@@ -1,14 +1,34 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.34.2
|
| 8 |
app_file: app.py
|
| 9 |
-
pinned:
|
| 10 |
-
license: cc-by-nc-sa-4.0
|
| 11 |
-
short_description: Virtual try-on
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Virtual Try-On
|
| 3 |
+
emoji: 👗
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.34.2
|
| 8 |
app_file: app.py
|
| 9 |
+
pinned: true
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Virtual Try-On Demo
|
| 13 |
+
This repository is the work demo implementation of [PromptDresser](https://arxiv.org/abs/2412.16978)
|
| 14 |
+
|
| 15 |
+
> **PromptDresser: Improving the Quality and Controllability of Virtual Try-On via Generative Textual Prompt and Prompt-aware Mask**<br>
|
| 16 |
+
> [Jeongho Kim](https://scholar.google.co.kr/citations?user=4SCCBFwAAAAJ&hl=ko), [Hoiyeong Jin](https://scholar.google.com/citations?user=Jp-zhtUAAAAJ&hl=en), [Sunghyun Park](https://psh01087.github.io/), [Jaegul Choo](https://sites.google.com/site/jaegulchoo/)
|
| 17 |
+
|
| 18 |
+
[[arXiv Paper](https://arxiv.org/abs/2412.16978)]
|
| 19 |
+
|
| 20 |
+
## Citation
|
| 21 |
+
```
|
| 22 |
+
@misc{kim2024promptdresserimprovingqualitycontrollability,
|
| 23 |
+
title={PromptDresser: Improving the Quality and Controllability of Virtual Try-On via Generative Textual Prompt and Prompt-aware Mask},
|
| 24 |
+
author={Jeongho Kim and Hoiyeong Jin and Sunghyun Park and Jaegul Choo},
|
| 25 |
+
year={2024},
|
| 26 |
+
eprint={2412.16978},
|
| 27 |
+
archivePrefix={arXiv},
|
| 28 |
+
primaryClass={cs.CV},
|
| 29 |
+
url={https://arxiv.org/abs/2412.16978},
|
| 30 |
+
}
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
## License
|
| 34 |
+
Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
|
app.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import tempfile
|
| 5 |
+
from huggingface_hub import hf_hub_download
|
| 6 |
+
from diffusers import AutoencoderKL, DDPMScheduler
|
| 7 |
+
from transformers import CLIPTextModel, CLIPTokenizer, CLIPTextModelWithProjection
|
| 8 |
+
|
| 9 |
+
from promptdresser.models.unet import UNet2DConditionModel
|
| 10 |
+
from promptdresser.models.cloth_encoder import ClothEncoder
|
| 11 |
+
from promptdresser.pipelines.sdxl import PromptDresser
|
| 12 |
+
from lib.caption import generate_caption
|
| 13 |
+
from lib.mask import generate_clothing_mask
|
| 14 |
+
from lib.pose import generate_openpose
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 18 |
+
weight_dtype = torch.float16 if device == "cuda" else torch.float32
|
| 19 |
+
|
| 20 |
+
def load_models():
    """Download, assemble, and return every component of the try-on pipeline.

    Loads the SDXL-inpainting text encoders/tokenizers/scheduler/UNet, the
    fp16-safe VAE, the cloth encoder, and the fine-tuned VITON-HD UNet
    checkpoint, moves the weight-bearing modules to the global `device` in
    `weight_dtype`, and wires them into a PromptDresser pipeline.

    Returns:
        dict: The individual components plus the assembled pipeline under
        the key "pipeline".
    """
    print("⚙️ Загрузка моделей...")

    # All SDXL-inpainting components come from the same base repo.
    base_repo = "diffusers/stable-diffusion-xl-1.0-inpainting-0.1"

    noise_scheduler = DDPMScheduler.from_pretrained(base_repo, subfolder="scheduler")
    tokenizer = CLIPTokenizer.from_pretrained(base_repo, subfolder="tokenizer")
    text_encoder = CLIPTextModel.from_pretrained(base_repo, subfolder="text_encoder")
    tokenizer_2 = CLIPTokenizer.from_pretrained(base_repo, subfolder="tokenizer_2")
    text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(base_repo, subfolder="text_encoder_2")
    vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix")
    unet = UNet2DConditionModel.from_pretrained(base_repo, subfolder="unet")
    cloth_encoder = ClothEncoder.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet")

    unet_checkpoint_path = hf_hub_download(
        repo_id="Benrise/VITON-HD",
        filename="VITONHD/model/pytorch_model.bin",
        cache_dir="checkpoints"
    )
    # map_location="cpu" lets a CUDA-saved checkpoint load on CPU-only hosts
    # (without it torch.load tries to restore tensors onto the saving device);
    # the weights are moved to `device` just below.
    unet.load_state_dict(torch.load(unet_checkpoint_path, map_location="cpu"))

    models = {
        "unet": unet.to(device, dtype=weight_dtype),
        "vae": vae.to(device, dtype=weight_dtype),
        "text_encoder": text_encoder.to(device, dtype=weight_dtype),
        "text_encoder_2": text_encoder_2.to(device, dtype=weight_dtype),
        "cloth_encoder": cloth_encoder.to(device, dtype=weight_dtype),
        "noise_scheduler": noise_scheduler,
        "tokenizer": tokenizer,
        "tokenizer_2": tokenizer_2
    }

    pipeline = PromptDresser(
        vae=models["vae"],
        text_encoder=models["text_encoder"],
        text_encoder_2=models["text_encoder_2"],
        tokenizer=models["tokenizer"],
        tokenizer_2=models["tokenizer_2"],
        unet=models["unet"],
        scheduler=models["noise_scheduler"],
    ).to(device, dtype=weight_dtype)

    return {**models, "pipeline": pipeline}
|
| 64 |
+
|
| 65 |
+
# Load everything once at import time so every Gradio callback reuses the
# same warmed-up pipeline instead of re-downloading weights per request.
models = load_models()
pipeline = models["pipeline"]
|
| 67 |
+
|
| 68 |
+
def generate_vton(person_image, cloth_image, outfit_prompt="", clothing_prompt=""):
    """Run a full virtual try-on pass for one person/garment pair.

    Saves both PIL images to a temp dir, derives a clothing mask and an
    OpenPose map from the person photo, auto-captions both images (used as
    fallbacks when the user left the prompt boxes empty), and runs the
    PromptDresser pipeline.

    Args:
        person_image: PIL image of the person (from the Gradio widget).
        cloth_image: PIL image of the garment (from the Gradio widget).
        outfit_prompt: Optional user description of the overall look.
        clothing_prompt: Optional user description of the garment.

    Returns:
        PIL.Image.Image: The generated try-on image.

    Raises:
        gr.Error: If either input image is missing.
    """
    # Gradio passes None when a widget is empty; fail with a clear UI error
    # instead of an AttributeError on .save().
    if person_image is None or cloth_image is None:
        raise gr.Error("Загрузите фото человека и фото одежды")

    with tempfile.TemporaryDirectory() as tmp_dir:
        person_path = os.path.join(tmp_dir, "person.png")
        cloth_path = os.path.join(tmp_dir, "cloth.png")

        person_image.save(person_path)
        cloth_image.save(cloth_path)

        mask_path = os.path.join(tmp_dir, "mask.png")
        pose_path = os.path.join(tmp_dir, "pose.png")

        # label=4 — assumed to be the upper-clothes class of the segmentation
        # model used by generate_clothing_mask; TODO confirm against lib/mask.py.
        mask_image = generate_clothing_mask(person_path, label=4, output_path=mask_path, show_result=False)
        pose_image = generate_openpose(person_path, output_image_path=pose_path, show_result=False)

        auto_outfit_prompt = generate_caption(person_path, device)
        auto_clothing_prompt = generate_caption(cloth_path, device)

        # User-supplied prompts take precedence; captions are the fallback.
        final_outfit_prompt = outfit_prompt or auto_outfit_prompt
        final_clothing_prompt = clothing_prompt or auto_clothing_prompt

        with torch.autocast(device):
            result = pipeline(
                image=person_image,
                mask_image=mask_image,
                pose_image=pose_image,
                cloth_encoder=models["cloth_encoder"],
                cloth_encoder_image=cloth_image,
                prompt=final_outfit_prompt,
                prompt_clothing=final_clothing_prompt,
                height=1024,
                width=768,
                guidance_scale=2.0,
                guidance_scale_img=4.5,
                guidance_scale_text=7.5,
                num_inference_steps=30,
                strength=1,
                interm_cloth_start_ratio=0.5,
                generator=None,
            ).images[0]

    return result
|
| 109 |
+
|
| 110 |
+
# --- Gradio UI -------------------------------------------------------------
# Two-column layout: inputs/prompts/examples on the left, result on the right.
with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container") as demo:
    gr.Markdown("# 🧥 Virtual Try-On")
    gr.Markdown("Загрузите фото человека и одежды для виртуальной примерки")

    with gr.Row():
        with gr.Column():
            # Both inputs arrive as PIL images (type="pil"), upload only.
            person_input = gr.Image(label="Фото человека", type="pil", sources=["upload"])
            cloth_input = gr.Image(label="Фото одежды", type="pil", sources=["upload"])
            # Optional prompts; when empty, generate_vton falls back to
            # auto-generated captions.
            outfit_prompt = gr.Textbox(label="Описание образа (опционально)", placeholder="Например: man in casual outfit")
            clothing_prompt = gr.Textbox(label="Описание одежды (опционально)", placeholder="Например: red t-shirt with print")
            generate_btn = gr.Button("Сгенерировать примерку", variant="primary")

            gr.Examples(
                examples=[
                    ["./test/person2.png", "./test/00008_00.jpg", "man in skirt", "black longsleeve"]
                ],
                inputs=[person_input, cloth_input, outfit_prompt, clothing_prompt],
                label="Примеры для быстрого тестирования"
            )

        with gr.Column():
            output_image = gr.Image(label="Результат примерки", interactive=False)

    generate_btn.click(
        fn=generate_vton,
        inputs=[person_input, cloth_input, outfit_prompt, clothing_prompt],
        outputs=output_image
    )

    gr.Markdown("### Инструкция:")
    gr.Markdown("1. Загрузите четкое фото человека в полный рост\n"
                "2. Загрузите фото одежды на белом фоне\n"
                "3. При необходимости уточните описание образа или одежды\n"
                "4. Нажмите кнопку 'Сгенерировать примерку'")

if __name__ == "__main__":
    # Bind to all interfaces only when running inside a HF Space
    # (SPACE_ID is set there); queue limits concurrent generations.
    demo.queue(max_size=3).launch(
        server_name="0.0.0.0" if os.getenv("SPACE_ID") else None,
        share=os.getenv("GRADIO_SHARE") == "True",
        debug=True
    )
|
configs/VITONHD.yaml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PromptDresser VITON-HD configuration.
# NOTE(review): nesting under `dataset:` reconstructed from a mangled diff
# view — verify indentation against the original file.
no_pose: True
use_jointcond: True
no_ipadapter: True

# Intermediate cloth mask is enabled from 50% of the denoising schedule.
use_interm_cloth_mask: True
interm_cloth_start_ratio: 0.5

dataset:
  dataset_name: "VITONHDDataset"
  data_root_dir: "./DATA/zalando-hd-resized"
  # Spatial/color augmentations applied during training.
  img_spatial_transform_lst:
    - "randomresizedcrop"
    - "randomaffine"
  cloth_spatial_transform_lst:
    - "randomresizedcrop"
    - "randomaffine"
  img_cloth_spatial_transform_lst:
    - "hflip"
  color_transform_lst:
    - "colorjitter"
  i_drop_rate: 0.05
  pose_type: "densepose"
  train_folder_name: train_fine
  test_folder_name: test_fine
  prompt_version: v12
  text_file_postfix: "gpt4o.json"
  train_folder_name_for_interm_cloth_mask: train_coarse
  test_folder_name_for_interm_cloth_mask: test_coarse
  # Random dilation of the intermediate cloth mask, 0–200 iterations.
  use_rand_dilate: True

  rand_dilate_miniter: 0
  rand_dilate_maxiter: 200
|
lib/caption.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache

from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM


@lru_cache(maxsize=2)
def _load_captioner(device):
    """Load and cache the GIT captioning processor/model for *device*.

    The original implementation re-instantiated (and potentially
    re-downloaded) the model on every call; caching makes repeated
    captioning calls cheap. `device` is a hashable string, so it is a
    valid cache key.
    """
    processor = AutoProcessor.from_pretrained("microsoft/git-base", use_fast=False)
    model = AutoModelForCausalLM.from_pretrained("microsoft/git-base").to(device)
    return processor, model


def generate_caption(image_path, device="cuda"):
    """Generate a natural-language caption for an image with microsoft/git-base.

    Args:
        image_path: Path to the image file to caption.
        device: Torch device string ("cuda" or "cpu").

    Returns:
        str: The generated caption.
    """
    print("Генерация подписи...")
    processor, model = _load_captioner(device)
    image = Image.open(image_path).convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        pixel_values=inputs.pixel_values,
        max_length=50,
        pad_token_id=processor.tokenizer.pad_token_id
    )
    caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print("Сгенерированная подпись:", caption)
    return caption
|
lib/mask.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache

from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation
from PIL import Image
import numpy as np
import requests
import torch.nn.functional as F
import torch
import os


@lru_cache(maxsize=1)
def _load_segmenter(model_name):
    """Load and cache the clothes-segmentation processor/model.

    Avoids re-instantiating the model on every mask request (the original
    reloaded it per call).
    """
    processor = SegformerImageProcessor.from_pretrained(model_name)
    model = AutoModelForSemanticSegmentation.from_pretrained(model_name)
    return processor, model


def generate_clothing_mask(
    image_path: str,
    label: int,
    output_path: str = "./output_mask.png",
    model_name: str = "mattmdjaga/segformer_b2_clothes",
    show_result: bool = False
) -> Image.Image:
    """Generate a binary mask for one clothing class and save it to disk.

    Args:
        image_path: Local path or HTTP(S) URL of the input image.
        label: Segmentation class id to extract (0-17 for this model).
        output_path: Where to save the mask PNG.
        model_name: HuggingFace model id of the segmentation model.
        show_result: Display the mask via PIL's viewer when True.

    Returns:
        PIL.Image: Binary mask (white = selected class, black = everything else).
    """
    processor, model = _load_segmenter(model_name)

    if image_path.startswith(('http://', 'https://')):
        image = Image.open(requests.get(image_path, stream=True).raw)
    else:
        image = Image.open(image_path)

    if image.mode != 'RGB':
        image = image.convert('RGB')

    image_np = np.array(image)
    if len(image_np.shape) != 3 or image_np.shape[2] != 3:
        raise ValueError("Изображение должно быть в формате RGB (H, W, 3)")

    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Logits come out at the model's internal resolution; upsample back to
    # the original image size before taking the per-pixel argmax.
    logits = outputs.logits
    upsampled_logits = F.interpolate(
        logits,
        size=image.size[::-1],  # PIL size is (W, H); interpolate wants (H, W)
        mode="bilinear",
        align_corners=False,
    )

    pred_seg = upsampled_logits.argmax(dim=1)[0]
    mask = (pred_seg == label).numpy().astype('uint8') * 255
    mask_image = Image.fromarray(mask)

    # BUG FIX: os.path.dirname returns "" for bare filenames and
    # os.makedirs("") raises FileNotFoundError — only create a directory
    # when there actually is one in the path.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    mask_image.save(output_path)

    # BUG FIX: show_result was documented but silently ignored.
    if show_result:
        mask_image.show()

    return mask_image
|
lib/pose.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from controlnet_aux import OpenposeDetector
from PIL import Image
import torch


def generate_openpose(
    input_image_path: str,
    output_image_path: str = None,
    device: str = "cuda" if torch.cuda.is_available() else "cpu",
    show_result: bool = False
) -> Image.Image:
    """Generate an OpenPose pose map from an input image.

    Args:
        input_image_path: Path to the source image.
        output_image_path: Where to save the result; not saved when None.
        device: Processing device ('cuda' or 'cpu').
        show_result: Display the result immediately when True.

    Returns:
        Image.Image: The OpenPose pose-map image.
    """
    openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet").to(device)

    image = Image.open(input_image_path).convert("RGB")

    openpose_map = openpose(image)

    if output_image_path:
        openpose_map.save(output_image_path)

    if show_result:
        openpose_map.show()

    # BUG FIX: the original returned the untouched input `image`, so callers
    # (e.g. app.generate_vton) fed the person photo to the pipeline as the
    # pose conditioning instead of the actual OpenPose map.
    return openpose_map
|
preprocess/__init__.py
ADDED
|
File without changes
|
preprocess/humanparsing/__init__.py
ADDED
|
File without changes
|
preprocess/humanparsing/datasets/__init__.py
ADDED
|
File without changes
|
preprocess/humanparsing/datasets/datasets.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : datasets.py
|
| 8 |
+
@Time : 8/4/19 3:35 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import os
|
| 15 |
+
import numpy as np
|
| 16 |
+
import random
|
| 17 |
+
import torch
|
| 18 |
+
import cv2
|
| 19 |
+
from torch.utils import data
|
| 20 |
+
from utils.transforms import get_affine_transform
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class LIPDataSet(data.Dataset):
    """LIP human-parsing dataset with train-time augmentation.

    Reads image ids from `<root>/<dataset>_id.txt`, images from
    `<root>/<dataset>_images`, and parsing annotations from
    `<root>/<dataset>_segmentations`. Training items are randomly scaled,
    rotated, and horizontally flipped (with left/right label swapping).
    """

    def __init__(self, root, dataset, crop_size=[473, 473], scale_factor=0.25,
                 rotation_factor=30, ignore_label=255, transform=None):
        self.root = root
        self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
        self.crop_size = np.asarray(crop_size)
        self.ignore_label = ignore_label
        self.scale_factor = scale_factor
        self.rotation_factor = rotation_factor
        self.flip_prob = 0.5
        self.transform = transform
        self.dataset = dataset

        list_path = os.path.join(self.root, self.dataset + '_id.txt')
        # FIX: close the id file deterministically (the original leaked the
        # handle opened inside the comprehension).
        with open(list_path) as f:
            train_list = [i_id.strip() for i_id in f]

        self.train_list = train_list
        self.number_samples = len(self.train_list)

    def __len__(self):
        return self.number_samples

    def _box2cs(self, box):
        """Convert an [x, y, w, h] box to (center, scale)."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Compute box center and aspect-ratio-corrected scale."""
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        # Pad the shorter side so the box matches the crop aspect ratio.
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)
        return center, scale

    def __getitem__(self, index):
        train_item = self.train_list[index]

        im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg')
        parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png')

        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        # FIX: np.long was deprecated in NumPy 1.20 and removed in 1.24;
        # np.int64 is the equivalent concrete dtype.
        parsing_anno = np.zeros((h, w), dtype=np.int64)

        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0

        if self.dataset != 'test':
            # Get pose annotation
            parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)
            if self.dataset == 'train' or self.dataset == 'trainval':
                sf = self.scale_factor
                rf = self.rotation_factor
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                # Rotate only 60% of the time.
                r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0

                if random.random() <= self.flip_prob:
                    im = im[:, ::-1, :]
                    parsing_anno = parsing_anno[:, ::-1]
                    person_center[0] = im.shape[1] - person_center[0] - 1
                    # Swap left/right body-part labels after a horizontal flip.
                    right_idx = [15, 17, 19]
                    left_idx = [14, 16, 18]
                    for i in range(0, 3):
                        right_pos = np.where(parsing_anno == right_idx[i])
                        left_pos = np.where(parsing_anno == left_idx[i])
                        parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                        parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

        trans = get_affine_transform(person_center, s, r, self.crop_size)
        input = cv2.warpAffine(
            im,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        if self.transform:
            input = self.transform(input)

        meta = {
            'name': train_item,
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        if self.dataset == 'val' or self.dataset == 'test':
            return input, meta
        else:
            # Labels are warped with nearest-neighbour so class ids stay exact;
            # out-of-image pixels get the ignore label (255).
            label_parsing = cv2.warpAffine(
                parsing_anno,
                trans,
                (int(self.crop_size[1]), int(self.crop_size[0])),
                flags=cv2.INTER_NEAREST,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=(255))

            label_parsing = torch.from_numpy(label_parsing)

            return input, label_parsing, meta
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
class LIPDataValSet(data.Dataset):
    """LIP validation dataset: center-crop only, no augmentation.

    When `flip=True`, each item is returned as a 2-image batch (original +
    horizontal flip) for test-time flip averaging.
    """

    def __init__(self, root, dataset='val', crop_size=[473, 473], transform=None, flip=False):
        # FIX: dropped the duplicate self.root / self.crop_size assignments
        # of the original; final values are identical.
        self.root = root
        self.transform = transform
        self.flip = flip
        self.dataset = dataset
        self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
        self.crop_size = np.asarray(crop_size)

        list_path = os.path.join(self.root, self.dataset + '_id.txt')
        # FIX: close the id file deterministically (the original leaked the
        # handle opened inside the comprehension).
        with open(list_path) as f:
            val_list = [i_id.strip() for i_id in f]

        self.val_list = val_list
        self.number_samples = len(self.val_list)

    def __len__(self):
        return len(self.val_list)

    def _box2cs(self, box):
        """Convert an [x, y, w, h] box to (center, scale)."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Compute box center and aspect-ratio-corrected scale."""
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        # Pad the shorter side so the box matches the crop aspect ratio.
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)

        return center, scale

    def __getitem__(self, index):
        val_item = self.val_list[index]
        # Load training image
        im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg')
        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.crop_size)
        input = cv2.warpAffine(
            im,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))
        input = self.transform(input)
        flip_input = input.flip(dims=[-1])
        if self.flip:
            # Stack original + flipped for test-time augmentation.
            batch_input_im = torch.stack([input, flip_input])
        else:
            batch_input_im = input

        meta = {
            'name': val_item,
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return batch_input_im, meta
|
preprocess/humanparsing/datasets/simple_extractor_dataset.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : dataset.py
|
| 8 |
+
@Time : 8/30/19 9:12 PM
|
| 9 |
+
@Desc : Dataset Definition
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import os
|
| 15 |
+
import pdb
|
| 16 |
+
|
| 17 |
+
import cv2
|
| 18 |
+
import numpy as np
|
| 19 |
+
from PIL import Image
|
| 20 |
+
from torch.utils import data
|
| 21 |
+
from utils.transforms import get_affine_transform
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class SimpleFolderDataset(data.Dataset):
    """Inference dataset over a folder, a single file, or one PIL image.

    `root` may be a directory (all files inside are used), a path to a
    single image file, or an in-memory PIL image.
    """

    def __init__(self, root, input_size=[512, 512], transform=None):
        self.root = root
        self.input_size = input_size
        self.transform = transform
        self.aspect_ratio = input_size[1] * 1.0 / input_size[0]
        self.input_size = np.asarray(input_size)
        self.is_pil_image = False
        if isinstance(root, Image.Image):
            self.file_list = [root]
            self.is_pil_image = True
        elif os.path.isfile(root):
            self.file_list = [os.path.basename(root)]
            self.root = os.path.dirname(root)
        else:
            self.file_list = os.listdir(self.root)

    def __len__(self):
        return len(self.file_list)

    def _box2cs(self, box):
        """Convert an [x, y, w, h] box to (center, scale)."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Compute box center and aspect-ratio-corrected scale."""
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        # Pad the shorter side so the box matches the crop aspect ratio.
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w, h], dtype=np.float32)
        return center, scale

    def __getitem__(self, index):
        if self.is_pil_image:
            # RGB (PIL) -> BGR (OpenCV channel order used below).
            img = np.asarray(self.file_list[index])[:, :, [2, 1, 0]]
        else:
            img_name = self.file_list[index]
            img_path = os.path.join(self.root, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        h, w, _ = img.shape

        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.input_size)
        input = cv2.warpAffine(
            img,
            trans,
            (int(self.input_size[1]), int(self.input_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        # FIX: guard transform like the sibling LIPDataSet does — the
        # original crashed with TypeError when transform=None.
        if self.transform:
            input = self.transform(input)
        meta = {
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return input, meta
|
preprocess/humanparsing/datasets/target_generation.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch.nn import functional as F
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def generate_edge_tensor(label, edge_width=3):
    """Return a binary edge map computed from a segmentation label map.

    A pixel becomes an edge when its label differs from a neighbouring
    pixel (right / up / both diagonals), ignoring the 255 "void" label.
    Raw one-pixel edges are then thickened to ``edge_width`` pixels with
    a box-filter dilation.

    Parameters
    ----------
    label : torch.Tensor
        (H, W) or (N, H, W) integer label map, on CPU or GPU.
    edge_width : int
        Width of the dilated edge band (odd values keep the map size).

    Returns
    -------
    torch.Tensor
        0/1 float tensor with the spatial size of ``label`` (batch dim
        squeezed away when it is 1, as before).
    """
    # Work on the tensor's own device instead of hard-coding CUDA
    # (torch.cuda.FloatTensor / .cuda() broke CPU-only use).
    label = label.float()
    if len(label.shape) == 2:
        label = label.unsqueeze(0)
    n, h, w = label.shape
    edge = torch.zeros(label.shape, dtype=torch.float, device=label.device)

    # Mark a pixel when it disagrees with a neighbour and neither side is
    # the 255 ignore label. Assignments go through views into `edge`.
    # right
    edge_right = edge[:, 1:h, :]
    edge_right[(label[:, 1:h, :] != label[:, :h - 1, :]) & (label[:, 1:h, :] != 255)
               & (label[:, :h - 1, :] != 255)] = 1

    # up
    edge_up = edge[:, :, :w - 1]
    edge_up[(label[:, :, :w - 1] != label[:, :, 1:w])
            & (label[:, :, :w - 1] != 255)
            & (label[:, :, 1:w] != 255)] = 1

    # upright
    edge_upright = edge[:, :h - 1, :w - 1]
    edge_upright[(label[:, :h - 1, :w - 1] != label[:, 1:h, 1:w])
                 & (label[:, :h - 1, :w - 1] != 255)
                 & (label[:, 1:h, 1:w] != 255)] = 1

    # bottomright
    edge_bottomright = edge[:, :h - 1, 1:w]
    edge_bottomright[(label[:, :h - 1, 1:w] != label[:, 1:h, :w - 1])
                     & (label[:, :h - 1, 1:w] != 255)
                     & (label[:, 1:h, :w - 1] != 255)] = 1

    # Dilate the one-pixel edges to the requested width.
    kernel = torch.ones((1, 1, edge_width, edge_width), dtype=torch.float,
                        device=label.device)
    with torch.no_grad():
        edge = edge.unsqueeze(1)
        # padding was hard-coded to 1, which only preserved the spatial
        # size for edge_width == 3; edge_width // 2 is correct for any
        # odd width and identical for the default.
        edge = F.conv2d(edge, kernel, stride=1, padding=edge_width // 2)
        edge[edge != 0] = 1
        edge = edge.squeeze()
    return edge
|
preprocess/humanparsing/modules/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .bn import ABN, InPlaceABN, InPlaceABNSync
|
| 2 |
+
from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
|
| 3 |
+
from .misc import GlobalAvgPool2d, SingleGPU
|
| 4 |
+
from .residual import IdentityResidualBlock
|
| 5 |
+
from .dense import DenseModule
|
preprocess/humanparsing/modules/bn.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as functional
|
| 4 |
+
|
| 5 |
+
try:
|
| 6 |
+
from queue import Queue
|
| 7 |
+
except ImportError:
|
| 8 |
+
from Queue import Queue
|
| 9 |
+
|
| 10 |
+
from .functions import *
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class ABN(nn.Module):
    """Activated Batch Normalization.

    Fuses a ``BatchNorm2d`` and an activation function into one module.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Create an Activated Batch Normalization module.

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied to compute running statistics.
        affine : bool
            If `True` apply learned scale and shift after normalization.
        activation : str
            One of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(ABN, self).__init__()
        self.num_features = num_features
        self.affine = affine
        self.eps = eps
        self.momentum = momentum
        self.activation = activation
        self.slope = slope
        if affine:
            self.weight = nn.Parameter(torch.ones(num_features))
            self.bias = nn.Parameter(torch.zeros(num_features))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_parameters(self):
        # Identity BN statistics; unit scale / zero shift when affine.
        self.running_mean.zero_()
        self.running_var.fill_(1)
        if self.affine:
            self.weight.data.fill_(1)
            self.bias.data.fill_(0)

    def forward(self, x):
        y = functional.batch_norm(x, self.running_mean, self.running_var,
                                  self.weight, self.bias, self.training,
                                  self.momentum, self.eps)
        act = self.activation
        if act == ACT_RELU:
            y = functional.relu(y, inplace=True)
        elif act == ACT_LEAKY_RELU:
            y = functional.leaky_relu(y, negative_slope=self.slope, inplace=True)
        elif act == ACT_ELU:
            y = functional.elu(y, inplace=True)
        return y

    def __repr__(self):
        template = ('{name}({num_features}, eps={eps}, momentum={momentum},'
                    ' affine={affine}, activation={activation}')
        template += ', slope={slope})' if self.activation == "leaky_relu" else ')'
        return template.format(name=self.__class__.__name__, **self.__dict__)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class InPlaceABN(ABN):
    """InPlace Activated Batch Normalization."""

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Create an InPlace Activated Batch Normalization module.

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied to compute running statistics.
        affine : bool
            If `True` apply learned scale and shift after normalization.
        activation : str
            One of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(InPlaceABN, self).__init__(num_features, eps, momentum, affine,
                                         activation, slope)

    def forward(self, x):
        # inplace_abn also returns the updated running statistics; only
        # the activated output is exposed to callers.
        out, _, _ = inplace_abn(x, self.weight, self.bias, self.running_mean,
                                self.running_var, self.training, self.momentum,
                                self.eps, self.activation, self.slope)
        return out
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
class InPlaceABNSync(ABN):
    """InPlace Activated Batch Normalization, synchronized across GPUs.

    Assumes replication via the same mechanism as
    ``nn.DistributedDataParallel``.
    """

    def forward(self, x):
        # Same contract as InPlaceABN, but statistics are all-reduced
        # across processes by the underlying autograd function.
        out, _, _ = inplace_abn_sync(x, self.weight, self.bias,
                                     self.running_mean, self.running_var,
                                     self.training, self.momentum, self.eps,
                                     self.activation, self.slope)
        return out

    def __repr__(self):
        base = ('{name}({num_features}, eps={eps}, momentum={momentum},'
                ' affine={affine}, activation={activation}')
        tail = ', slope={slope})' if self.activation == "leaky_relu" else ')'
        return (base + tail).format(name=self.__class__.__name__, **self.__dict__)
|
| 131 |
+
|
| 132 |
+
|
preprocess/humanparsing/modules/deeplab.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as functional
|
| 4 |
+
|
| 5 |
+
from models._util import try_index
|
| 6 |
+
from .bn import ABN
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DeeplabV3(nn.Module):
    """DeepLab v3 ASPP head: parallel atrous convolutions plus a pooled
    branch, fused into ``out_channels`` feature maps.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 hidden_channels=256,
                 dilations=(12, 24, 36),
                 norm_act=ABN,
                 pooling_size=None):
        # pooling_size: fixed pooling window used only at inference time;
        # None (or training mode) means global average pooling instead.
        super(DeeplabV3, self).__init__()
        self.pooling_size = pooling_size

        # ASPP branches: one 1x1 conv and three 3x3 atrous convs whose
        # padding equals the dilation so spatial size is preserved.
        self.map_convs = nn.ModuleList([
            nn.Conv2d(in_channels, hidden_channels, 1, bias=False),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2])
        ])
        self.map_bn = norm_act(hidden_channels * 4)

        self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False)
        self.global_pooling_bn = norm_act(hidden_channels)

        # 1x1 reductions for the concatenated branches and pooled branch.
        self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False)
        self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False)
        self.red_bn = norm_act(out_channels)

        self.reset_parameters(self.map_bn.activation, self.map_bn.slope)

    def reset_parameters(self, activation, slope):
        # Xavier init with gain matched to the norm layers' activation.
        gain = nn.init.calculate_gain(activation, slope)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight.data, gain)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, ABN):
                if hasattr(m, "weight") and m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Map convolutions
        out = torch.cat([m(x) for m in self.map_convs], dim=1)
        out = self.map_bn(out)
        out = self.red_conv(out)

        # Global pooling
        pool = self._global_pooling(x)
        pool = self.global_pooling_conv(pool)
        pool = self.global_pooling_bn(pool)
        pool = self.pool_red_conv(pool)
        if self.training or self.pooling_size is None:
            # Pooled map is 1x1 here: broadcast it to the spatial size.
            pool = pool.repeat(1, 1, x.size(2), x.size(3))

        out += pool
        out = self.red_bn(out)
        return out

    def _global_pooling(self, x):
        if self.training or self.pooling_size is None:
            # Plain global average pooling down to a 1x1 map.
            pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1)
            pool = pool.view(x.size(0), x.size(1), 1, 1)
        else:
            # Fixed-window average pooling (window capped at the input
            # size), then replicate-padded back to the input resolution.
            pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]),
                            min(try_index(self.pooling_size, 1), x.shape[3]))
            # Asymmetric padding restores the exact size for even windows.
            padding = (
                (pooling_size[1] - 1) // 2,
                (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1,
                (pooling_size[0] - 1) // 2,
                (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1
            )

            pool = functional.avg_pool2d(x, pooling_size, stride=1)
            pool = functional.pad(pool, pad=padding, mode="replicate")
        return pool
|
preprocess/humanparsing/modules/dense.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import OrderedDict
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
|
| 6 |
+
from .bn import ABN
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DenseModule(nn.Module):
    """Densely-connected block.

    Every layer consumes the concatenation of the input and all previous
    layer outputs, and contributes ``growth`` new channels through a
    1x1 bottleneck followed by a 3x3 (optionally dilated) convolution.
    """

    def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1):
        super(DenseModule, self).__init__()
        self.in_channels = in_channels
        self.growth = growth
        self.layers = layers

        self.convs1 = nn.ModuleList()
        self.convs3 = nn.ModuleList()
        channels = in_channels
        bottleneck = self.growth * bottleneck_factor
        for _ in range(self.layers):
            # 1x1 bottleneck reducing the concatenated features.
            self.convs1.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(channels)),
                ("conv", nn.Conv2d(channels, bottleneck, 1, bias=False))
            ])))
            # 3x3 conv producing `growth` new channels.
            self.convs3.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(bottleneck)),
                ("conv", nn.Conv2d(bottleneck, self.growth, 3, padding=dilation,
                                   bias=False, dilation=dilation))
            ])))
            channels += self.growth

    @property
    def out_channels(self):
        """Channel count of the concatenated output tensor."""
        return self.in_channels + self.growth * self.layers

    def forward(self, x):
        feats = [x]
        for conv1, conv3 in zip(self.convs1, self.convs3):
            y = conv3(conv1(torch.cat(feats, dim=1)))
            feats.append(y)
        return torch.cat(feats, dim=1)
|
preprocess/humanparsing/modules/functions.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pdb
|
| 2 |
+
from os import path
|
| 3 |
+
import torch
|
| 4 |
+
import torch.distributed as dist
|
| 5 |
+
import torch.autograd as autograd
|
| 6 |
+
import torch.cuda.comm as comm
|
| 7 |
+
from torch.autograd.function import once_differentiable
|
| 8 |
+
from torch.utils.cpp_extension import load
|
| 9 |
+
|
| 10 |
+
# JIT-compile the fused inplace-ABN C++/CUDA extension from ./src at
# import time. NOTE(review): this requires a working C++/CUDA toolchain
# in the environment — confirm deployment machines provide one.
_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_backend = load(name="inplace_abn",
                extra_cflags=["-O3"],
                sources=[path.join(_src_path, f) for f in [
                    "inplace_abn.cpp",
                    "inplace_abn_cpu.cpp",
                    "inplace_abn_cuda.cu",
                    "inplace_abn_cuda_half.cu"
                ]],
                extra_cuda_cflags=["--expt-extended-lambda"])

# Activation names
ACT_RELU = "relu"
ACT_LEAKY_RELU = "leaky_relu"
ACT_ELU = "elu"
ACT_NONE = "none"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _check(fn, *args, **kwargs):
|
| 29 |
+
success = fn(*args, **kwargs)
|
| 30 |
+
if not success:
|
| 31 |
+
raise RuntimeError("CUDA Error encountered in {}".format(fn))
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _broadcast_shape(x):
|
| 35 |
+
out_size = []
|
| 36 |
+
for i, s in enumerate(x.size()):
|
| 37 |
+
if i != 1:
|
| 38 |
+
out_size.append(1)
|
| 39 |
+
else:
|
| 40 |
+
out_size.append(s)
|
| 41 |
+
return out_size
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _reduce(x):
|
| 45 |
+
if len(x.size()) == 2:
|
| 46 |
+
return x.sum(dim=0)
|
| 47 |
+
else:
|
| 48 |
+
n, c = x.size()[0:2]
|
| 49 |
+
return x.contiguous().view((n, c, -1)).sum(2).sum(0)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _count_samples(x):
|
| 53 |
+
count = 1
|
| 54 |
+
for i, s in enumerate(x.size()):
|
| 55 |
+
if i != 1:
|
| 56 |
+
count *= s
|
| 57 |
+
return count
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _act_forward(ctx, x):
    """Apply ``ctx.activation`` to *x* in place via the compiled backend."""
    act = ctx.activation
    if act == ACT_LEAKY_RELU:
        _backend.leaky_relu_forward(x, ctx.slope)
    elif act == ACT_ELU:
        _backend.elu_forward(x)
    elif act == ACT_NONE:
        # Identity: nothing to do.
        pass
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _act_backward(ctx, x, dx):
    """Backpropagate ``ctx.activation`` through *dx* in place.

    *x* holds the post-activation output that the backend uses to
    invert the activation.
    """
    act = ctx.activation
    if act == ACT_LEAKY_RELU:
        _backend.leaky_relu_backward(x, dx, ctx.slope)
    elif act == ACT_ELU:
        _backend.elu_backward(x, dx)
    elif act == ACT_NONE:
        # Identity: gradient passes through unchanged.
        pass
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
class InPlaceABN(autograd.Function):
    """Fused in-place batch norm + activation (single process).

    ``forward`` overwrites its input tensor with the activated, normalized
    result to save memory; running statistics are updated in place too.
    """

    @staticmethod
    def forward(ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        # Affine only when both scale and shift are provided.
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        count = _count_samples(x)
        x = x.contiguous()
        # The backend expects empty tensors (not None) when non-affine.
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            # count / (count - 1): Bessel's correction for the running var.
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            # Inference: normalize with the stored running statistics.
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        # Save the *activated output* (not the input) — the backend's
        # backward inverts the activation from it.
        ctx.save_for_backward(x, var, weight, bias)
        ctx.mark_non_differentiable(running_mean, running_var)
        return x, running_mean, running_var

    @staticmethod
    @once_differentiable
    def backward(ctx, dz, _drunning_mean, _drunning_var):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            # Per-channel sums E[dz] and E[y*dz] needed by the BN backward.
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
        else:
            # TODO: implement simplified CUDA backward for inference mode
            edz = dz.new_zeros(dz.size(1))
            eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        # dweight = eydz * weight.sign() if ctx.affine else None
        dweight = eydz if ctx.affine else None
        if dweight is not None:
            # Sign flip replicates the weight.sign() factor in-place.
            dweight[weight < 0] *= -1
        dbias = edz if ctx.affine else None

        # One gradient slot per forward argument; non-tensor args get None.
        return dx, dweight, dbias, None, None, None, None, None, None, None
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
class InPlaceABNSync(autograd.Function):
    """Fused in-place batch norm + activation, synchronized across
    distributed processes via all-reduce of per-process statistics.
    """

    @classmethod
    def forward(cls, ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        # Degrades to single-process behavior when torch.distributed is
        # not initialized.
        ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1

        # count = _count_samples(x)
        batch_size = x.new_tensor([x.shape[0]], dtype=torch.long)

        x = x.contiguous()
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)
            if ctx.world_size > 1:
                # get global batch size
                if equal_batches:
                    batch_size *= ctx.world_size
                else:
                    # Uneven per-process batches: sum the true sizes.
                    dist.all_reduce(batch_size, dist.ReduceOp.SUM)

                # Fraction of the global batch held by this process;
                # also reused as the reduction weight in backward.
                ctx.factor = x.shape[0] / float(batch_size.item())

                mean_all = mean.clone() * ctx.factor
                dist.all_reduce(mean_all, dist.ReduceOp.SUM)

                # Law of total variance: local var plus squared distance
                # of the local mean from the global mean.
                var_all = (var + (mean - mean_all) ** 2) * ctx.factor
                dist.all_reduce(var_all, dist.ReduceOp.SUM)

                mean = mean_all
                var = var_all

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            # Total sample count = global batch * spatial elements.
            count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1]
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1)))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        # Activated output is saved; backward inverts the activation.
        ctx.save_for_backward(x, var, weight, bias)
        ctx.mark_non_differentiable(running_mean, running_var)
        return x, running_mean, running_var

    @staticmethod
    @once_differentiable
    def backward(ctx, dz, _drunning_mean, _drunning_var):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
            # Keep local copies: parameter grads use local sums, while
            # the input grad uses the globally reduced sums.
            edz_local = edz.clone()
            eydz_local = eydz.clone()

            if ctx.world_size > 1:
                edz *= ctx.factor
                dist.all_reduce(edz, dist.ReduceOp.SUM)

                eydz *= ctx.factor
                dist.all_reduce(eydz, dist.ReduceOp.SUM)
        else:
            edz_local = edz = dz.new_zeros(dz.size(1))
            eydz_local = eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        # dweight = eydz_local * weight.sign() if ctx.affine else None
        dweight = eydz_local if ctx.affine else None
        if dweight is not None:
            # Sign flip replicates the weight.sign() factor in-place.
            dweight[weight < 0] *= -1
        dbias = edz_local if ctx.affine else None

        return dx, dweight, dbias, None, None, None, None, None, None, None
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
# Functional entry points used by the nn.Module wrappers in bn.py.
inplace_abn = InPlaceABN.apply
inplace_abn_sync = InPlaceABNSync.apply

# Public API of this module.
__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]
|
preprocess/humanparsing/modules/misc.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch.nn as nn
|
| 2 |
+
import torch
|
| 3 |
+
import torch.distributed as dist
|
| 4 |
+
|
| 5 |
+
class GlobalAvgPool2d(nn.Module):
    """Average over the input's spatial dimensions, yielding (N, C)."""

    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, inputs):
        n, c = inputs.size(0), inputs.size(1)
        # Flatten all spatial dims and average them away.
        return inputs.view(n, c, -1).mean(dim=2)
|
| 13 |
+
|
| 14 |
+
class SingleGPU(nn.Module):
    """Wrapper that moves inputs onto the current GPU before calling the
    wrapped module — a single-device stand-in for DataParallel."""
    def __init__(self, module):
        super(SingleGPU, self).__init__()
        # Wrapped module; registered so its parameters are visible.
        self.module=module

    def forward(self, input):
        # non_blocking allows the host-to-device copy to overlap compute
        # when the input comes from pinned memory.
        return self.module(input.cuda(non_blocking=True))
|
| 21 |
+
|
preprocess/humanparsing/modules/residual.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import OrderedDict
|
| 2 |
+
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
|
| 5 |
+
from .bn import ABN, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
|
| 6 |
+
import torch.nn.functional as functional
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class ResidualBlock(nn.Module):
|
| 10 |
+
"""Configurable residual block
|
| 11 |
+
|
| 12 |
+
Parameters
|
| 13 |
+
----------
|
| 14 |
+
in_channels : int
|
| 15 |
+
Number of input channels.
|
| 16 |
+
channels : list of int
|
| 17 |
+
Number of channels in the internal feature maps. Can either have two or three elements: if three construct
|
| 18 |
+
a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then
|
| 19 |
+
`3 x 3` then `1 x 1` convolutions.
|
| 20 |
+
stride : int
|
| 21 |
+
Stride of the first `3 x 3` convolution
|
| 22 |
+
dilation : int
|
| 23 |
+
Dilation to apply to the `3 x 3` convolutions.
|
| 24 |
+
groups : int
|
| 25 |
+
Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
|
| 26 |
+
bottleneck blocks.
|
| 27 |
+
norm_act : callable
|
| 28 |
+
Function to create normalization / activation Module.
|
| 29 |
+
dropout: callable
|
| 30 |
+
Function to create Dropout Module.
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
def __init__(self,
|
| 34 |
+
in_channels,
|
| 35 |
+
channels,
|
| 36 |
+
stride=1,
|
| 37 |
+
dilation=1,
|
| 38 |
+
groups=1,
|
| 39 |
+
norm_act=ABN,
|
| 40 |
+
dropout=None):
|
| 41 |
+
super(ResidualBlock, self).__init__()
|
| 42 |
+
|
| 43 |
+
# Check parameters for inconsistencies
|
| 44 |
+
if len(channels) != 2 and len(channels) != 3:
|
| 45 |
+
raise ValueError("channels must contain either two or three values")
|
| 46 |
+
if len(channels) == 2 and groups != 1:
|
| 47 |
+
raise ValueError("groups > 1 are only valid if len(channels) == 3")
|
| 48 |
+
|
| 49 |
+
is_bottleneck = len(channels) == 3
|
| 50 |
+
need_proj_conv = stride != 1 or in_channels != channels[-1]
|
| 51 |
+
|
| 52 |
+
if not is_bottleneck:
|
| 53 |
+
bn2 = norm_act(channels[1])
|
| 54 |
+
bn2.activation = ACT_NONE
|
| 55 |
+
layers = [
|
| 56 |
+
("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
|
| 57 |
+
dilation=dilation)),
|
| 58 |
+
("bn1", norm_act(channels[0])),
|
| 59 |
+
("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
|
| 60 |
+
dilation=dilation)),
|
| 61 |
+
("bn2", bn2)
|
| 62 |
+
]
|
| 63 |
+
if dropout is not None:
|
| 64 |
+
layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
|
| 65 |
+
else:
|
| 66 |
+
bn3 = norm_act(channels[2])
|
| 67 |
+
bn3.activation = ACT_NONE
|
| 68 |
+
layers = [
|
| 69 |
+
("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=1, padding=0, bias=False)),
|
| 70 |
+
("bn1", norm_act(channels[0])),
|
| 71 |
+
("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=stride, padding=dilation, bias=False,
|
| 72 |
+
groups=groups, dilation=dilation)),
|
| 73 |
+
("bn2", norm_act(channels[1])),
|
| 74 |
+
("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)),
|
| 75 |
+
("bn3", bn3)
|
| 76 |
+
]
|
| 77 |
+
if dropout is not None:
|
| 78 |
+
layers = layers[0:4] + [("dropout", dropout())] + layers[4:]
|
| 79 |
+
self.convs = nn.Sequential(OrderedDict(layers))
|
| 80 |
+
|
| 81 |
+
if need_proj_conv:
|
| 82 |
+
self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)
|
| 83 |
+
self.proj_bn = norm_act(channels[-1])
|
| 84 |
+
self.proj_bn.activation = ACT_NONE
|
| 85 |
+
|
| 86 |
+
def forward(self, x):
    """Run the residual block and apply its configured final activation.

    The shortcut is projected through ``proj_conv``/``proj_bn`` when the
    block was built with a projection, otherwise the input is reused as-is.
    """
    if hasattr(self, "proj_conv"):
        residual = self.proj_bn(self.proj_conv(x))
    else:
        residual = x

    out = self.convs(x) + residual

    # The block-level activation mirrors whatever the ABN layers were
    # configured with; bn1 is used as the reference.
    act = self.convs.bn1.activation
    if act == ACT_LEAKY_RELU:
        return functional.leaky_relu(out, negative_slope=self.convs.bn1.slope, inplace=True)
    if act == ACT_ELU:
        return functional.elu(out, inplace=True)
    return out
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
class IdentityResidualBlock(nn.Module):
    """Pre-activation ("identity mapping") residual block.

    Batch-norm + activation (``bn1``) is applied *before* the convolutions,
    and the shortcut is added to the raw convolution output with no
    activation after the sum.
    """

    def __init__(self,
                 in_channels,
                 channels,
                 stride=1,
                 dilation=1,
                 groups=1,
                 norm_act=ABN,
                 dropout=None):
        """Configurable identity-mapping residual block

        Parameters
        ----------
        in_channels : int
            Number of input channels.
        channels : list of int
            Number of channels in the internal feature maps. Can either have two or three elements: if two, construct
            a residual block with two `3 x 3` convolutions; if three, construct a bottleneck block with `1 x 1`, then
            `3 x 3`, then `1 x 1` convolutions.
        stride : int
            Stride of the first convolution of the block (the first `3 x 3` in the basic variant, the first `1 x 1`
            in the bottleneck variant).
        dilation : int
            Dilation to apply to the `3 x 3` convolutions.
        groups : int
            Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
            bottleneck blocks.
        norm_act : callable
            Function to create normalization / activation Module.
        dropout: callable
            Function to create Dropout Module.
        """
        super(IdentityResidualBlock, self).__init__()

        # Check parameters for inconsistencies
        if len(channels) != 2 and len(channels) != 3:
            raise ValueError("channels must contain either two or three values")
        if len(channels) == 2 and groups != 1:
            raise ValueError("groups > 1 are only valid if len(channels) == 3")

        is_bottleneck = len(channels) == 3
        # A 1x1 projection is needed whenever the shortcut cannot be a pure
        # identity (spatial downsampling or a channel-count change).
        need_proj_conv = stride != 1 or in_channels != channels[-1]

        # Pre-activation normalization applied to the input before the convs.
        self.bn1 = norm_act(in_channels)
        if not is_bottleneck:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
                                    dilation=dilation)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    dilation=dilation))
            ]
            if dropout is not None:
                # Dropout sits between the two 3x3 convolutions.
                layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
        else:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    groups=groups, dilation=dilation)),
                ("bn3", norm_act(channels[1])),
                ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False))
            ]
            if dropout is not None:
                # Dropout sits between the grouped 3x3 conv and the final 1x1.
                layers = layers[0:4] + [("dropout", dropout())] + layers[4:]
        self.convs = nn.Sequential(OrderedDict(layers))

        if need_proj_conv:
            self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        if hasattr(self, "proj_conv"):
            # Projection shortcut consumes the *normalized* input.
            bn1 = self.bn1(x)
            shortcut = self.proj_conv(bn1)
        else:
            # bn1 may operate in place (ABN), so snapshot the shortcut first.
            shortcut = x.clone()
            bn1 = self.bn1(x)

        out = self.convs(bn1)
        out.add_(shortcut)

        return out
|
preprocess/humanparsing/modules/src/checks.h
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once

#include <ATen/ATen.h>

// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
#define AT_CHECK AT_ASSERT
#endif

// Argument-validation helpers: each aborts with a message naming the
// offending tensor when the condition does not hold.
#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")

// Fix: the two-statement expansions are wrapped in do { } while (0) so each
// macro behaves as a single statement.  The previous form
//   #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
// would silently run the contiguity check unconditionally when used inside
// an unbraced if/else.
#define CHECK_CUDA_INPUT(x) do { CHECK_CUDA(x); CHECK_CONTIGUOUS(x); } while (0)
#define CHECK_CPU_INPUT(x) do { CHECK_CPU(x); CHECK_CONTIGUOUS(x); } while (0)
|
preprocess/humanparsing/modules/src/inplace_abn.cpp
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <torch/extension.h>

#include <vector>

#include "inplace_abn.h"

// Thin dispatch layer for the in-place activated batch-norm (InPlace-ABN)
// extension: every entry point routes the call to the CPU, CUDA, or CUDA
// half-precision implementation declared in inplace_abn.h, based on the
// input tensor's device and scalar type.

// Per-channel mean and variance of x.
std::vector<at::Tensor> mean_var(at::Tensor x) {
  if (x.is_cuda()) {
    if (x.type().scalarType() == at::ScalarType::Half) {
      return mean_var_cuda_h(x);
    } else {
      return mean_var_cuda(x);
    }
  } else {
    return mean_var_cpu(x);
  }
}

// In-place BN forward: x is normalized (and affine-transformed) in place
// and returned.
at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                   bool affine, float eps) {
  if (x.is_cuda()) {
    if (x.type().scalarType() == at::ScalarType::Half) {
      return forward_cuda_h(x, mean, var, weight, bias, affine, eps);
    } else {
      return forward_cuda(x, mean, var, weight, bias, affine, eps);
    }
  } else {
    return forward_cpu(x, mean, var, weight, bias, affine, eps);
  }
}

// First backward stage: per-channel sums of dz and y*dz, where y is
// recovered from the in-place output z.
std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                 bool affine, float eps) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps);
    } else {
      return edz_eydz_cuda(z, dz, weight, bias, affine, eps);
    }
  } else {
    return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
  }
}

// Second backward stage: gradient with respect to the normalization input.
at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                    at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps);
    } else {
      return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
    }
  } else {
    return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
  }
}

// Activation forward passes defer to ATen's in-place ops (device-agnostic).
void leaky_relu_forward(at::Tensor z, float slope) {
  at::leaky_relu_(z, slope);
}

// Activation backward passes also invert the activation in place (the CPU
// implementation restores z to its pre-activation value), which is what
// allows the whole BN + activation to run without extra buffers.
void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return leaky_relu_backward_cuda_h(z, dz, slope);
    } else {
      return leaky_relu_backward_cuda(z, dz, slope);
    }
  } else {
    return leaky_relu_backward_cpu(z, dz, slope);
  }
}

void elu_forward(at::Tensor z) {
  at::elu_(z);
}

// NOTE(review): unlike the other entry points there is no half-precision
// variant here, so CUDA half tensors take the generic CUDA path — confirm
// this is intended.
void elu_backward(at::Tensor z, at::Tensor dz) {
  if (z.is_cuda()) {
    return elu_backward_cuda(z, dz);
  } else {
    return elu_backward_cpu(z, dz);
  }
}

// Python bindings for the extension module.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("mean_var", &mean_var, "Mean and variance computation");
  m.def("forward", &forward, "In-place forward computation");
  m.def("edz_eydz", &edz_eydz, "First part of backward computation");
  m.def("backward", &backward, "Second part of backward computation");
  m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation");
  m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion");
  m.def("elu_forward", &elu_forward, "Elu forward computation");
  m.def("elu_backward", &elu_backward, "Elu backward computation and inversion");
}
|
preprocess/humanparsing/modules/src/inplace_abn.h
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once

#include <ATen/ATen.h>

#include <vector>

// Per-backend implementations of the InPlace-ABN primitives, dispatched by
// inplace_abn.cpp: *_cpu, *_cuda, and *_cuda_h (half precision) variants.

std::vector<at::Tensor> mean_var_cpu(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x);

at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                       bool affine, float eps);
at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        bool affine, float eps);
at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                          bool affine, float eps);

std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                     bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                      bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                        bool affine, float eps);

at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                         at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                           at::Tensor edz, at::Tensor eydz, bool affine, float eps);

void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope);

void elu_backward_cpu(at::Tensor z, at::Tensor dz);
void elu_backward_cuda(at::Tensor z, at::Tensor dz);

// Decompose x into (batch, channel, spatial) extents, collapsing every
// dimension after dim 1 into sp.  Declared static, so each translation unit
// that includes this header gets its own copy.
static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {
  num = x.size(0);
  chn = x.size(1);
  sp = 1;
  for (int64_t i = 2; i < x.ndimension(); ++i)
    sp *= x.size(i);
}

/*
 * Specialized CUDA reduction functions for BN
 */
#ifdef __CUDACC__

#include "utils/cuda.cuh"

// Block-wide reduction of op(batch, plane, x) over all batches and spatial
// positions of one channel plane: per-thread accumulation, warp reduction,
// then a shared-memory pass; the final value is broadcast to all threads of
// the block via shared[0].
template <typename T, typename Op>
__device__ T reduce(Op op, int plane, int N, int S) {
  T sum = (T)0;
  for (int batch = 0; batch < N; ++batch) {
    for (int x = threadIdx.x; x < S; x += blockDim.x) {
      sum += op(batch, plane, x);
    }
  }

  // sum over NumThreads within a warp
  sum = warpSum(sum);

  // 'transpose', and reduce within warp again
  __shared__ T shared[32];
  __syncthreads();
  if (threadIdx.x % WARP_SIZE == 0) {
    shared[threadIdx.x / WARP_SIZE] = sum;
  }
  if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
    // zero out the other entries in shared
    shared[threadIdx.x] = (T)0;
  }
  __syncthreads();
  if (threadIdx.x / WARP_SIZE == 0) {
    sum = warpSum(shared[threadIdx.x]);
    if (threadIdx.x == 0) {
      shared[0] = sum;
    }
  }
  __syncthreads();

  // Everyone picks it up, should be broadcast into the whole gradInput
  return shared[0];
}
#endif
|
preprocess/humanparsing/modules/src/inplace_abn_cpu.cpp
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <ATen/ATen.h>

#include <vector>

#include "utils/checks.h"
#include "inplace_abn.h"

// Sum x over every dimension except the channel one: a 2-D (N, C) input is
// reduced over the batch dim; higher-rank input is viewed as (N, C, S) and
// reduced over batch and spatial dims.
at::Tensor reduce_sum(at::Tensor x) {
  if (x.ndimension() == 2) {
    return x.sum(0);
  } else {
    auto x_view = x.view({x.size(0), x.size(1), -1});
    return x_view.sum(-1).sum(0);
  }
}

// Reshape a per-channel vector v so it broadcasts against x, i.e. to
// (1, C, 1, ..., 1) for inputs of rank >= 3.
at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
  if (x.ndimension() == 2) {
    return v;
  } else {
    std::vector<int64_t> broadcast_size = {1, -1};
    for (int64_t i = 2; i < x.ndimension(); ++i)
      broadcast_size.push_back(1);

    return v.view(broadcast_size);
  }
}

// Number of elements contributing to each channel statistic
// (batch * spatial; the channel dim itself is excluded).
int64_t count(at::Tensor x) {
  int64_t count = x.size(0);
  for (int64_t i = 2; i < x.ndimension(); ++i)
    count *= x.size(i);

  return count;
}

// Undo the affine transform of the forward pass, recovering the normalized
// activations y from the in-place output z.  Mirrors the forward's
// |weight| + eps reparameterization of the scale.
at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) {
  if (affine) {
    return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z);
  } else {
    return z;
  }
}

// Per-channel mean and (biased) variance.
std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
  auto num = count(x);
  auto mean = reduce_sum(x) / num;
  auto diff = x - broadcast_to(mean, x);
  auto var = reduce_sum(diff.pow(2)) / num;

  return {mean, var};
}

// In-place BN forward: x <- (x - mean) * gamma * rsqrt(var + eps) + bias,
// with gamma = |weight| + eps when affine (keeps the scale positive and
// therefore invertible).
at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                       bool affine, float eps) {
  auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);
  auto mul = at::rsqrt(var + eps) * gamma;

  x.sub_(broadcast_to(mean, x));
  x.mul_(broadcast_to(mul, x));
  if (affine) x.add_(broadcast_to(bias, x));

  return x;
}

// First backward stage: per-channel sums of dz and y * dz.
std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                     bool affine, float eps) {
  auto edz = reduce_sum(dz);
  auto y = invert_affine(z, weight, bias, affine, eps);
  auto eydz = reduce_sum(y * dz);

  return {edz, eydz};
}

// Second backward stage: gradient w.r.t. the normalization input, using the
// per-channel reductions from edz_eydz_cpu.
at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  auto y = invert_affine(z, weight, bias, affine, eps);
  auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);

  auto num = count(z);
  auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);
  return dx;
}
|
| 84 |
+
|
| 85 |
+
// In-place leaky-ReLU backward that also inverts the activation: for
// elements whose stored activation z is negative, the gradient is scaled by
// slope and z is mapped back to its pre-activation value (z / slope).
void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
    int64_t count = z.numel();
    auto *_z = z.data<scalar_t>();
    auto *_dz = dz.data<scalar_t>();

    for (int64_t i = 0; i < count; ++i) {
      if (_z[i] < 0) {
        // Invert: pre-activation input was z / slope.
        _z[i] *= 1 / slope;
        _dz[i] *= slope;
      }
    }
  }));
}
|
| 102 |
+
|
| 103 |
+
// In-place ELU backward that also inverts the activation.  For x < 0 the
// forward stored z = exp(x) - 1, whose derivative is exp(x) = z + 1; so the
// gradient must be scaled by (z + 1) using the *activated* value, and only
// afterwards can z be inverted back to the input via log1p(z).
void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);

  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
    int64_t count = z.numel();
    auto *_z = z.data<scalar_t>();
    auto *_dz = dz.data<scalar_t>();

    for (int64_t i = 0; i < count; ++i) {
      if (_z[i] < 0) {
        // Fix: scale the gradient *before* overwriting z.  The original code
        // applied log1p first and then multiplied dz by the already-inverted
        // value, yielding a wrong gradient; the CUDA implementation reads z
        // prior to applying log1p, and this now matches it.
        _dz[i] *= (_z[i] + 1.f);
        _z[i] = log1p(_z[i]);
      }
    }
  }));
}
|
preprocess/humanparsing/modules/src/inplace_abn_cuda.cu
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <ATen/ATen.h>

#include <thrust/device_ptr.h>
#include <thrust/transform.h>

#include <vector>

#include "utils/checks.h"
#include "utils/cuda.cuh"
#include "inplace_abn.h"

#include <ATen/cuda/CUDAContext.h>

// Operations for reduce
// Each functor maps a (batch, plane, spatial) coordinate of a contiguous
// N x C x S tensor to the value accumulated by reduce() (inplace_abn.h).

// Plain element read — accumulates the per-channel sum.
template<typename T>
struct SumOp {
  __device__ SumOp(const T *t, int c, int s)
      : tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ T operator()(int batch, int plane, int n) {
    return tensor[(batch * chn + plane) * sp + n];
  }
  const T *tensor;
  const int chn;
  const int sp;
};

// Squared deviation from a fixed mean — accumulates the per-channel variance.
template<typename T>
struct VarOp {
  __device__ VarOp(T m, const T *t, int c, int s)
      : mean(m), tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ T operator()(int batch, int plane, int n) {
    T val = tensor[(batch * chn + plane) * sp + n];
    return (val - mean) * (val - mean);
  }
  const T mean;
  const T *tensor;
  const int chn;
  const int sp;
};

// Pairwise accumulation of (dz, y * dz), where y is recovered from the
// in-place output z by undoing the affine transform.
template<typename T>
struct GradOp {
  __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
      : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
  __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) {
    T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;
    T _dz = dz[(batch * chn + plane) * sp + n];
    return Pair<T>(_dz, _y * _dz);
  }
  const T weight;
  const T bias;
  const T *z;
  const T *dz;
  const int chn;
  const int sp;
};
|
| 57 |
+
|
| 58 |
+
/***********
 * mean_var
 ***********/

// One block per channel plane; threads stride over batch * spatial items,
// then a block-wide reduce produces the plane's mean and biased variance.
template<typename T>
__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {
  int plane = blockIdx.x;
  T norm = T(1) / T(num * sp);

  T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, sp) * norm;
  __syncthreads();
  T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, sp) * norm;

  // Only one thread per block writes the per-channel results.
  if (threadIdx.x == 0) {
    mean[plane] = _mean;
    var[plane] = _var;
  }
}

// Host launcher: validates inputs, allocates the per-channel outputs, and
// runs mean_var_kernel on the current CUDA stream (one block per channel).
std::vector<at::Tensor> mean_var_cuda(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Prepare output tensors
  auto mean = at::empty({chn}, x.options());
  auto var = at::empty({chn}, x.options());

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
    mean_var_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        num, chn, sp);
  }));

  return {mean, var};
}
|
| 102 |
+
|
| 103 |
+
/**********
 * forward
 **********/

// In-place BN forward, one channel plane per block: every element is
// rewritten as (x - mean) * gamma * rsqrt(var + eps) + bias, with
// gamma = |weight| + eps when affine (positive, hence invertible scale).
template<typename T>
__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,
                               bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _mean = mean[plane];
  T _var = var[plane];
  T _weight = affine ? abs(weight[plane]) + eps : T(1);
  T _bias = affine ? bias[plane] : T(0);

  T mul = rsqrt(_var + eps) * _weight;

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      T _x = x[(batch * chn + plane) * sp + n];
      T _y = (_x - _mean) * mul + _bias;

      x[(batch * chn + plane) * sp + n] = _y;
    }
  }
}

// Host launcher: normalizes x in place on the current CUDA stream and
// returns the same tensor.
at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
    forward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return x;
}
|
| 157 |
+
|
| 158 |
+
/***********
 * edz_eydz
 ***********/

// First backward stage: per-channel sums of dz and y*dz, where y is
// recovered from the in-place output z by undoing the affine transform.
template<typename T>
__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
                                T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _weight = affine ? abs(weight[plane]) + eps : 1.f;
  T _bias = affine ? bias[plane] : 0.f;

  Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, sp);
  __syncthreads();

  // One thread per block stores the reduced pair for its channel.
  if (threadIdx.x == 0) {
    edz[plane] = res.v1;
    eydz[plane] = res.v2;
  }
}

// Host launcher: allocates the per-channel outputs and runs the reduction.
std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                      bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto edz = at::empty({chn}, z.options());
  auto eydz = at::empty({chn}, z.options());

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
    edz_eydz_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return {edz, eydz};
}
|
| 210 |
+
|
| 211 |
+
/***********
 * backward
 ***********/

// Second backward stage:
//   dx = (dz - edz/count - y * eydz/count) * gamma * rsqrt(var + eps)
// using the per-channel reductions produced by edz_eydz_cuda; y is again
// recovered from the in-place output z.
template<typename T>
__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
                                const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _weight = affine ? abs(weight[plane]) + eps : 1.f;
  T _bias = affine ? bias[plane] : 0.f;
  T _var = var[plane];
  T _edz = edz[plane];
  T _eydz = eydz[plane];

  T _mul = _weight * rsqrt(_var + eps);
  T count = T(num * sp);

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      T _dz = dz[(batch * chn + plane) * sp + n];
      T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;

      dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
    }
  }
}

// Host launcher: allocates dx and runs backward_kernel (one block per channel).
at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                         at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto dx = at::zeros_like(z);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
    backward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        dx.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return dx;
}
|
| 274 |
+
|
| 275 |
+
/**************
|
| 276 |
+
* activations
|
| 277 |
+
**************/
|
| 278 |
+
|
| 279 |
+
template<typename T>
|
| 280 |
+
inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
|
| 281 |
+
// Create thrust pointers
|
| 282 |
+
thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
|
| 283 |
+
thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
|
| 284 |
+
|
| 285 |
+
auto stream = at::cuda::getCurrentCUDAStream();
|
| 286 |
+
thrust::transform_if(thrust::cuda::par.on(stream),
|
| 287 |
+
th_dz, th_dz + count, th_z, th_dz,
|
| 288 |
+
[slope] __device__ (const T& dz) { return dz * slope; },
|
| 289 |
+
[] __device__ (const T& z) { return z < 0; });
|
| 290 |
+
thrust::transform_if(thrust::cuda::par.on(stream),
|
| 291 |
+
th_z, th_z + count, th_z,
|
| 292 |
+
[slope] __device__ (const T& z) { return z / slope; },
|
| 293 |
+
[] __device__ (const T& z) { return z < 0; });
|
| 294 |
+
}
|
| 295 |
+
|
| 296 |
+
// Host entry point for the in-place LeakyReLU backward pass.
// Validates that z and dz are contiguous CUDA tensors, then dispatches over
// the floating-point element type. Both tensors are modified in place.
void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
    leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
  }));
}
|
| 306 |
+
|
| 307 |
+
/*
 * In-place backward pass of the fused ELU used by in-place ABN.
 * Wherever the stored activation output z is negative (i.e. z = exp(x) - 1
 * with x < 0), the gradient becomes dz * (z + 1) — since d/dx ELU = ELU + 1
 * on the negative side — and z is mapped through log1p to recover the
 * pre-activation input. Both z and dz are modified in place.
 */
template<typename T>
inline void elu_backward_impl(T *z, T *dz, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  auto stream = at::cuda::getCurrentCUDAStream();
  // dz <- dz * (z + 1) where z < 0 (z is both stencil and second input)
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_dz, th_dz + count, th_z, th_z, th_dz,
                       [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
                       [] __device__ (const T& z) { return z < 0; });
  // z <- log1p(z) where z < 0: inverse of z = exp(x) - 1
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_z, th_z + count, th_z,
                       [] __device__ (const T& z) { return log1p(z); },
                       [] __device__ (const T& z) { return z < 0; });
}
|
| 323 |
+
|
| 324 |
+
// Host entry point for the in-place ELU backward pass.
// Validates that z and dz are contiguous CUDA tensors, then dispatches over
// the floating-point element type. Both tensors are modified in place.
void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  // Fixed dispatch label: it previously read "leaky_relu_backward_cuda"
  // (copy-paste), which made dtype-mismatch errors name the wrong op.
  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
    elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
  }));
}
|
preprocess/humanparsing/modules/src/inplace_abn_cuda_half.cu
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <ATen/ATen.h>
|
| 2 |
+
|
| 3 |
+
#include <cuda_fp16.h>
|
| 4 |
+
|
| 5 |
+
#include <vector>
|
| 6 |
+
|
| 7 |
+
#include "utils/checks.h"
|
| 8 |
+
#include "utils/cuda.cuh"
|
| 9 |
+
#include "inplace_abn.h"
|
| 10 |
+
|
| 11 |
+
#include <ATen/cuda/CUDAContext.h>
|
| 12 |
+
|
| 13 |
+
// Operations for reduce
|
| 14 |
+
// Operations for reduce
// Reduction functor: yields elements of a half tensor widened to float,
// so per-channel sums are accumulated in full precision.
struct SumOpH {
  __device__ SumOpH(const half *t, int c, int s)
      : tensor(t), chn(c), sp(s) {}
  // Element (batch, plane, n) of a [num, chn, sp]-laid-out tensor, as float.
  __device__ __forceinline__ float operator()(int batch, int plane, int n) {
    return __half2float(tensor[(batch * chn + plane) * sp + n]);
  }
  const half *tensor;
  const int chn;  // channel count
  const int sp;   // spatial extent per channel
};
|
| 24 |
+
|
| 25 |
+
// Reduction functor: yields squared deviations from a precomputed `mean`,
// in float, for the per-channel variance reduction.
struct VarOpH {
  __device__ VarOpH(float m, const half *t, int c, int s)
      : mean(m), tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ float operator()(int batch, int plane, int n) {
    const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]);
    return (t - mean) * (t - mean);
  }
  const float mean;
  const half *tensor;
  const int chn;  // channel count
  const int sp;   // spatial extent per channel
};
|
| 37 |
+
|
| 38 |
+
// Reduction functor: yields the (dz, y * dz) pair per element, where y is the
// normalized activation recovered from the post-affine output z by inverting
// the affine transform: y = (z - bias) / weight. Reduced to obtain the
// per-channel sums edz and eydz used by the batch-norm backward pass.
struct GradOpH {
  __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s)
      : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
  __device__ __forceinline__ Pair<float> operator()(int batch, int plane, int n) {
    float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight;
    float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
    return Pair<float>(_dz, _y * _dz);
  }
  const float weight;
  const float bias;
  const half *z;   // stored (post-activation-inverse) outputs
  const half *dz;  // incoming gradients
  const int chn;   // channel count
  const int sp;    // spatial extent per channel
};
|
| 53 |
+
|
| 54 |
+
/***********
|
| 55 |
+
* mean_var
|
| 56 |
+
***********/
|
| 57 |
+
|
| 58 |
+
// One block per channel: computes that channel's mean and (biased, i.e.
// divide-by-N) variance of the half tensor x, accumulating in float.
__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) {
  int plane = blockIdx.x;  // channel handled by this block
  float norm = 1.f / static_cast<float>(num * sp);

  float _mean = reduce<float, SumOpH>(SumOpH(x, chn, sp), plane, num, sp) * norm;
  // NOTE(review): barrier between the two reduce() calls — presumably they
  // share scratch storage inside reduce(); confirm against its definition.
  __syncthreads();
  float _var = reduce<float, VarOpH>(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm;

  // Only one thread writes the per-channel results.
  if (threadIdx.x == 0) {
    mean[plane] = _mean;
    var[plane] = _var;
  }
}
|
| 71 |
+
|
| 72 |
+
// Host wrapper for the half-precision mean/variance kernel.
// x must be a contiguous CUDA half tensor; returns {mean, var} as float
// tensors of shape [chn]. Launches one block per channel.
std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Prepare output tensors (statistics are kept in float, not half)
  auto mean = at::empty({chn},x.options().dtype(at::kFloat));
  auto var = at::empty({chn},x.options().dtype(at::kFloat));

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  mean_var_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      num, chn, sp);

  return {mean, var};
}
|
| 95 |
+
|
| 96 |
+
/**********
|
| 97 |
+
* forward
|
| 98 |
+
**********/
|
| 99 |
+
|
| 100 |
+
// One block per channel: normalizes x in place,
//   y = (x - mean) * (|weight| + eps) / sqrt(var + eps) + bias.
// Arithmetic is done in float; results are stored back as half.
__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias,
                                 bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;  // channel handled by this block

  const float _mean = mean[plane];
  const float _var = var[plane];
  // abs(weight) + eps keeps the affine scale strictly positive, so the
  // backward pass can safely divide by it to recover y from z.
  const float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  const float _bias = affine ? bias[plane] : 0.f;

  // Combined scale applied to every element of this channel.
  const float mul = rsqrt(_var + eps) * _weight;

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      half *x_ptr = x + (batch * chn + plane) * sp + n;
      float _x = __half2float(*x_ptr);
      float _y = (_x - _mean) * mul + _bias;

      *x_ptr = __float2half(_y);
    }
  }
}
|
| 121 |
+
|
| 122 |
+
// Host wrapper for the half-precision in-place ABN forward pass.
// Normalizes x in place using precomputed per-channel mean/var (float) and
// optional affine weight/bias (float), and returns the same tensor x.
at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                          bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Run kernel: one block per channel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  forward_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      affine, eps, num, chn, sp);

  return x;
}
|
| 148 |
+
|
| 149 |
+
// One block per channel: reduces the per-channel sums
//   edz  = sum(dz)   and   eydz = sum(y * dz),
// where y = (z - bias) / weight is recovered from the stored output z.
// These sums feed the batch-norm gradient computation in backward_kernel_h.
__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias,
                                  float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;  // channel handled by this block

  // Must match the forward pass's effective scale (abs + eps).
  float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  float _bias = affine ? bias[plane] : 0.f;

  Pair<float> res = reduce<Pair<float>, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp);
  __syncthreads();

  // Only one thread writes the per-channel results.
  if (threadIdx.x == 0) {
    edz[plane] = res.v1;
    eydz[plane] = res.v2;
  }
}
|
| 164 |
+
|
| 165 |
+
// Host wrapper computing the per-channel gradient sums {edz, eydz} (float,
// shape [chn]) from half tensors z and dz. Launches one block per channel.
std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                        bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  // Per-channel accumulators kept in float.
  auto edz = at::empty({chn},z.options().dtype(at::kFloat));
  auto eydz = at::empty({chn},z.options().dtype(at::kFloat));

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  edz_eydz_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      affine, eps, num, chn, sp);

  return {edz, eydz};
}
|
| 194 |
+
|
| 195 |
+
// One block per channel: batch-norm input gradient,
//   dx = (dz - edz/count - y * eydz/count) * weight / sqrt(var + eps),
// where y is recovered from the stored output z and count = num * sp is the
// number of elements reduced per channel. Arithmetic in float, stored as half.
__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz,
                                  const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;  // channel handled by this block

  // Must match the forward pass's effective scale (abs + eps).
  float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  float _bias = affine ? bias[plane] : 0.f;
  float _var = var[plane];
  float _edz = edz[plane];
  float _eydz = eydz[plane];

  float _mul = _weight * rsqrt(_var + eps);
  float count = float(num * sp);

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
      // Recover normalized activation y by inverting the affine transform.
      float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight;

      dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul);
    }
  }
}
|
| 217 |
+
|
| 218 |
+
// Host wrapper for the half-precision ABN backward pass. Allocates dx with
// the same shape/dtype as z and fills it from z, dz, the saved per-channel
// statistics (var), affine parameters, and the gradient sums edz/eydz.
at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                           at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto dx = at::zeros_like(z);

  // Run kernel: one block per channel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  backward_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      reinterpret_cast<half*>(dx.data<at::Half>()),
      affine, eps, num, chn, sp);

  return dx;
}
|
| 251 |
+
|
| 252 |
+
// Half-precision analogue of leaky_relu_backward_impl: where z < 0, scales
// dz by `slope` and divides z by `slope` in place. The loop strides by
// blockDim.x * gridDim.x so any grid size covers all `count` elements.
__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){
    float _z = __half2float(z[i]);
    if (_z < 0) {
      dz[i] = __float2half(__half2float(dz[i]) * slope);
      z[i] = __float2half(_z / slope);
    }
  }
}
|
| 261 |
+
|
| 262 |
+
// Host entry point for the half-precision in-place LeakyReLU backward pass.
// Both z and dz are modified in place.
void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();
  // Enough blocks of `threads` to cover every element once; the kernel's
  // stride loop tolerates any launch size.
  dim3 threads(getNumThreads(count));
  dim3 blocks = (count + threads.x - 1) / threads.x;
  auto stream = at::cuda::getCurrentCUDAStream();
  leaky_relu_backward_impl_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      slope, count);
}
|
| 275 |
+
|
preprocess/humanparsing/modules/src/utils/checks.h
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once

#include <ATen/ATen.h>

// Argument-validation macros shared by the CPU and CUDA extension sources.

// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
#define AT_CHECK AT_ASSERT
#endif

#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")

// NOTE(review): these expand to TWO statements without braces, so they are
// unsafe inside an unbraced `if`; call sites use them as plain statements.
#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
|
preprocess/humanparsing/modules/src/utils/common.h
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once

#include <ATen/ATen.h>

/*
 * Functions to share code between CPU and GPU
 *
 * Provides HOST_DEVICE / INLINE_HOST_DEVICE qualifiers, a FLOOR(x) that maps
 * to the right floor function, and an ACCUM(x, y) accumulation primitive
 * that is atomic when compiled for the GPU and a plain += on the CPU.
 */

#ifdef __CUDACC__
// CUDA versions

#define HOST_DEVICE __host__ __device__
#define INLINE_HOST_DEVICE __host__ __device__ inline
#define FLOOR(x) floor(x)

#if __CUDA_ARCH__ >= 600
// Recent compute capabilities have block-level atomicAdd for all data types, so we use that
#define ACCUM(x,y) atomicAdd_block(&(x),(y))
#else
// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float
// and use the known atomicCAS-based implementation for double
template<typename data_t>
__device__ inline data_t atomic_add(data_t *address, data_t val) {
  return atomicAdd(address, val);
}

// Emulate atomicAdd for double via a compare-and-swap loop on the 64-bit
// integer representation (retries until no other thread intervened).
template<>
__device__ inline double atomic_add(double *address, double val) {
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
  return __longlong_as_double(old);
}

#define ACCUM(x,y) atomic_add(&(x),(y))
#endif // #if __CUDA_ARCH__ >= 600

#else
// CPU versions

#define HOST_DEVICE
#define INLINE_HOST_DEVICE inline
#define FLOOR(x) std::floor(x)
#define ACCUM(x,y) (x) += (y)

#endif // #ifdef __CUDACC__
|
preprocess/humanparsing/modules/src/utils/cuda.cuh
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
/*
|
| 4 |
+
* General settings and functions
|
| 5 |
+
*/
|
| 6 |
+
/*
 * General settings and functions
 */
const int WARP_SIZE = 32;
const int MAX_BLOCK_SIZE = 1024;

// Smallest power-of-two CUDA block size (from 32 up to MAX_BLOCK_SIZE) that
// covers nElem threads; saturates at MAX_BLOCK_SIZE for larger inputs.
static int getNumThreads(int nElem) {
  static const int kCandidates[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE};
  for (int candidate : kCandidates) {
    if (nElem <= candidate) {
      return candidate;
    }
  }
  return MAX_BLOCK_SIZE;
}
|
| 18 |
+
|
| 19 |
+
/*
|
| 20 |
+
* Reduction utilities
|
| 21 |
+
*/
|
| 22 |
+
// Warp shuffle-xor that compiles on both pre- and post-CUDA-9 toolkits
// (CUDA 9 replaced __shfl_xor with __shfl_xor_sync taking a lane mask).
template <typename T>
__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize,
                                           unsigned int mask = 0xffffffff) {
#if CUDART_VERSION >= 9000
  return __shfl_xor_sync(mask, value, laneMask, width);
#else
  return __shfl_xor(value, laneMask, width);
#endif
}
|
| 31 |
+
|
| 32 |
+
// Index of the most significant set bit of val (= floor(log2(val)) for val > 0).
__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); }
|
| 33 |
+
|
| 34 |
+
// Two-component accumulator used to reduce a pair of sums (e.g. sum(dz) and
// sum(y*dz)) in a single pass; += adds component-wise.
template<typename T>
struct Pair {
  T v1, v2;
  __device__ Pair() {}
  __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {}
  // Broadcast constructors: both components set to the same value.
  __device__ Pair(T v) : v1(v), v2(v) {}
  __device__ Pair(int v) : v1(v), v2(v) {}
  __device__ Pair &operator+=(const Pair<T> &a) {
    v1 += a.v1;
    v2 += a.v2;
    return *this;
  }
};
|
| 47 |
+
|
| 48 |
+
// Sum of `val` across all lanes of the calling warp; every lane receives the
// total. Uses butterfly shuffles on compute capability >= 3.0 and a
// shared-memory fallback otherwise.
template<typename T>
static __device__ __forceinline__ T warpSum(T val) {
#if __CUDA_ARCH__ >= 300
  // log2(WARP_SIZE) xor-shuffle rounds: a butterfly all-reduce within the warp.
  for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
    val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
  }
#else
  // Pre-Kepler fallback: stage values in shared memory and have each lane
  // read its warp-mates' slots.
  // NOTE(review): only __threadfence_block is used (no __syncthreads), which
  // orders memory but is not a barrier — relies on warp-synchronous execution.
  __shared__ T values[MAX_BLOCK_SIZE];
  values[threadIdx.x] = val;
  __threadfence_block();
  const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
  for (int i = 1; i < WARP_SIZE; i++) {
    val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
  }
#endif
  return val;
}
|
| 65 |
+
|
| 66 |
+
// Component-wise warp reduction of a Pair: both sums are reduced independently.
template<typename T>
static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) {
  value.v1 = warpSum(value.v1);
  value.v2 = warpSum(value.v2);
  return value;
}
|
preprocess/humanparsing/networks/AugmentCE2P.py
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : AugmentCE2P.py
|
| 8 |
+
@Time : 8/4/19 3:35 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import functools
|
| 15 |
+
import pdb
|
| 16 |
+
|
| 17 |
+
import torch
|
| 18 |
+
import torch.nn as nn
|
| 19 |
+
from torch.nn import functional as F
|
| 20 |
+
# Note here we adopt the InplaceABNSync implementation from https://github.com/mapillary/inplace_abn
|
| 21 |
+
# By default, the InplaceABNSync module contains a BatchNorm Layer and a LeakyReLu layer
|
| 22 |
+
from modules import InPlaceABNSync
|
| 23 |
+
import numpy as np
|
| 24 |
+
|
| 25 |
+
# InPlaceABNSync with the activation disabled behaves as a plain synchronized
# BatchNorm layer; used where a ReLU is applied separately.
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')

# Whether batch-norm affine parameters are used.
affine_par = True

# Preprocessing metadata for the ImageNet-pretrained backbone.
# NOTE(review): mean/std appear to be listed in BGR order to match
# 'input_space' — confirm against the pretrained checkpoint's loader.
pretrained_settings = {
    'resnet101': {
        'imagenet': {
            'input_space': 'BGR',
            'input_size': [3, 224, 224],
            'input_range': [0, 1],
            'mean': [0.406, 0.456, 0.485],
            'std': [0.225, 0.224, 0.229],
            'num_classes': 1000
        }
    },
}
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding 1 (shape-preserving at stride 1)."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 reduce -> 3x3 (strided/dilated) -> 1x1 expand,
    with an identity (or projected) residual connection.

    NOTE(review): the ``fist_dilation`` parameter (sic) is accepted but never
    used; it is kept so callers passing it positionally keep working.
    """

    # Channel expansion factor of the final 1x1 convolution.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        # multi_grid scales the 3x3 dilation; padding matches so the spatial
        # size is preserved (up to stride).
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(planes * 4)
        # NOTE(review): out-of-place ReLU after the in-place ABN layers —
        # presumably required because InPlaceABNSync's backward needs its
        # output unmodified; confirm before changing.
        self.relu = nn.ReLU(inplace=False)
        self.relu_inplace = nn.ReLU(inplace=True)
        self.downsample = downsample  # optional projection for the residual path
        self.dilation = dilation
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # Project the identity when shape/stride differs from the main path.
        if self.downsample is not None:
            residual = self.downsample(x)

        out = out + residual
        out = self.relu_inplace(out)

        return out
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class CostomAdaptiveAvgPool2D(nn.Module):
    """Adaptive average pooling built from explicit per-cell avg_pool2d calls.

    Output cell (i, j) averages the input window
    rows [floor(i*H/H_out), ceil((i+1)*H/H_out)) x
    cols [floor(j*W/W_out), ceil((j+1)*W/W_out)),
    matching ``nn.AdaptiveAvgPool2d`` cell boundaries.
    """

    def __init__(self, output_size):
        super(CostomAdaptiveAvgPool2D, self).__init__()
        # Target (H_out, W_out) spatial size.
        self.output_size = output_size

    def forward(self, x):
        in_h, in_w = x.shape[-2:]
        out_h, out_w = self.output_size

        rows = []
        for i in range(out_h):
            # Integer floor/ceil: a//b and -((-a)//b) for non-negative ints.
            h0 = (i * in_h) // out_h
            h1 = -((-(i + 1) * in_h) // out_h)
            cols = []
            for j in range(out_w):
                w0 = (j * in_w) // out_w
                w1 = -((-(j + 1) * in_w) // out_w)
                window = x[:, :, h0:h1, w0:w1]
                cols.append(F.avg_pool2d(window, [h1 - h0, w1 - w0]))
            rows.append(torch.cat(cols, -1))

        return torch.cat(rows, -2)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
class PSPModule(nn.Module):
    """
    Pyramid pooling: pool the feature map to each grid size in ``sizes``,
    project to ``out_features`` channels, upsample back, concatenate with the
    input, and fuse with a 3x3 convolution.

    Reference:
        Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*
    """

    def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
        super(PSPModule, self).__init__()

        self.stages = []
        tmp = []
        for size in sizes:
            # Sizes 3 and 6 use the hand-rolled adaptive pool
            # (CostomAdaptiveAvgPool2D); the others use nn.AdaptiveAvgPool2d.
            # NOTE(review): presumably a workaround for an export/backend
            # limitation of the fused adaptive op at those sizes — confirm.
            if size == 3 or size == 6:
                tmp.append(self._make_stage_custom(features, out_features, size))
            else:
                tmp.append(self._make_stage(features, out_features, size))
        self.stages = nn.ModuleList(tmp)
        # self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes])
        self.bottleneck = nn.Sequential(
            nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1,
                      bias=False),
            InPlaceABNSync(out_features),
        )

    def _make_stage(self, features, out_features, size):
        # pool to size x size -> 1x1 projection -> ABN
        prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
        conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
        bn = InPlaceABNSync(out_features)
        return nn.Sequential(prior, conv, bn)

    def _make_stage_custom(self, features, out_features, size):
        # Same as _make_stage but with the explicit adaptive-pool implementation.
        prior = CostomAdaptiveAvgPool2D(output_size=(size, size))
        conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
        bn = InPlaceABNSync(out_features)
        return nn.Sequential(prior, conv, bn)

    def forward(self, feats):
        h, w = feats.size(2), feats.size(3)
        # Upsample every pyramid level back to the input resolution and
        # concatenate with the untouched input features.
        priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in
                  self.stages] + [feats]
        bottle = self.bottleneck(torch.cat(priors, 1))
        return bottle
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
class ASPPModule(nn.Module):
    """
    Atrous Spatial Pyramid Pooling: an image-level pooling branch, a 1x1
    branch, and three 3x3 branches with increasing dilation; all five are
    concatenated and fused by a 1x1 bottleneck with dropout.

    Reference:
        Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*
    """

    def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)):
        super(ASPPModule, self).__init__()

        # Image-level context: global average pool -> 1x1 conv -> ABN.
        self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                   nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1,
                                             bias=False),
                                   InPlaceABNSync(inner_features))
        # Plain 1x1 branch.
        self.conv2 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(inner_features))
        # Dilated 3x3 branches; padding == dilation keeps the spatial size.
        self.conv3 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False),
            InPlaceABNSync(inner_features))
        self.conv4 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False),
            InPlaceABNSync(inner_features))
        self.conv5 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False),
            InPlaceABNSync(inner_features))

        # Fuse the five concatenated branches and regularize with dropout.
        self.bottleneck = nn.Sequential(
            nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        _, _, h, w = x.size()

        # Broadcast the 1x1 global-context feature back to full resolution.
        feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)

        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)
        out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1)

        bottle = self.bottleneck(out)
        return bottle
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
class Edge_Module(nn.Module):
    """
    Edge Learning Branch: predicts edge maps from three backbone stages and
    returns both the fused edge prediction and the concatenated edge features.
    """

    def __init__(self, in_fea=[256, 512, 1024], mid_fea=256, out_fea=2):
        super(Edge_Module, self).__init__()

        # One 1x1 projection per backbone stage, each to mid_fea channels.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_fea[0], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(mid_fea)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_fea[1], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(mid_fea)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_fea[2], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(mid_fea)
        )
        # conv4 is a single edge classifier SHARED across all three scales.
        self.conv4 = nn.Conv2d(mid_fea, out_fea, kernel_size=3, padding=1, dilation=1, bias=True)
        # conv5 fuses the three per-scale edge predictions into one.
        self.conv5 = nn.Conv2d(out_fea * 3, out_fea, kernel_size=1, padding=0, dilation=1, bias=True)

    def forward(self, x1, x2, x3):
        # x1 is the highest-resolution stage; everything is upsampled to it.
        _, _, h, w = x1.size()

        edge1_fea = self.conv1(x1)
        edge1 = self.conv4(edge1_fea)
        edge2_fea = self.conv2(x2)
        edge2 = self.conv4(edge2_fea)
        edge3_fea = self.conv3(x3)
        edge3 = self.conv4(edge3_fea)

        edge2_fea = F.interpolate(edge2_fea, size=(h, w), mode='bilinear', align_corners=True)
        edge3_fea = F.interpolate(edge3_fea, size=(h, w), mode='bilinear', align_corners=True)
        edge2 = F.interpolate(edge2, size=(h, w), mode='bilinear', align_corners=True)
        edge3 = F.interpolate(edge3, size=(h, w), mode='bilinear', align_corners=True)

        edge = torch.cat([edge1, edge2, edge3], dim=1)
        edge_fea = torch.cat([edge1_fea, edge2_fea, edge3_fea], dim=1)
        edge = self.conv5(edge)

        return edge, edge_fea
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
class Decoder_Module(nn.Module):
    """
    Parsing Branch Decoder Module.

    Fuses high-level context features with low-level features and emits
    per-pixel parsing logits plus the fused decoder features.
    """

    def __init__(self, num_classes):
        super(Decoder_Module, self).__init__()
        # Project the 512-ch context features down to 256.
        self.conv1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256)
        )
        # Compress the low-level features to 48 channels before fusion.
        self.conv2 = nn.Sequential(
            nn.Conv2d(256, 48, kernel_size=1, stride=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(48)
        )
        # Fuse the concatenated 256 + 48 = 304 channels back to 256.
        self.conv3 = nn.Sequential(
            nn.Conv2d(304, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256),
            nn.Conv2d(256, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256)
        )

        # Final classifier producing per-class parsing logits.
        self.conv4 = nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True)

    def forward(self, xt, xl):
        """Return (parsing logits, fused features) at the low-level resolution.

        xt: high-level context features; xl: low-level backbone features.
        """
        _, _, h, w = xl.size()
        # Upsample the context features to match the low-level feature map.
        xt = F.interpolate(self.conv1(xt), size=(h, w), mode='bilinear', align_corners=True)
        xl = self.conv2(xl)
        x = torch.cat([xt, xl], dim=1)
        x = self.conv3(x)
        seg = self.conv4(x)
        return seg, x
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
class ResNet(nn.Module):
    """CE2P parsing network: ResNet backbone + PSP context + edge + decoder.

    forward() returns [[parsing_result, fusion_result], edge_result].
    """

    def __init__(self, block, layers, num_classes):
        self.inplanes = 128
        super(ResNet, self).__init__()
        # Deep stem: three 3x3 convs in place of the classic single 7x7 conv.
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=False)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=False)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=False)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # layer4 keeps stride 1 and dilates instead, preserving resolution
        # for dense prediction.
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, multi_grid=(1, 1, 1))

        self.context_encoding = PSPModule(2048, 512)

        self.edge = Edge_Module()
        self.decoder = Decoder_Module(num_classes)

        # NOTE(review): 'fushion' (sic) is kept as-is — renaming the attribute
        # would change state_dict keys and break existing checkpoints.
        self.fushion = nn.Sequential(
            nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256),
            nn.Dropout2d(0.1),
            nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True)
        )

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1):
        """Build one residual stage; the first block may downsample/project."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # affine_par is defined earlier in this module (outside this view).
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion, affine=affine_par))

        layers = []
        # Per-block dilation multiplier when multi_grid is a tuple, else 1.
        generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1
        layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
                            multi_grid=generate_multi_grid(0, multi_grid)))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(
                block(self.inplanes, planes, dilation=dilation, multi_grid=generate_multi_grid(i, multi_grid)))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)
        x2 = self.layer1(x)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)
        x5 = self.layer4(x4)
        x = self.context_encoding(x5)
        parsing_result, parsing_fea = self.decoder(x, x2)
        # Edge Branch
        edge_result, edge_fea = self.edge(x2, x3, x4)
        # Fusion Branch: concatenate parsing and edge features, then classify.
        x = torch.cat([parsing_fea, edge_fea], dim=1)
        fusion_result = self.fushion(x)
        return [[parsing_result, fusion_result], edge_result]
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def initialize_pretrained_model(model, settings, pretrained='./models/resnet101-imagenet.pth'):
    """Attach dataset preprocessing metadata to *model* and optionally load weights.

    Args:
        model: network to initialize; metadata attributes are set in place.
        settings: dict providing 'input_space', 'input_size', 'input_range',
            'mean' and 'std' entries describing the expected input.
        pretrained: path to a checkpoint file, or None to skip weight loading.
    """
    model.input_space = settings['input_space']
    model.input_size = settings['input_size']
    model.input_range = settings['input_range']
    model.mean = settings['mean']
    model.std = settings['std']

    if pretrained is not None:
        # map_location='cpu' lets a GPU-saved checkpoint load on CPU-only hosts.
        saved_state_dict = torch.load(pretrained, map_location='cpu')
        new_params = model.state_dict().copy()
        for key, value in saved_state_dict.items():
            # Skip the ImageNet classifier head ('fc.*'); copy everything else.
            # (The original rebuilt the key via '.'.join(split('.')) — a no-op.)
            if key.split('.')[0] != 'fc':
                new_params[key] = value
        model.load_state_dict(new_params)
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
def resnet101(num_classes=20, pretrained='./models/resnet101-imagenet.pth'):
    """Build the CE2P ResNet-101 parsing network and load ImageNet weights."""
    settings = pretrained_settings['resnet101']['imagenet']
    model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
    initialize_pretrained_model(model, settings, pretrained)
    return model
|
preprocess/humanparsing/networks/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
from networks.AugmentCE2P import resnet101

# Registry mapping architecture names to their factory callables.
__factory = {
    'resnet101': resnet101,
}


def init_model(name, *args, **kwargs):
    """Instantiate a registered network architecture by name.

    Args:
        name: key in the architecture registry (e.g. 'resnet101').
        *args, **kwargs: forwarded to the architecture's factory.

    Raises:
        KeyError: if *name* is not a known architecture.
    """
    # Membership test on the dict itself — no need for .keys().
    if name not in __factory:
        raise KeyError("Unknown model arch: {}".format(name))
    return __factory[name](*args, **kwargs)
|
preprocess/humanparsing/networks/backbone/mobilenetv2.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : mobilenetv2.py
|
| 8 |
+
@Time : 8/4/19 3:35 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import torch.nn as nn
|
| 15 |
+
import math
|
| 16 |
+
import functools
|
| 17 |
+
|
| 18 |
+
from modules import InPlaceABN, InPlaceABNSync
|
| 19 |
+
|
| 20 |
+
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
|
| 21 |
+
|
| 22 |
+
__all__ = ['mobilenetv2']
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def conv_bn(inp, oup, stride):
    """3x3 convolution followed by normalization and ReLU6."""
    stem = [
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    ]
    return nn.Sequential(*stem)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def conv_1x1_bn(inp, oup):
    """1x1 pointwise convolution followed by normalization and ReLU6."""
    pieces = [
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    ]
    return nn.Sequential(*pieces)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class InvertedResidual(nn.Module):
    """MobileNetV2 inverted-residual block: expand -> depthwise -> project."""

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)
        # The skip connection is only valid when the block preserves shape.
        self.use_res_connect = self.stride == 1 and inp == oup

        if expand_ratio == 1:
            # No pointwise expansion needed when the ratio is 1.
            self.conv = nn.Sequential(
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear (no activation after projection)
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                BatchNorm2d(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pw expansion
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear (no activation after projection)
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                BatchNorm2d(oup),
            )

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class MobileNetV2(nn.Module):
    """MobileNetV2 classification backbone (Sandler et al., 2018)."""

    def __init__(self, n_class=1000, input_size=224, width_mult=1.):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        interverted_residual_setting = [
            # t, c, n, s  (expansion, out channels, repeats, first stride)
            [1, 16, 1, 1],
            [6, 24, 2, 2],  # layer 2
            [6, 32, 3, 2],  # layer 3
            [6, 64, 4, 2],
            [6, 96, 3, 1],  # layer 4
            [6, 160, 3, 2],
            [6, 320, 1, 1],  # layer 5
        ]

        # building first layer
        assert input_size % 32 == 0
        input_channel = int(input_channel * width_mult)
        self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
        self.features = [conv_bn(3, input_channel, 2)]
        # building inverted residual blocks
        for t, c, n, s in interverted_residual_setting:
            output_channel = int(c * width_mult)
            for i in range(n):
                # Only the first block of each stage uses the stage stride.
                stride = s if i == 0 else 1
                self.features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        self.features.append(conv_1x1_bn(input_channel, self.last_channel))
        # make it nn.Sequential
        self.features = nn.Sequential(*self.features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, n_class),
        )

        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        # Global average pool over the spatial dimensions.
        x = x.mean(3).mean(2)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Kaiming-style init for convs, unit/zero init for norm layers."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            # BUGFIX: the module-level `BatchNorm2d` is a functools.partial,
            # not a type — isinstance(m, BatchNorm2d) raises TypeError.
            # Check against the actual normalization classes instead.
            elif isinstance(m, (InPlaceABN, InPlaceABNSync)):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def mobilenetv2(pretrained=False, **kwargs):
    """Constructs a MobileNet_V2 model.

    Args:
        pretrained (bool): If True, load ImageNet weights. No weight URL is
            defined in this module, so requesting pretrained weights raises.
    """
    model = MobileNetV2(n_class=1000, **kwargs)
    if pretrained:
        # BUGFIX: the original referenced undefined `load_url`/`model_urls`,
        # which crashed with a NameError. Fail fast with a clear message.
        raise NotImplementedError(
            'pretrained weights are not available for mobilenetv2 in this module')
    return model
|
preprocess/humanparsing/networks/backbone/resnet.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : resnet.py
|
| 8 |
+
@Time : 8/4/19 3:35 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import functools
|
| 15 |
+
import torch.nn as nn
|
| 16 |
+
import math
|
| 17 |
+
from torch.utils.model_zoo import load_url
|
| 18 |
+
|
| 19 |
+
from modules import InPlaceABNSync
|
| 20 |
+
|
| 21 |
+
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
|
| 22 |
+
|
| 23 |
+
__all__ = ['ResNet', 'resnet18', 'resnet50', 'resnet101'] # resnet101 is coming soon!
|
| 24 |
+
|
| 25 |
+
model_urls = {
|
| 26 |
+
'resnet18': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet18-imagenet.pth',
|
| 27 |
+
'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth',
|
| 28 |
+
'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth'
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def conv3x3(in_planes, out_planes, stride=1):
    """Return a 3x3 convolution with 1-pixel padding and no bias."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class BasicBlock(nn.Module):
    """Two-conv residual block used by ResNet-18/34."""

    # Output channels = planes * expansion.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = BatchNorm2d(planes)
        # Optional projection applied to the identity path when the
        # shape of x differs from the block output.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block used by ResNet-50/101."""

    # Output channels = planes * expansion.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # 1x1 reduce
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        # 3x3 spatial conv carries the stride.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = BatchNorm2d(planes)
        # 1x1 expand
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        # Optional projection for the identity path on shape change.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class ResNet(nn.Module):
    """ImageNet classification ResNet with a deep (three 3x3 conv) stem."""

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 128
        super(ResNet, self).__init__()
        # Deep stem: three 3x3 convs replace the classic single 7x7 conv.
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            # BUGFIX: module-level `BatchNorm2d` is a functools.partial, not a
            # type — isinstance(m, BatchNorm2d) raises TypeError. Check the
            # wrapped normalization class instead.
            elif isinstance(m, InPlaceABNSync):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack *blocks* residual blocks; the first may downsample."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        state = load_url(model_urls['resnet18'])
        model.load_state_dict(state)
    return model
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        state = load_url(model_urls['resnet50'])
        model.load_state_dict(state, strict=False)
    return model
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        state = load_url(model_urls['resnet101'])
        model.load_state_dict(state, strict=False)
    return model
|
preprocess/humanparsing/networks/backbone/resnext.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : resnext.py.py
|
| 8 |
+
@Time : 8/11/19 8:58 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
import functools
|
| 14 |
+
import torch.nn as nn
|
| 15 |
+
import math
|
| 16 |
+
from torch.utils.model_zoo import load_url
|
| 17 |
+
|
| 18 |
+
from modules import InPlaceABNSync
|
| 19 |
+
|
| 20 |
+
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
|
| 21 |
+
|
| 22 |
+
__all__ = ['ResNeXt', 'resnext101'] # support resnext 101
|
| 23 |
+
|
| 24 |
+
model_urls = {
|
| 25 |
+
'resnext50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext50-imagenet.pth',
|
| 26 |
+
'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth'
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution that preserves spatial size at stride 1."""
    kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **kwargs)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class GroupBottleneck(nn.Module):
    """ResNeXt bottleneck: 1x1 reduce -> grouped 3x3 -> 1x1 expand."""

    # Output channels = planes * expansion.
    expansion = 2

    def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None):
        super(GroupBottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        # Grouped 3x3 conv is the cardinality dimension of ResNeXt.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(planes * 2)
        self.relu = nn.ReLU(inplace=True)
        # Optional projection for the identity path on shape change.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
class ResNeXt(nn.Module):
    """ImageNet classification ResNeXt with a deep (three 3x3 conv) stem."""

    def __init__(self, block, layers, groups=32, num_classes=1000):
        self.inplanes = 128
        super(ResNeXt, self).__init__()
        # Deep stem: three 3x3 convs replace the classic single 7x7 conv.
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 128, layers[0], groups=groups)
        self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups)
        self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups)
        self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(1024 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Fan-out per group for grouped convolutions.
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups
                m.weight.data.normal_(0, math.sqrt(2. / n))
            # BUGFIX: module-level `BatchNorm2d` is a functools.partial, not a
            # type — isinstance(m, BatchNorm2d) raises TypeError. Check the
            # wrapped normalization class instead.
            elif isinstance(m, InPlaceABNSync):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, groups=1):
        """Stack *blocks* grouped bottlenecks; the first may downsample."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, groups, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=groups))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def resnext101(pretrained=False, **kwargs):
    """Constructs a ResNeXt-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on Places
    """
    model = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        state = load_url(model_urls['resnext101'])
        model.load_state_dict(state, strict=False)
    return model
|
preprocess/humanparsing/networks/context_encoding/aspp.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : aspp.py
|
| 8 |
+
@Time : 8/4/19 3:36 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import torch
|
| 15 |
+
import torch.nn as nn
|
| 16 |
+
from torch.nn import functional as F
|
| 17 |
+
|
| 18 |
+
from modules import InPlaceABNSync
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class ASPPModule(nn.Module):
    """
    Atrous Spatial Pyramid Pooling.

    Reference:
        Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*
    """

    def __init__(self, features, out_features=512, inner_features=256, dilations=(12, 24, 36)):
        # features: input channels; inner_features: width of each branch;
        # out_features: channels after the bottleneck; dilations: atrous rates.
        super(ASPPModule, self).__init__()

        # Image-level branch: global average pool then 1x1 conv.
        self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                   nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1,
                                             bias=False),
                                   InPlaceABNSync(inner_features))
        # Plain 1x1 branch.
        self.conv2 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(inner_features))
        # Three dilated 3x3 branches at increasing atrous rates.
        self.conv3 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False),
            InPlaceABNSync(inner_features))
        self.conv4 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False),
            InPlaceABNSync(inner_features))
        self.conv5 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False),
            InPlaceABNSync(inner_features))

        # Project the 5 concatenated branches down to out_features.
        self.bottleneck = nn.Sequential(
            nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Return multi-scale context features at the input's spatial size."""
        _, _, h, w = x.size()

        # Global branch collapses to 1x1, so upsample back to (h, w).
        feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)

        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)
        # Concatenate all branches along channels before the bottleneck.
        out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1)

        bottle = self.bottleneck(out)
        return bottle
|
preprocess/humanparsing/networks/context_encoding/ocnet.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : ocnet.py
|
| 8 |
+
@Time : 8/4/19 3:36 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import functools
|
| 15 |
+
|
| 16 |
+
import torch
|
| 17 |
+
import torch.nn as nn
|
| 18 |
+
from torch.autograd import Variable
|
| 19 |
+
from torch.nn import functional as F
|
| 20 |
+
|
| 21 |
+
from modules import InPlaceABNSync
|
| 22 |
+
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class _SelfAttentionBlock(nn.Module):
    '''
    The basic implementation for self-attention block/non-local block
    Input:
        N X C X H X W
    Parameters:
        in_channels : the dimension of the input feature map
        key_channels : the dimension after the key/query transform
        value_channels : the dimension after the value transform
        scale : choose the scale to downsample the input feature maps (save memory cost)
    Return:
        N X C X H X W
        position-aware context features.(w/o concate or add with the input)
    '''

    def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1):
        super(_SelfAttentionBlock, self).__init__()
        self.scale = scale
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.key_channels = key_channels
        self.value_channels = value_channels
        # Default the output width to the input width (identity-shaped block).
        if out_channels is None:  # was `== None`; identity check is correct for None
            self.out_channels = in_channels
        self.pool = nn.MaxPool2d(kernel_size=(scale, scale))
        self.f_key = nn.Sequential(
            nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels,
                      kernel_size=1, stride=1, padding=0),
            InPlaceABNSync(self.key_channels),
        )
        # Query and key intentionally share the same transform (weight tying).
        self.f_query = self.f_key
        self.f_value = nn.Conv2d(in_channels=self.in_channels, out_channels=self.value_channels,
                                 kernel_size=1, stride=1, padding=0)
        self.W = nn.Conv2d(in_channels=self.value_channels, out_channels=self.out_channels,
                           kernel_size=1, stride=1, padding=0)
        # Zero-init the output projection so the block starts as a no-op.
        # nn.init.constant was deprecated and removed; constant_ is the
        # in-place replacement with identical semantics.
        nn.init.constant_(self.W.weight, 0)
        nn.init.constant_(self.W.bias, 0)

    def forward(self, x):
        """Compute non-local (dot-product) attention over spatial positions.

        Args:
            x: feature map of shape (N, in_channels, H, W).
        Returns:
            Context features of shape (N, out_channels, H, W).
        """
        batch_size, h, w = x.size(0), x.size(2), x.size(3)
        if self.scale > 1:
            # Downsample to reduce the HW x HW attention matrix size.
            x = self.pool(x)

        # Flatten spatial dims: value/query -> (N, HW, C), key -> (N, C, HW).
        value = self.f_value(x).view(batch_size, self.value_channels, -1)
        value = value.permute(0, 2, 1)
        query = self.f_query(x).view(batch_size, self.key_channels, -1)
        query = query.permute(0, 2, 1)
        key = self.f_key(x).view(batch_size, self.key_channels, -1)

        # Scaled dot-product similarity between every pair of positions.
        sim_map = torch.matmul(query, key)
        sim_map = (self.key_channels ** -.5) * sim_map
        sim_map = F.softmax(sim_map, dim=-1)

        context = torch.matmul(sim_map, value)
        context = context.permute(0, 2, 1).contiguous()
        context = context.view(batch_size, self.value_channels, *x.size()[2:])
        context = self.W(context)
        if self.scale > 1:
            # F.upsample is deprecated; F.interpolate has the same behavior.
            context = F.interpolate(input=context, size=(h, w), mode='bilinear', align_corners=True)
        return context
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class SelfAttentionBlock2D(_SelfAttentionBlock):
    """2-D specialization of the generic self-attention block.

    Adds no behavior of its own; it simply forwards every constructor
    argument to ``_SelfAttentionBlock``.
    """

    def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1):
        super(SelfAttentionBlock2D, self).__init__(
            in_channels, key_channels, value_channels, out_channels, scale)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
class BaseOC_Module(nn.Module):
    """
    Implementation of the BaseOC module
    Parameters:
        in_features / out_features: the channels of the input / output feature maps.
        dropout: we choose 0.05 as the default value.
        size: you can apply multiple sizes. Here we only use one size.
    Return:
        features fused with Object context information.
    """

    def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])):
        super(BaseOC_Module, self).__init__()
        # One self-attention stage per requested scale (removed a dead
        # `self.stages = []` that was immediately overwritten).
        self.stages = nn.ModuleList(
            [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes])
        # Fuses the attention context concatenated with the input features.
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(2 * in_channels, out_channels, kernel_size=1, padding=0),
            InPlaceABNSync(out_channels),
            nn.Dropout2d(dropout)
        )

    def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size):
        # `size` is passed through as the attention downsampling scale.
        return SelfAttentionBlock2D(in_channels,
                                    key_channels,
                                    value_channels,
                                    output_channels,
                                    size)

    def forward(self, feats):
        """Return input features fused with summed multi-scale attention context."""
        priors = [stage(feats) for stage in self.stages]
        # Sum contexts from all scales, then concat with the raw features.
        context = priors[0]
        for i in range(1, len(priors)):
            context += priors[i]
        output = self.conv_bn_dropout(torch.cat([context, feats], 1))
        return output
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
class BaseOC_Context_Module(nn.Module):
    """
    Output only the context features.
    Parameters:
        in_features / out_features: the channels of the input / output feature maps.
        dropout: specify the dropout ratio
        fusion: We provide two different fusion method, "concat" or "add"
        size: we find that directly learn the attention weights on even 1/8 feature maps is hard.
    Return:
        features after "concat" or "add"
    """

    def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])):
        super(BaseOC_Context_Module, self).__init__()
        # One self-attention stage per requested scale (removed a dead
        # `self.stages = []` that was immediately overwritten).
        self.stages = nn.ModuleList(
            [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes])
        # Projects the summed context; unlike BaseOC_Module, the input
        # features are NOT concatenated back in here.
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0),
            InPlaceABNSync(out_channels),
        )

    def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size):
        # `size` is passed through as the attention downsampling scale.
        return SelfAttentionBlock2D(in_channels,
                                    key_channels,
                                    value_channels,
                                    output_channels,
                                    size)

    def forward(self, feats):
        """Return only the projected multi-scale attention context."""
        priors = [stage(feats) for stage in self.stages]
        context = priors[0]
        for i in range(1, len(priors)):
            context += priors[i]
        output = self.conv_bn_dropout(context)
        return output
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
class ASP_OC_Module(nn.Module):
    """ASPP combined with an object-context (OC) attention branch.

    Branch 1 is a 3x3 conv followed by BaseOC_Context_Module attention;
    branches 2-5 mirror standard ASPP (1x1 conv + three dilated 3x3 convs).
    All five outputs are concatenated and projected by a 1x1 conv.
    """
    def __init__(self, features, out_features=256, dilations=(12, 24, 36)):
        super(ASP_OC_Module, self).__init__()
        # Attention branch: 3x3 conv then object-context attention at scale 2.
        self.context = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=1, dilation=1, bias=True),
                                     InPlaceABNSync(out_features),
                                     BaseOC_Context_Module(in_channels=out_features, out_channels=out_features,
                                                           key_channels=out_features // 2, value_channels=out_features,
                                                           dropout=0, sizes=([2])))
        # Standard ASPP branches (padding == dilation keeps spatial size).
        self.conv2 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
                                   InPlaceABNSync(out_features))
        self.conv3 = nn.Sequential(
            nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False),
            InPlaceABNSync(out_features))
        self.conv4 = nn.Sequential(
            nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False),
            InPlaceABNSync(out_features))
        self.conv5 = nn.Sequential(
            nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False),
            InPlaceABNSync(out_features))

        # Fuse the five concatenated branches down to out_features.
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(out_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def _cat_each(self, feat1, feat2, feat3, feat4, feat5):
        # Element-wise concat for list/tuple inputs (one entry per scale).
        assert (len(feat1) == len(feat2))
        z = []
        for i in range(len(feat1)):
            z.append(torch.cat((feat1[i], feat2[i], feat3[i], feat4[i], feat5[i]), 1))
        return z

    def forward(self, x):
        # Accepts either a single tensor (Variable) or a list/tuple of tensors.
        if isinstance(x, Variable):
            _, _, h, w = x.size()
        elif isinstance(x, tuple) or isinstance(x, list):
            _, _, h, w = x[0].size()
        else:
            raise RuntimeError('unknown input type')

        feat1 = self.context(x)
        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)

        # Concatenate per-branch outputs, element-wise for list inputs.
        if isinstance(x, Variable):
            out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1)
        elif isinstance(x, tuple) or isinstance(x, list):
            out = self._cat_each(feat1, feat2, feat3, feat4, feat5)
        else:
            raise RuntimeError('unknown input type')
        output = self.conv_bn_dropout(out)
        return output
|
preprocess/humanparsing/networks/context_encoding/psp.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : psp.py
|
| 8 |
+
@Time : 8/4/19 3:36 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import torch
|
| 15 |
+
import torch.nn as nn
|
| 16 |
+
from torch.nn import functional as F
|
| 17 |
+
|
| 18 |
+
from modules import InPlaceABNSync
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class PSPModule(nn.Module):
    """
    Pyramid Scene Parsing pooling module.

    Reference:
        Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*
    """
    def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
        # features: input channels; sizes: pyramid pooling output resolutions.
        super(PSPModule, self).__init__()

        self.stages = []
        # One pooled branch per pyramid size.
        self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes])
        # Fuses the original features with all upsampled pyramid branches.
        self.bottleneck = nn.Sequential(
            nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1,
                      bias=False),
            InPlaceABNSync(out_features),
        )

    def _make_stage(self, features, out_features, size):
        # Pool to (size, size), then reduce channels with a 1x1 conv.
        prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
        conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
        bn = InPlaceABNSync(out_features)
        return nn.Sequential(prior, conv, bn)

    def forward(self, feats):
        # feats: (N, features, H, W) -> (N, out_features, H, W).
        h, w = feats.size(2), feats.size(3)
        # Upsample every pooled branch back to the input resolution, then
        # append the untouched input features before concatenation.
        priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in
                  self.stages] + [feats]
        bottle = self.bottleneck(torch.cat(priors, 1))
        return bottle
|
preprocess/humanparsing/parsing_api.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
PROJECT_ROOT = Path(__file__).absolute().parents[0].absolute()
|
| 5 |
+
sys.path.insert(0, str(PROJECT_ROOT))
|
| 6 |
+
|
| 7 |
+
import cv2
|
| 8 |
+
import numpy as np
|
| 9 |
+
import torch
|
| 10 |
+
import torchvision.transforms as transforms
|
| 11 |
+
from datasets.simple_extractor_dataset import SimpleFolderDataset
|
| 12 |
+
from PIL import Image
|
| 13 |
+
from utils.transforms import transform_logits
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def get_palette(num_cls):
    """ Returns the color map for visualizing the segmentation mask.
    Args:
        num_cls: Number of classes
    Returns:
        The color map as a flat [R0, G0, B0, R1, G1, B1, ...] list.
    """
    palette = [0] * (num_cls * 3)
    for label in range(num_cls):
        # Spread the label's bits across the high bits of R/G/B: bit 3k of
        # the label goes to R, 3k+1 to G, 3k+2 to B, at bit position 7-k.
        value = label
        shift = 7
        while value:
            palette[label * 3 + 0] |= ((value >> 0) & 1) << shift
            palette[label * 3 + 1] |= ((value >> 1) & 1) << shift
            palette[label * 3 + 2] |= ((value >> 2) & 1) << shift
            shift -= 1
            value >>= 3
    return palette
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def delete_irregular(logits_result):
    """Suppress implausible class logits based on garment layout.

    Compares the largest upper-cloth region (class 4) with the largest
    dresses region (class 7); depending on which sits lower in the image,
    zeroes out (-1) the logits of conflicting classes, then re-runs argmax.

    Args:
        logits_result: per-pixel class logits, shape (H, W, num_classes).
            NOTE(review): mutated in place.
    Returns:
        (parsing_result, wear_type) where parsing_result is the re-derived
        label map padded by one border pixel, and wear_type is "dresses"
        or "cloth_pant".
    """
    parsing_result = np.argmax(logits_result, axis=2)
    upper_cloth = np.where(parsing_result == 4, 255, 0)
    contours, hierarchy = cv2.findContours(upper_cloth.astype(np.uint8),
                                           cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_L1)
    # Find the largest upper-cloth contour and its centroid row (cY).
    area = []
    for i in range(len(contours)):
        a = cv2.contourArea(contours[i], True)
        area.append(abs(a))
    if len(area) != 0:
        top = area.index(max(area))
        M = cv2.moments(contours[top])
        cY = int(M["m01"] / M["m00"])

    # Same for the largest dresses contour (cY_dress).
    dresses = np.where(parsing_result == 7, 255, 0)
    contours_dress, hierarchy_dress = cv2.findContours(dresses.astype(np.uint8),
                                                       cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_L1)
    area_dress = []
    for j in range(len(contours_dress)):
        a_d = cv2.contourArea(contours_dress[j], True)
        area_dress.append(abs(a_d))
    if len(area_dress) != 0:
        top_dress = area_dress.index(max(area_dress))
        M_dress = cv2.moments(contours_dress[top_dress])
        cY_dress = int(M_dress["m01"] / M_dress["m00"])
    wear_type = "dresses"
    if len(area) != 0:
        if len(area_dress) != 0 and cY_dress > cY:
            # Dress centroid below cloth centroid: treat as dress; kill
            # upper-cloth/coat-like classes everywhere.
            irregular_list = np.array([4, 5, 6])
            logits_result[:, :, irregular_list] = -1
        else:
            # Otherwise treat as separate top+bottom; kill lower-body/dress
            # classes above the cloth centroid row only.
            irregular_list = np.array([5, 6, 7, 8, 9, 10, 12, 13])
            logits_result[:cY, :, irregular_list] = -1
            wear_type = "cloth_pant"
    parsing_result = np.argmax(logits_result, axis=2)
    # pad border
    parsing_result = np.pad(parsing_result, pad_width=1, mode='constant', constant_values=0)
    return parsing_result, wear_type
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def hole_fill(img):
    """Fill interior holes of a binary (0/255) mask.

    Flood-fills the background from corner (0, 0); any region the fill
    cannot reach is an enclosed hole, which the inverted fill recovers.
    NOTE: `img` is mutated in place by cv2.floodFill.
    """
    original = img.copy()
    flood_mask = np.zeros((img.shape[0] + 2, img.shape[1] + 2), dtype=np.uint8)
    cv2.floodFill(img, flood_mask, (0, 0), 255)
    # Holes are exactly the pixels not reached by the background fill.
    holes = cv2.bitwise_not(img)
    return cv2.bitwise_or(original, holes)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def refine_mask(mask):
    """Keep the dominant contour of a binary mask, filled solid.

    If the largest contour area exceeds 2000 px, all other contours are
    kept (filled) as well; otherwise only the largest survives.
    """
    contours, _ = cv2.findContours(mask.astype(np.uint8),
                                   cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_L1)
    areas = [abs(cv2.contourArea(c, True)) for c in contours]
    result = np.zeros_like(mask).astype(np.uint8)
    if areas:
        largest = areas.index(max(areas))
        cv2.drawContours(result, contours, largest, color=255, thickness=-1)
        # keep large area in skin case
        for idx in range(len(areas)):
            if idx != largest and areas[largest] > 2000:
                cv2.drawContours(result, contours, idx, color=255, thickness=-1)
    return result
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def refine_hole(parsing_result_filled, parsing_result, arm_mask):
    """Select large filled holes between arm and cloth, plus the arms.

    Args:
        parsing_result_filled: label map after hole filling (uint8).
        parsing_result: original label map (uint8).
        arm_mask: binary 0/1 arm mask (uint8).
    Returns:
        arm_mask plus a mask of filled-hole regions larger than 2000 px.
    """
    # Pixels that became cloth (4) through filling but were not cloth before,
    # minus arm pixels. NOTE(review): the subtraction is on uint8 arrays, so
    # any arm pixel outside the AND region would wrap around — assumed not to
    # matter because arm pixels are excluded by the != 4 term; confirm.
    filled_hole = cv2.bitwise_and(np.where(parsing_result_filled == 4, 255, 0),
                                  np.where(parsing_result != 4, 255, 0)) - arm_mask * 255
    contours, hierarchy = cv2.findContours(filled_hole, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_L1)
    refine_hole_mask = np.zeros_like(parsing_result).astype(np.uint8)
    for i in range(len(contours)):
        a = cv2.contourArea(contours[i], True)
        # keep hole > 2000 pixels
        if abs(a) > 2000:
            cv2.drawContours(refine_hole_mask, contours, i, color=255, thickness=-1)
    return refine_hole_mask + arm_mask
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def onnx_inference(session, lip_session, input_dir):
    """Run the two-stage human-parsing pipeline on the image in input_dir.

    Stage 1 (ATR session, 512x512): full-body parsing, followed by
    hole-filling / refinement of the upper-cloth region.
    Stage 2 (LIP session, 473x473): used only to derive a neck region,
    which is written back into the stage-1 result as label 18.

    Args:
        session: ONNX Runtime session for the ATR parsing model.
        lip_session: ONNX Runtime session for the LIP parsing model.
        input_dir: directory consumed by SimpleFolderDataset; only the
            first item (index 0) is processed.
    Returns:
        (output_img, face_mask): a palettized PIL label image and a float
        tensor marking face pixels (label 11).
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # NOTE(review): mean/std are in BGR order (0.406 first) — presumably
        # the dataset loads images via OpenCV; confirm against the dataset.
        transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])
    ])
    dataset = SimpleFolderDataset(root=input_dir, input_size=[512, 512], transform=transform)
    # dataloader = DataLoader(dataset)
    with torch.no_grad():
        # for _, batch in enumerate(tqdm(dataloader, disable=True)):
        image, meta = dataset[0]
        image = image.unsqueeze(0)  # add batch dimension

        # image, meta = batch
        c = meta['center']
        h = meta['height']
        w = meta['width']
        s = meta['scale']
        output = session.run(None, {"input.1": image.numpy().astype(np.float32)})
        # output[1][0]: logits of the second model output for the first item.
        upsample = torch.nn.Upsample(size=[512, 512], mode='bilinear', align_corners=True)
        upsample_output = upsample(torch.from_numpy(output[1][0]).unsqueeze(0))
        upsample_output = upsample_output.squeeze()
        upsample_output = upsample_output.permute(1, 2, 0)  # CHW -> HWC
        # Map logits from network space back to the original image geometry.
        logits_result = transform_logits(upsample_output.data.cpu().numpy(), c, s, w, h, input_size=[512, 512])
        parsing_result = np.argmax(logits_result, axis=2)
        # 1-px zero border so contours touching the edge stay closed.
        parsing_result = np.pad(parsing_result, pad_width=1, mode='constant', constant_values=0)
        # try holefilling the clothes part
        arm_mask = (parsing_result == 14).astype(np.float32) \
                   + (parsing_result == 15).astype(np.float32)
        upper_cloth_mask = (parsing_result == 4).astype(np.float32) + arm_mask
        img = np.where(upper_cloth_mask, 255, 0)
        dst = hole_fill(img.astype(np.uint8))
        # Convert the filled 0/255 mask back to label value 4.
        parsing_result_filled = dst / 255 * 4
        parsing_result_woarm = np.where(parsing_result_filled == 4, parsing_result_filled, parsing_result)
        # add back arm and refined hole between arm and cloth
        refine_hole_mask = refine_hole(parsing_result_filled.astype(np.uint8), parsing_result.astype(np.uint8),
                                       arm_mask.astype(np.uint8))
        parsing_result = np.where(refine_hole_mask, parsing_result, parsing_result_woarm)
        # remove padding
        parsing_result = parsing_result[1:-1, 1:-1]

    # Second pass with the LIP model at its native 473x473 resolution.
    dataset_lip = SimpleFolderDataset(root=input_dir, input_size=[473, 473], transform=transform)
    # dataloader_lip = DataLoader(dataset_lip)
    with torch.no_grad():
        # for _, batch in enumerate(tqdm(dataloader_lip, disable=True)):

        image, meta = dataset_lip[0]
        image = image.unsqueeze(0)

        # image, meta = batch
        c = meta['center']
        s = meta['scale']
        w = meta['width']
        h = meta['height']

        output_lip = lip_session.run(None, {"input.1": image.numpy().astype(np.float32)})
        upsample = torch.nn.Upsample(size=[473, 473], mode='bilinear', align_corners=True)
        upsample_output_lip = upsample(torch.from_numpy(output_lip[1][0]).unsqueeze(0))
        upsample_output_lip = upsample_output_lip.squeeze()
        upsample_output_lip = upsample_output_lip.permute(1, 2, 0)  # CHW -> HWC
        logits_result_lip = transform_logits(upsample_output_lip.data.cpu().numpy(), c, s, w, h,
                                             input_size=[473, 473])
        parsing_result_lip = np.argmax(logits_result_lip, axis=2)
    # add neck parsing result
    # Neck = pixels the ATR model calls face (11) but the LIP model does not
    # call face (13); relabeled as 18.
    neck_mask = np.logical_and(np.logical_not((parsing_result_lip == 13).astype(np.float32)),
                               (parsing_result == 11).astype(np.float32))
    parsing_result = np.where(neck_mask, 18, parsing_result)
    palette = get_palette(19)
    output_img = Image.fromarray(np.asarray(parsing_result, dtype=np.uint8))
    output_img.putpalette(palette)
    face_mask = torch.from_numpy((parsing_result == 11).astype(np.float32))

    return output_img, face_mask
|
preprocess/humanparsing/run_parsing.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pdb
|
| 3 |
+
import sys
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
import onnxruntime as ort
|
| 7 |
+
|
| 8 |
+
PROJECT_ROOT = Path(__file__).absolute().parents[0].absolute()
|
| 9 |
+
sys.path.insert(0, str(PROJECT_ROOT))
|
| 10 |
+
import torch
|
| 11 |
+
from parsing_api import onnx_inference
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Parsing:
    """Wrapper owning the two ONNX Runtime human-parsing sessions.

    Loads the ATR and LIP parsing models from the repository's
    checkpoints/humanparsing directory and runs them via onnx_inference.
    """
    def __init__(self, gpu_id: int):
        # gpu_id: CUDA device index used for both ORT sessions and the
        # torch device selected at call time.
        self.gpu_id = gpu_id
        # torch.cuda.set_device(gpu_id)
        session_options = ort.SessionOptions()
        session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        session_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        #### jho modified >>>>
        # Prefer CUDA on the given device, fall back to CPU if unavailable.
        providers = [
            ('CUDAExecutionProvider', {
                'device_id': gpu_id,
            }),
            'CPUExecutionProvider',
        ]
        # Checkpoint paths are resolved relative to the repository root
        # (two directories above this file).
        self.session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_atr.onnx'),
                                            sess_options=session_options, providers=providers)
        self.lip_session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_lip.onnx'),
                                                sess_options=session_options, providers=providers)
        #### jho modified <<<<
        # session_options.add_session_config_entry('gpu_id', str(gpu_id))
        # self.session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_atr.onnx'),
        #                                     sess_options=session_options, providers=['CUDAExecutionProvider'])
        # self.lip_session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_lip.onnx'),
        #                                         sess_options=session_options, providers=['CUDAExecutionProvider'])
        print(f"parsing init done (gpu: {gpu_id})")

    def __call__(self, input_image):
        """Run both parsing models; returns (parsed_image, face_mask).

        Args:
            input_image: directory path consumed by onnx_inference's
                SimpleFolderDataset (despite the name, not a PIL image —
                see onnx_inference's input_dir parameter).
        """
        torch.cuda.set_device(self.gpu_id)
        parsed_image, face_mask = onnx_inference(self.session, self.lip_session, input_image)
        return parsed_image, face_mask
|
| 44 |
+
|
preprocess/humanparsing/utils/__init__.py
ADDED
|
File without changes
|
preprocess/humanparsing/utils/consistency_loss.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : kl_loss.py
|
| 8 |
+
@Time : 7/23/19 4:02 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
import torch
|
| 14 |
+
import torch.nn.functional as F
|
| 15 |
+
from torch import nn
|
| 16 |
+
from datasets.target_generation import generate_edge_tensor
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class ConsistencyLoss(nn.Module):
    """Consistency loss between edges derived from the parsing prediction
    and the directly predicted edge map.
    """
    def __init__(self, ignore_index=255):
        # ignore_index: label value excluded from the loss (default 255).
        super(ConsistencyLoss, self).__init__()
        self.ignore_index=ignore_index

    def forward(self, parsing, edge, label):
        """Smooth-L1 loss over pixels where both edge maps agree positively.

        Args:
            parsing: parsing logits (N, C, H, W).
            edge: edge logits (N, 2, H, W) — assumed binary edge classes; confirm.
            label: ground-truth parsing labels (N, H, W).
        """
        parsing_pre = torch.argmax(parsing, dim=1)
        # Propagate ignored pixels into the prediction before edge extraction.
        parsing_pre[label==self.ignore_index]=self.ignore_index
        generated_edge = generate_edge_tensor(parsing_pre)
        edge_pre = torch.argmax(edge, dim=1)
        # Restrict to valid (non-ignored) pixels.
        v_generate_edge = generated_edge[label!=255]
        v_edge_pre = edge_pre[label!=255]
        # NOTE(review): torch.cuda.FloatTensor cast assumes CUDA inputs.
        v_edge_pre = v_edge_pre.type(torch.cuda.FloatTensor)
        positive_union = (v_generate_edge==1)&(v_edge_pre==1) # only the positive values count
        return F.smooth_l1_loss(v_generate_edge[positive_union].squeeze(0), v_edge_pre[positive_union].squeeze(0))
|
preprocess/humanparsing/utils/criterion.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : criterion.py
|
| 8 |
+
@Time : 8/30/19 8:59 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import torch.nn as nn
|
| 15 |
+
import torch
|
| 16 |
+
import numpy as np
|
| 17 |
+
from torch.nn import functional as F
|
| 18 |
+
from .lovasz_softmax import LovaszSoftmax
|
| 19 |
+
from .kl_loss import KLDivergenceLoss
|
| 20 |
+
from .consistency_loss import ConsistencyLoss
|
| 21 |
+
|
| 22 |
+
NUM_CLASSES = 20
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class CriterionAll(nn.Module):
    """Combined human-parsing loss.

    Sums: CE + Lovasz-Softmax for segmentation, frequency-weighted CE for
    edges, KL distillation against soft labels during self-correction
    cycles, and a parsing/edge consistency regularizer.
    """

    def __init__(self, use_class_weight=False, ignore_index=255, lambda_1=1, lambda_2=1, lambda_3=1,
                 num_classes=20):
        super(CriterionAll, self).__init__()
        self.ignore_index = ignore_index
        self.use_class_weight = use_class_weight
        self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index)
        self.lovasz = LovaszSoftmax(ignore_index=ignore_index)
        self.kldiv = KLDivergenceLoss(ignore_index=ignore_index)
        self.reg = ConsistencyLoss(ignore_index=ignore_index)
        self.lamda_1 = lambda_1
        self.lamda_2 = lambda_2
        self.lamda_3 = lambda_3
        self.num_classes = num_classes

    def parsing_loss(self, preds, target, cycle_n=None):
        """
        Loss function definition.

        Args:
            preds: [[parsing result1, parsing result2], [edge result]]
            target: [parsing label, edge label, soft parsing label or None,
                    soft edge label or None]
            cycle_n: self-correction cycle index; weights the soft labels.
        Returns:
            Calculated loss (scalar tensor).
        """
        h, w = target[0].size(1), target[0].size(2)

        # Balance the edge CE by inverse class frequency (edge pixels are sparse).
        pos_num = torch.sum(target[1] == 1, dtype=torch.float)
        neg_num = torch.sum(target[1] == 0, dtype=torch.float)

        weight_pos = neg_num / (pos_num + neg_num)
        weight_neg = pos_num / (pos_num + neg_num)
        weights = torch.tensor([weight_neg, weight_pos])  # edge loss weight

        loss = 0

        # loss for segmentation
        preds_parsing = preds[0]
        for pred_parsing in preds_parsing:
            scale_pred = F.interpolate(input=pred_parsing, size=(h, w),
                                       mode='bilinear', align_corners=True)

            loss += 0.5 * self.lamda_1 * self.lovasz(scale_pred, target[0])
            if target[2] is None:
                loss += 0.5 * self.lamda_1 * self.criterion(scale_pred, target[0])
            else:
                # Distill from soft labels, blended with the one-hot GT by a
                # factor that decays with the self-correction cycle count.
                soft_scale_pred = F.interpolate(input=target[2], size=(h, w),
                                                mode='bilinear', align_corners=True)
                soft_scale_pred = moving_average(soft_scale_pred, to_one_hot(target[0], num_cls=self.num_classes),
                                                 1.0 / (cycle_n + 1.0))
                loss += 0.5 * self.lamda_1 * self.kldiv(scale_pred, soft_scale_pred, target[0])

        # loss for edge
        preds_edge = preds[1]
        for pred_edge in preds_edge:
            scale_pred = F.interpolate(input=pred_edge, size=(h, w),
                                       mode='bilinear', align_corners=True)
            if target[3] is None:
                loss += self.lamda_2 * F.cross_entropy(scale_pred, target[1],
                                                       weights.cuda(), ignore_index=self.ignore_index)
            else:
                soft_scale_edge = F.interpolate(input=target[3], size=(h, w),
                                                mode='bilinear', align_corners=True)
                soft_scale_edge = moving_average(soft_scale_edge, to_one_hot(target[1], num_cls=2),
                                                 1.0 / (cycle_n + 1.0))
                # NOTE(review): masks the edge KL term with the parsing label
                # (target[0]) rather than the edge label (target[1]) — confirm
                # this is intended before changing it.
                loss += self.lamda_2 * self.kldiv(scale_pred, soft_scale_edge, target[0])

        # consistency regularization between parsing and edge predictions
        preds_parsing = preds[0]
        preds_edge = preds[1]
        for pred_parsing in preds_parsing:
            scale_pred = F.interpolate(input=pred_parsing, size=(h, w),
                                       mode='bilinear', align_corners=True)
            scale_edge = F.interpolate(input=preds_edge[0], size=(h, w),
                                       mode='bilinear', align_corners=True)
            loss += self.lamda_3 * self.reg(scale_pred, scale_edge, target[0])

        return loss

    def forward(self, preds, target, cycle_n=None):
        loss = self.parsing_loss(preds, target, cycle_n)
        return loss

    def _generate_weights(self, masks, num_classes):
        """Compute inverse-frequency class weights.

        masks: torch.Tensor with shape [B, H, W]
        Returns a numpy array of length num_classes summing to 1.
        """
        masks_label = masks.data.cpu().numpy().astype(np.int64)
        pixel_nums = []
        tot_pixels = 0
        for i in range(num_classes):
            # FIX: the np.float alias was removed in NumPy >= 1.24; use the
            # concrete np.float64 type instead.
            pixel_num_of_cls_i = np.sum(masks_label == i).astype(np.float64)
            pixel_nums.append(pixel_num_of_cls_i)
            tot_pixels += pixel_num_of_cls_i
        weights = []
        for i in range(num_classes):
            weights.append(
                (tot_pixels - pixel_nums[i]) / tot_pixels / (num_classes - 1)
            )
        weights = np.array(weights, dtype=np.float64)
        # weights = torch.from_numpy(weights).float().to(masks.device)
        return weights
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def moving_average(target1, target2, alpha=1.0):
    """Linear blend of two targets: (1 - alpha) * target1 + alpha * target2."""
    blended = (1.0 - alpha) * target1 + alpha * target2
    return blended
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def to_one_hot(tensor, num_cls, dim=1, ignore_index=255):
    """One-hot encode a [B, H, W] label tensor into [B, num_cls, H, W].

    Pixels labelled `ignore_index` are mapped to class 0 before encoding.

    FIXES vs. original:
    - the original clobbered ignore_index entries in the *caller's* tensor
      in place; we clone first so the input labels are left untouched.
    - the one-hot buffer was hard-coded to `.cuda()`, which crashed on CPU
      tensors; allocate on the input's device instead (identical behavior
      for CUDA inputs).
    """
    b, h, w = tensor.shape
    labels = tensor.clone()
    labels[labels == ignore_index] = 0
    onehot_tensor = torch.zeros(b, num_cls, h, w, device=tensor.device)
    onehot_tensor.scatter_(dim, labels.unsqueeze(dim), 1)
    return onehot_tensor
|
preprocess/humanparsing/utils/encoding.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
| 2 |
+
## Created by: Hang Zhang
|
| 3 |
+
## ECE Department, Rutgers University
|
| 4 |
+
## Email: zhang.hang@rutgers.edu
|
| 5 |
+
## Copyright (c) 2017
|
| 6 |
+
##
|
| 7 |
+
## This source code is licensed under the MIT-style license found in the
|
| 8 |
+
## LICENSE file in the root directory of this source tree
|
| 9 |
+
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
| 10 |
+
|
| 11 |
+
"""Encoding Data Parallel"""
|
| 12 |
+
import threading
|
| 13 |
+
import torch
|
| 14 |
+
from torch.autograd import Variable, Function
|
| 15 |
+
import torch.cuda.comm as comm
|
| 16 |
+
from torch.nn.parallel.data_parallel import DataParallel
|
| 17 |
+
from torch.nn.parallel.parallel_apply import get_a_var
|
| 18 |
+
from torch.nn.parallel._functions import Broadcast
|
| 19 |
+
|
| 20 |
+
torch_ver = torch.__version__[:3]
|
| 21 |
+
|
| 22 |
+
__all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 'patch_replication_callback']
|
| 23 |
+
|
| 24 |
+
def allreduce(*inputs):
    """Cross GPU all reduce autograd operation for calculate mean and
    variance in SyncBN.
    """
    # Thin wrapper over the AllReduce autograd Function below; per its
    # forward() signature, inputs are (num_inputs, *per_gpu_tensors).
    return AllReduce.apply(*inputs)
|
| 29 |
+
|
| 30 |
+
class AllReduce(Function):
    # Autograd-aware all-reduce: sums groups of `num_inputs` tensors across
    # GPUs and broadcasts the summed results back to every device.
    @staticmethod
    def forward(ctx, num_inputs, *inputs):
        ctx.num_inputs = num_inputs
        # One device id per group: the device of each group's first tensor.
        ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)]
        # Regroup the flat tensor list into per-device chunks of num_inputs.
        inputs = [inputs[i:i + num_inputs]
                  for i in range(0, len(inputs), num_inputs)]
        # sort before reduce sum
        inputs = sorted(inputs, key=lambda i: i[0].get_device())
        results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0])
        outputs = comm.broadcast_coalesced(results, ctx.target_gpus)
        return tuple([t for tensors in outputs for t in tensors])

    @staticmethod
    def backward(ctx, *inputs):
        # Gradient of sum-then-broadcast is itself a sum-then-broadcast of
        # the incoming gradients; leading None matches the num_inputs arg.
        inputs = [i.data for i in inputs]
        inputs = [inputs[i:i + ctx.num_inputs]
                  for i in range(0, len(inputs), ctx.num_inputs)]
        results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0])
        outputs = comm.broadcast_coalesced(results, ctx.target_gpus)
        return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors])
|
| 51 |
+
|
| 52 |
+
class Reduce(Function):
    # Autograd-aware reduce: sums one tensor per GPU onto the first tensor's
    # device; backward broadcasts the gradient to every contributing device.
    @staticmethod
    def forward(ctx, *inputs):
        ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))]
        # Sort by device id so reduce_add sees a deterministic order.
        inputs = sorted(inputs, key=lambda i: i.get_device())
        return comm.reduce_add(inputs)

    @staticmethod
    def backward(ctx, gradOutput):
        return Broadcast.apply(ctx.target_gpus, gradOutput)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class DataParallelModel(DataParallel):
    """Module-level data parallelism that keeps outputs on their own GPUs.

    The input batch is chunked across the configured devices; each replica
    of the wrapped module processes its chunk. Unlike the stock
    ``DataParallel``, the per-device outputs are NOT gathered back to a
    single device — they are meant to be consumed by a matching
    :class:`encoding.parallel.DataParallelCriterion`, which balances the
    memory cost of loss computation across GPUs.

    The batch size should exceed (and ideally be a multiple of) the number
    of GPUs so every device receives an equal share of samples.

    Args:
        module: module to be parallelized
        device_ids: CUDA devices (default: all devices)

    Reference:
        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang,
        Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic
        Segmentation." *CVPR 2018*

    Example::

        >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
        >>> y = net(x)
    """

    def gather(self, outputs, output_device):
        # Deliberately a no-op: leave each output on its own device.
        return outputs

    def replicate(self, module, device_ids):
        return super(DataParallelModel, self).replicate(module, device_ids)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class DataParallelCriterion(DataParallel):
    """
    Calculate loss in multiple-GPUs, which balance the memory usage for
    Semantic Segmentation.

    The targets are splitted across the specified devices by chunking in
    the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`.

    Reference:
        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi,
        Amit Agrawal. “Context Encoding for Semantic Segmentation.
        *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*

    Example::

        >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
        >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2])
        >>> y = net(x)
        >>> loss = criterion(y, target)
    """
    def forward(self, inputs, *targets, **kwargs):
        # input should be already scatterd
        # scattering the targets instead
        if not self.device_ids:
            # No GPUs configured: run the wrapped criterion directly.
            return self.module(inputs, *targets, **kwargs)
        targets, kwargs = self.scatter(targets, kwargs, self.device_ids)
        if len(self.device_ids) == 1:
            # Single device: no replication needed.
            return self.module(inputs, *targets[0], **kwargs[0])
        # One criterion replica per device-local input chunk.
        replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
        outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs)
        # Average the per-device losses into a single scalar.
        return Reduce.apply(*outputs) / len(outputs)
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None):
    """Apply each criterion replica to its device-local (input, target) pair.

    Runs one worker thread per replica when there are several modules, and
    calls the worker inline when there is only one. Any exception raised by
    a worker is captured and re-raised in the calling thread.

    BUG FIX: the single-module fast path previously called
    ``_worker(0, modules[0], inputs[0], kwargs_tup[0], devices[0])`` —
    dropping ``targets[0]`` so the kwargs dict was bound to the ``target``
    parameter and the device to ``kwargs``. It now passes all six arguments
    in the same order as the multi-module path.
    """
    assert len(modules) == len(inputs)
    assert len(targets) == len(inputs)
    if kwargs_tup:
        assert len(modules) == len(kwargs_tup)
    else:
        kwargs_tup = ({},) * len(modules)
    if devices is not None:
        assert len(modules) == len(devices)
    else:
        devices = [None] * len(modules)

    lock = threading.Lock()
    results = {}
    if torch_ver != "0.3":
        # Capture the caller's grad mode so worker threads replicate it.
        grad_enabled = torch.is_grad_enabled()

    def _worker(i, module, input, target, kwargs, device=None):
        if torch_ver != "0.3":
            torch.set_grad_enabled(grad_enabled)
        if device is None:
            device = get_a_var(input).get_device()
        try:
            if not isinstance(input, tuple):
                input = (input,)
            with torch.cuda.device(device):
                output = module(*(input + target), **kwargs)
            with lock:
                results[i] = output
        except Exception as e:
            # Record the exception so the main thread can re-raise it.
            with lock:
                results[i] = e

    if len(modules) > 1:
        threads = [threading.Thread(target=_worker,
                                    args=(i, module, input, target,
                                          kwargs, device),)
                   for i, (module, input, target, kwargs, device) in
                   enumerate(zip(modules, inputs, targets, kwargs_tup, devices))]

        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
    else:
        # FIX: include targets[0]; the original omitted it (see docstring).
        _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0])

    outputs = []
    for i in range(len(inputs)):
        output = results[i]
        if isinstance(output, Exception):
            raise output
        outputs.append(output)
    return outputs
|
preprocess/humanparsing/utils/kl_loss.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : kl_loss.py
|
| 8 |
+
@Time : 7/23/19 4:02 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
import torch.nn.functional as F
|
| 14 |
+
from torch import nn
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def flatten_probas(input, target, labels, ignore=255):
    """Flatten [B, C, H, W] prediction/target maps to [P, C] rows, keeping
    only pixels whose label is not *ignore*."""
    B, C, H, W = input.size()
    # [B, C, H, W] -> [B*H*W, C]: one row per pixel.
    flat_input = input.permute(0, 2, 3, 1).contiguous().view(-1, C)
    flat_target = target.permute(0, 2, 3, 1).contiguous().view(-1, C)
    flat_labels = labels.view(-1)
    if ignore is None:
        return flat_input, flat_target
    valid = (flat_labels != ignore)
    keep = valid.nonzero().squeeze()
    return flat_input[keep], flat_target[keep]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class KLDivergenceLoss(nn.Module):
    """Temperature-scaled KL divergence between student logits (`input`) and
    teacher logits (`target`), computed only over pixels whose `label` is
    not `ignore_index`."""

    def __init__(self, ignore_index=255, T=1):
        super(KLDivergenceLoss, self).__init__()
        self.ignore_index = ignore_index
        self.T = T

    def forward(self, input, target, label):
        log_p = F.log_softmax(input / self.T, dim=1)
        q = F.softmax(target / self.T, dim=1)
        loss = F.kl_div(*flatten_probas(log_p, q, label, ignore=self.ignore_index))
        # Scale by T^2 so gradient magnitudes stay balanced across temperatures.
        return self.T * self.T * loss
|
preprocess/humanparsing/utils/lovasz_softmax.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : lovasz_softmax.py
|
| 8 |
+
@Time : 8/30/19 7:12 PM
|
| 9 |
+
@Desc : Lovasz-Softmax and Jaccard hinge loss in PyTorch
|
| 10 |
+
Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License)
|
| 11 |
+
@License : This source code is licensed under the license found in the
|
| 12 |
+
LICENSE file in the root directory of this source tree.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from __future__ import print_function, division
|
| 16 |
+
|
| 17 |
+
import torch
|
| 18 |
+
from torch.autograd import Variable
|
| 19 |
+
import torch.nn.functional as F
|
| 20 |
+
import numpy as np
|
| 21 |
+
from torch import nn
|
| 22 |
+
|
| 23 |
+
try:
|
| 24 |
+
from itertools import ifilterfalse
|
| 25 |
+
except ImportError: # py3k
|
| 26 |
+
from itertools import filterfalse as ifilterfalse
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def lovasz_grad(gt_sorted):
    """Gradient of the Lovasz extension w.r.t. sorted errors (Alg. 1 in the
    Lovasz-Softmax paper)."""
    n = len(gt_sorted)
    total_fg = gt_sorted.sum()
    # Running intersection / union as more predictions are included.
    inter = total_fg - gt_sorted.float().cumsum(0)
    union = total_fg + (1 - gt_sorted).float().cumsum(0)
    jaccard = 1. - inter / union
    if n > 1:  # cover 1-pixel case
        jaccard[1:n] = jaccard[1:n] - jaccard[0:-1]
    return jaccard
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True):
    """Foreground IoU for binary masks (1 = foreground, 0 = background),
    returned as a percentage. EMPTY is used when the union is empty."""
    if not per_image:
        preds, labels = (preds,), (labels,)
    scores = []
    for pred, label in zip(preds, labels):
        inter = ((label == 1) & (pred == 1)).sum()
        union = ((label == 1) | ((pred == 1) & (label != ignore))).sum()
        scores.append(EMPTY if not union else float(inter) / float(union))
    # mean across images if per_image
    return 100 * mean(scores)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False):
    """
    Array of IoU for each (non ignored) class
    """
    # preds/labels: prediction and ground-truth maps; iterated per image
    # when per_image is True, otherwise treated as one batch.
    if not per_image:
        preds, labels = (preds,), (labels,)
    ious = []
    for pred, label in zip(preds, labels):
        iou = []
        for i in range(C):
            if i != ignore:  # The ignored label is sometimes among predicted classes (ENet - CityScapes)
                intersection = ((label == i) & (pred == i)).sum()
                union = ((label == i) | ((pred == i) & (label != ignore))).sum()
                if not union:
                    # No pixels of this class anywhere: report EMPTY.
                    iou.append(EMPTY)
                else:
                    iou.append(float(intersection) / float(union))
        ious.append(iou)
    ious = [mean(iou) for iou in zip(*ious)]  # mean accross images if per_image
    return 100 * np.array(ious)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# --------------------------- BINARY LOSSES ---------------------------
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def lovasz_hinge(logits, labels, per_image=True, ignore=None):
    """
    Binary Lovasz hinge loss
      logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
      labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
      per_image: compute the loss per image instead of per batch
      ignore: void class id
    """
    if per_image:
        # One flattened hinge loss per image, averaged over the batch.
        loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore))
                    for log, lab in zip(logits, labels))
    else:
        loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore))
    return loss
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def lovasz_hinge_flat(logits, labels):
    """
    Binary Lovasz hinge loss
      logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
      labels: [P] Tensor, binary ground truth labels (0 or 1)
      ignore: label to ignore
    """
    if len(labels) == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.
    signs = 2. * labels.float() - 1.  # map {0, 1} labels to {-1, +1}
    errors = (1. - logits * Variable(signs))
    # Sort errors descending so lovasz_grad receives them in Alg. 1 order.
    errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
    perm = perm.data
    gt_sorted = labels[perm]
    grad = lovasz_grad(gt_sorted)
    loss = torch.dot(F.relu(errors_sorted), Variable(grad))
    return loss
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def flatten_binary_scores(scores, labels, ignore=None):
    """Flatten scores and labels to 1-D, dropping positions labelled *ignore*."""
    flat_scores = scores.view(-1)
    flat_labels = labels.view(-1)
    if ignore is None:
        return flat_scores, flat_labels
    keep = (flat_labels != ignore)
    return flat_scores[keep], flat_labels[keep]
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class StableBCELoss(torch.nn.modules.Module):
    """Numerically stable binary cross-entropy computed directly on logits."""

    def __init__(self):
        super(StableBCELoss, self).__init__()

    def forward(self, input, target):
        # max(x, 0) - x*t + log(1 + exp(-|x|)) is BCE-with-logits written
        # so that exp() never sees a large positive argument.
        log_term = (1 + (-input.abs()).exp()).log()
        per_elem = input.clamp(min=0) - input * target + log_term
        return per_elem.mean()
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def binary_xloss(logits, labels, ignore=None):
    """
    Binary Cross entropy loss
      logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
      labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
      ignore: void class id
    """
    # Drop ignored pixels, then apply the numerically stable BCE on logits.
    logits, labels = flatten_binary_scores(logits, labels, ignore)
    loss = StableBCELoss()(logits, Variable(labels.float()))
    return loss
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
# --------------------------- MULTICLASS LOSSES ---------------------------
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=255, weighted=None):
    """
    Multi-class Lovasz-Softmax loss
      probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).
              Interpreted as binary (sigmoid) output with outputs of size [B, H, W].
      labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
      classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
      per_image: compute the loss per image instead of per batch
      ignore: void class labels
      weighted: optional per-class loss weights, indexable by class id
    """
    if per_image:
        # Flatten and score one image at a time, then average over the batch.
        loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes, weighted=weighted)
                    for prob, lab in zip(probas, labels))
    else:
        loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes, weighted=weighted )
    return loss
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def lovasz_softmax_flat(probas, labels, classes='present', weighted=None):
    """
    Multi-class Lovasz-Softmax loss
      probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)
      labels: [P] Tensor, ground truth labels (between 0 and C - 1)
      classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
      weighted: optional per-class loss weights, indexable by class id
    """
    if probas.numel() == 0:
        # only void pixels, the gradients should be 0
        return probas * 0.
    C = probas.size(1)
    losses = []
    class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes
    for c in class_to_sum:
        fg = (labels == c).float()  # foreground for class c
        # FIX: original used `classes is 'present'`, an identity comparison
        # against a string literal (SyntaxWarning, implementation-dependent);
        # equality is the correct check.
        if (classes == 'present' and fg.sum() == 0):
            continue
        if C == 1:
            if len(classes) > 1:
                raise ValueError('Sigmoid output possible only with 1 class')
            class_pred = probas[:, 0]
        else:
            class_pred = probas[:, c]
        errors = (Variable(fg) - class_pred).abs()
        errors_sorted, perm = torch.sort(errors, 0, descending=True)
        perm = perm.data
        fg_sorted = fg[perm]
        if weighted is not None:
            losses.append(weighted[c] * torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted))))
        else:
            losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted))))
    return mean(losses)
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def flatten_probas(probas, labels, ignore=None):
    """Flatten [B, C, H, W] (or [B, H, W] sigmoid) predictions to [P, C] rows,
    dropping pixels labelled *ignore*."""
    if probas.dim() == 3:
        # assumes output of a sigmoid layer: add a singleton channel axis
        B, H, W = probas.size()
        probas = probas.view(B, 1, H, W)
    B, C, H, W = probas.size()
    flat = probas.permute(0, 2, 3, 1).contiguous().view(-1, C)  # one row per pixel
    flat_labels = labels.view(-1)
    if ignore is None:
        return flat, flat_labels
    valid = (flat_labels != ignore)
    vprobas = flat[valid.nonzero().squeeze()]
    vlabels = flat_labels[valid]
    return vprobas, vlabels
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def xloss(logits, labels, ignore=None):
    """
    Cross entropy loss.

    FIX: the `ignore` parameter was accepted but silently unused — the
    ignore index was hard-coded to 255. It is now honored, defaulting to
    255 when None (identical behavior for all existing call patterns).
    """
    ignore_index = 255 if ignore is None else ignore
    return F.cross_entropy(logits, Variable(labels), ignore_index=ignore_index)
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# --------------------------- HELPER FUNCTIONS ---------------------------
|
| 244 |
+
def isnan(x):
    # NaN is the only value that compares unequal to itself; this works for
    # Python floats and elementwise on tensors alike (unlike math.isnan).
    return x != x
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def mean(l, ignore_nan=False, empty=0):
    """
    nanmean compatible with generators.

    Returns `empty` for an empty iterable (or raises when empty == 'raise').
    """
    it = iter(l)
    if ignore_nan:
        it = ifilterfalse(isnan, it)
    try:
        acc = next(it)
    except StopIteration:
        if empty == 'raise':
            raise ValueError('Empty mean')
        return empty
    n = 1
    for n, v in enumerate(it, 2):
        acc += v
    # Single element: return it unchanged (avoids needless division).
    return acc if n == 1 else acc / n
|
| 267 |
+
|
| 268 |
+
# --------------------------- Class ---------------------------
|
| 269 |
+
class LovaszSoftmax(nn.Module):
    # nn.Module wrapper: softmax over the class dimension, then the
    # multi-class Lovasz-Softmax loss defined above.
    def __init__(self, per_image=False, ignore_index=255, weighted=None):
        super(LovaszSoftmax, self).__init__()
        self.lovasz_softmax = lovasz_softmax
        self.per_image = per_image
        self.ignore_index=ignore_index
        self.weighted = weighted

    def forward(self, pred, label):
        # pred: raw logits; converted to probabilities before the loss.
        pred = F.softmax(pred, dim=1)
        return self.lovasz_softmax(pred, label, per_image=self.per_image, ignore=self.ignore_index, weighted=self.weighted)
|
preprocess/humanparsing/utils/miou.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import os
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
from collections import OrderedDict
|
| 6 |
+
from PIL import Image as PILImage
|
| 7 |
+
from utils.transforms import transform_parsing
|
| 8 |
+
|
| 9 |
+
LABELS = ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', \
|
| 10 |
+
'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg',
|
| 11 |
+
'Right-leg', 'Left-shoe', 'Right-shoe']
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# LABELS = ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs']
|
| 15 |
+
|
| 16 |
+
def get_palette(num_cls):
    """ Returns the color map for visualizing the segmentation mask.
    Args:
        num_cls: Number of classes
    Returns:
        The color map as a flat [R0, G0, B0, R1, G1, B1, ...] list
    """
    palette = [0] * (num_cls * 3)
    for j in range(num_cls):
        lab = j
        shift = 7
        # Spread the bits of the class id across the high bits of R/G/B,
        # three bits per round, producing well-separated colors.
        while lab:
            palette[j * 3 + 0] |= ((lab >> 0) & 1) << shift
            palette[j * 3 + 1] |= ((lab >> 1) & 1) << shift
            palette[j * 3 + 2] |= ((lab >> 2) & 1) << shift
            shift -= 1
            lab >>= 3
    return palette
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def get_confusion_matrix(gt_label, pred_label, num_classes):
    """
    Calculate the confusion matrix for a pair of flattened label arrays.

    :param gt_label: the ground truth labels (flat integer array)
    :param pred_label: the predicted labels (flat integer array)
    :param num_classes: the number of classes
    :return: (num_classes, num_classes) float array; entry [i, j] counts
        pixels with ground truth i that were predicted as j
    """
    index = (gt_label * num_classes + pred_label).astype('int32')
    # Count every (gt, pred) pair in one C-level pass; ``minlength``
    # guarantees a full num_classes**2 histogram so a reshape yields the
    # matrix directly (replaces the previous O(C^2) Python loop).
    label_count = np.bincount(index, minlength=num_classes ** 2)
    # Out-of-range labels would extend the histogram; keep only the
    # valid bins, exactly as the nested-loop version effectively did.
    confusion_matrix = label_count[:num_classes ** 2].reshape(num_classes, num_classes)
    return confusion_matrix.astype(np.float64)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def compute_mean_ioU(preds, scales, centers, num_classes, datadir, input_size=[473, 473], dataset='val'):
    """Compute pixel accuracy, mean accuracy and per-class/mean IoU.

    Args:
        preds: list of predicted parsing maps at network resolution.
        scales: per-image crop scales used at inference time.
        centers: per-image crop centers used at inference time.
        num_classes: number of parsing classes.
        datadir: dataset root containing ``<dataset>_id.txt`` and
            ``<dataset>_segmentations/``.
        input_size: (h, w) the network operated at; used to warp
            predictions back to each original image size.
        dataset: dataset split name.

    Returns:
        OrderedDict mapping each LABELS entry to its IoU, plus
        'Pixel accuracy', 'Mean accuracy' and 'Mean IU'.
    """
    val_file = os.path.join(datadir, dataset + '_id.txt')
    # Context manager so the id file is closed deterministically
    # (the previous version leaked the open file handle).
    with open(val_file) as f:
        val_id = [i_id.strip() for i_id in f]

    confusion_matrix = np.zeros((num_classes, num_classes))

    for i, pred_out in enumerate(preds):
        im_name = val_id[i]
        gt_path = os.path.join(datadir, dataset + '_segmentations', im_name + '.png')
        gt = np.array(PILImage.open(gt_path))
        h, w = gt.shape
        s = scales[i]
        c = centers[i]
        # Undo the test-time affine crop so prediction and GT align.
        pred = transform_parsing(pred_out, c, s, w, h, input_size)

        gt = np.asarray(gt, dtype=np.int32)
        pred = np.asarray(pred, dtype=np.int32)

        # 255 marks "ignore" pixels; drop them from the statistics.
        ignore_index = gt != 255
        gt = gt[ignore_index]
        pred = pred[ignore_index]

        confusion_matrix += get_confusion_matrix(gt, pred, num_classes)

    pos = confusion_matrix.sum(1)   # per-class ground-truth pixel counts
    res = confusion_matrix.sum(0)   # per-class predicted pixel counts
    tp = np.diag(confusion_matrix)  # correctly classified pixels

    pixel_accuracy = (tp.sum() / pos.sum()) * 100
    mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100
    IoU_array = (tp / np.maximum(1.0, pos + res - tp))
    IoU_array = IoU_array * 100
    mean_IoU = IoU_array.mean()
    print('Pixel accuracy: %f \n' % pixel_accuracy)
    print('Mean accuracy: %f \n' % mean_accuracy)
    print('Mean IU: %f \n' % mean_IoU)

    name_value = [(label, iou) for label, iou in zip(LABELS, IoU_array)]
    name_value.append(('Pixel accuracy', pixel_accuracy))
    name_value.append(('Mean accuracy', mean_accuracy))
    name_value.append(('Mean IU', mean_IoU))
    return OrderedDict(name_value)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def compute_mean_ioU_file(preds_dir, num_classes, datadir, dataset='val'):
    """Compute pixel accuracy, mean accuracy and IoU from saved PNGs.

    Like :func:`compute_mean_ioU`, but reads already-warped prediction
    images from ``preds_dir`` instead of in-memory network outputs.

    Args:
        preds_dir: directory with per-image prediction PNGs.
        num_classes: number of parsing classes.
        datadir: dataset root with ``<dataset>_id.txt`` and ``segmentations/``.
        dataset: dataset split name.

    Returns:
        OrderedDict of per-class IoUs plus the three summary metrics.
    """
    list_path = os.path.join(datadir, dataset + '_id.txt')
    # Context manager closes the id file deterministically
    # (the previous version leaked the open file handle).
    with open(list_path) as f:
        val_id = [i_id.strip() for i_id in f]

    confusion_matrix = np.zeros((num_classes, num_classes))

    for i, im_name in enumerate(val_id):
        gt_path = os.path.join(datadir, 'segmentations', im_name + '.png')
        gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE)

        pred_path = os.path.join(preds_dir, im_name + '.png')
        pred = np.asarray(PILImage.open(pred_path))

        gt = np.asarray(gt, dtype=np.int32)
        pred = np.asarray(pred, dtype=np.int32)

        # 255 marks "ignore" pixels; drop them from the statistics.
        ignore_index = gt != 255
        gt = gt[ignore_index]
        pred = pred[ignore_index]

        confusion_matrix += get_confusion_matrix(gt, pred, num_classes)

    pos = confusion_matrix.sum(1)   # per-class ground-truth pixel counts
    res = confusion_matrix.sum(0)   # per-class predicted pixel counts
    tp = np.diag(confusion_matrix)  # correctly classified pixels

    pixel_accuracy = (tp.sum() / pos.sum()) * 100
    mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100
    IoU_array = (tp / np.maximum(1.0, pos + res - tp))
    IoU_array = IoU_array * 100
    mean_IoU = IoU_array.mean()
    print('Pixel accuracy: %f \n' % pixel_accuracy)
    print('Mean accuracy: %f \n' % mean_accuracy)
    print('Mean IU: %f \n' % mean_IoU)

    name_value = [(label, iou) for label, iou in zip(LABELS, IoU_array)]
    name_value.append(('Pixel accuracy', pixel_accuracy))
    name_value.append(('Mean accuracy', mean_accuracy))
    name_value.append(('Mean IU', mean_IoU))
    return OrderedDict(name_value)
|
preprocess/humanparsing/utils/schp.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : schp.py
|
| 8 |
+
@Time : 4/8/19 2:11 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import os
|
| 15 |
+
import torch
|
| 16 |
+
import modules
|
| 17 |
+
|
| 18 |
+
def moving_average(net1, net2, alpha=1):
    """Blend net2's parameters into net1 in place.

    Each parameter of ``net1`` becomes
    ``(1 - alpha) * p1 + alpha * p2``; with the default ``alpha=1``
    net1 is simply overwritten by net2's weights.
    """
    for target, source in zip(net1.parameters(), net2.parameters()):
        target.data.mul_(1.0 - alpha)
        target.data.add_(source.data * alpha)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _check_bn(module, flag):
    """Set ``flag[0]`` when *module* is an InPlaceABNSync layer.

    Helper for :func:`check_bn`; the single-element list acts as a
    mutable cell so ``model.apply`` can report back.
    """
    is_abn = issubclass(module.__class__, modules.bn.InPlaceABNSync)
    if is_abn:
        flag[0] = True
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def check_bn(model):
    """Return True when *model* contains at least one InPlaceABNSync layer."""
    found = [False]
    model.apply(lambda submodule: _check_bn(submodule, found))
    return found[0]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def reset_bn(module):
    """Reset the running statistics of an InPlaceABNSync layer.

    Running mean goes back to zeros and running variance to ones;
    non-ABN modules are left untouched.
    """
    if not issubclass(module.__class__, modules.bn.InPlaceABNSync):
        return
    module.running_var = torch.ones_like(module.running_var)
    module.running_mean = torch.zeros_like(module.running_mean)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _get_momenta(module, momenta):
    """Record the momentum of an InPlaceABNSync layer into *momenta*.

    Keyed by the module object itself so it can be restored later.
    """
    if issubclass(module.__class__, modules.bn.InPlaceABNSync):
        momenta[module] = module.momentum
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _set_momenta(module, momenta):
    """Restore a previously recorded momentum onto an InPlaceABNSync layer."""
    if issubclass(module.__class__, modules.bn.InPlaceABNSync):
        module.momentum = momenta[module]
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def bn_re_estimate(loader, model):
    """Re-estimate the running BN statistics of *model* over *loader*.

    Resets every InPlaceABNSync layer, then streams the loader through
    the model in train mode with a cumulative-average momentum so the
    final running stats reflect the whole dataset. The original
    per-layer momenta are restored afterwards. No-op (with a message)
    when the model has no such layers.
    """
    if not check_bn(model):
        print('No batch norm layer detected')
        return
    model.train()
    momenta = {}
    model.apply(reset_bn)
    model.apply(lambda m: _get_momenta(m, momenta))
    seen = 0
    for batch in loader:
        images, labels, _ = batch
        batch_size = images.data.size(0)
        # Cumulative average: the k-th batch contributes 1/k of the stats.
        momentum = batch_size / (seen + batch_size)
        for bn_module in momenta:
            bn_module.momentum = momentum
        model(images)
        seen += batch_size
    model.apply(lambda m: _set_momenta(m, momenta))
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def save_schp_checkpoint(states, is_best_parsing, output_dir, filename='schp_checkpoint.pth.tar'):
    """Persist an SCHP training checkpoint, optionally also as "best".

    Always writes ``output_dir/filename``. When ``is_best_parsing`` is
    true and *states* carries a ``state_dict`` entry, an extra copy is
    saved as ``model_parsing_best.pth.tar``. Existing files are removed
    first, then rewritten.
    """
    def _overwrite(path):
        # Remove-then-save mirrors the original behavior exactly
        # (e.g. replaces rather than follows an existing symlink).
        if os.path.exists(path):
            os.remove(path)
        torch.save(states, path)

    _overwrite(os.path.join(output_dir, filename))
    if is_best_parsing and 'state_dict' in states:
        _overwrite(os.path.join(output_dir, 'model_parsing_best.pth.tar'))
|
preprocess/humanparsing/utils/soft_dice_loss.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- encoding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
@Author : Peike Li
|
| 6 |
+
@Contact : peike.li@yahoo.com
|
| 7 |
+
@File : soft_dice_loss.py
|
| 8 |
+
@Time : 8/13/19 5:09 PM
|
| 9 |
+
@Desc :
|
| 10 |
+
@License : This source code is licensed under the license found in the
|
| 11 |
+
LICENSE file in the root directory of this source tree.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import print_function, division
|
| 15 |
+
|
| 16 |
+
import torch
|
| 17 |
+
import torch.nn.functional as F
|
| 18 |
+
from torch import nn
|
| 19 |
+
|
| 20 |
+
try:
|
| 21 |
+
from itertools import ifilterfalse
|
| 22 |
+
except ImportError: # py3k
|
| 23 |
+
from itertools import filterfalse as ifilterfalse
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def tversky_loss(probas, labels, alpha=0.5, beta=0.5, epsilon=1e-6):
    '''
    Tversky loss function.
    probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)
    labels: [P] Tensor, ground truth labels (between 0 and C - 1)

    Equals the soft Dice loss for alpha = beta = 0.5 and the Jaccard
    loss for alpha = beta = 1.0. Classes absent from ``labels`` are
    excluded from the mean.
    See `Tversky loss function for image segmentation using 3D fully convolutional deep networks`
    https://arxiv.org/pdf/1706.05721.pdf
    '''
    num_classes = probas.size(1)
    per_class = []
    for cls in range(num_classes):
        target = (labels == cls).float()
        if target.sum() == 0:
            # No ground-truth pixels of this class: skip it entirely.
            continue
        pred = probas[:, cls]
        true_pos = torch.sum(pred * target)
        false_pos = torch.sum(pred * (1 - target))
        false_neg = torch.sum((1 - pred) * target)
        denom = true_pos + alpha * false_pos + beta * false_neg
        per_class.append(1 - true_pos / (denom + epsilon))
    return mean(per_class)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def flatten_probas(probas, labels, ignore=255):
    """
    Flattens predictions in the batch.

    :param probas: [B, C, H, W] class probabilities
    :param labels: [B, H, W] integer labels (any view-compatible shape)
    :param ignore: label value to drop; ``None`` keeps every pixel
    :return: (vprobas [P, C], vlabels [P]) with ignored pixels removed
    """
    B, C, H, W = probas.size()
    probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C)  # B * H * W, C = P, C
    labels = labels.view(-1)
    if ignore is None:
        return probas, labels
    valid = (labels != ignore)
    # Boolean indexing keeps a 2-D [P, C] result even when exactly one
    # pixel is valid; the previous ``valid.nonzero().squeeze()`` version
    # collapsed that case to a 1-D tensor.
    vprobas = probas[valid]
    vlabels = labels[valid]
    return vprobas, vlabels
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def isnan(x):
    """Return whether *x* is NaN: NaN is the only value unequal to itself.

    Works elementwise when *x* is a tensor (returns a boolean tensor).
    """
    return x != x
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def mean(l, ignore_nan=False, empty=0):
    """
    nanmean compatible with generators.

    Consumes *l* lazily. With ``ignore_nan`` NaN items are skipped.
    An empty input returns *empty*, or raises ValueError when
    ``empty == 'raise'``. A single-element input returns that element
    unchanged (no division).
    """
    items = iter(l)
    if ignore_nan:
        items = ifilterfalse(isnan, items)
    count = 1
    try:
        total = next(items)
    except StopIteration:
        if empty == 'raise':
            raise ValueError('Empty mean')
        return empty
    for count, value in enumerate(items, 2):
        total += value
    if count == 1:
        # Only one element was consumed: return it as-is.
        return total
    return total / count
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
class SoftDiceLoss(nn.Module):
    """Soft Dice loss: Tversky loss with alpha = beta = 0.5.

    Pixels whose label equals ``ignore_index`` are excluded.
    """

    def __init__(self, ignore_index=255):
        super(SoftDiceLoss, self).__init__()
        self.ignore_index = ignore_index

    def forward(self, pred, label):
        # Softmax the logits, drop ignored pixels, then apply the loss.
        probas = F.softmax(pred, dim=1)
        flat = flatten_probas(probas, label, ignore=self.ignore_index)
        return tversky_loss(*flat, alpha=0.5, beta=0.5)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class SoftJaccordLoss(nn.Module):
    """Soft Jaccard (IoU) loss: Tversky loss with alpha = beta = 1.0.

    Pixels whose label equals ``ignore_index`` are excluded.
    """

    def __init__(self, ignore_index=255):
        super(SoftJaccordLoss, self).__init__()
        self.ignore_index = ignore_index

    def forward(self, pred, label):
        # Softmax the logits, drop ignored pixels, then apply the loss.
        probas = F.softmax(pred, dim=1)
        flat = flatten_probas(probas, label, ignore=self.ignore_index)
        return tversky_loss(*flat, alpha=1.0, beta=1.0)
|
preprocess/humanparsing/utils/transforms.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ------------------------------------------------------------------------------
|
| 2 |
+
# Copyright (c) Microsoft
|
| 3 |
+
# Licensed under the MIT License.
|
| 4 |
+
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
|
| 5 |
+
# ------------------------------------------------------------------------------
|
| 6 |
+
|
| 7 |
+
from __future__ import absolute_import
|
| 8 |
+
from __future__ import division
|
| 9 |
+
from __future__ import print_function
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import cv2
|
| 13 |
+
import torch
|
| 14 |
+
|
| 15 |
+
class BRG2Tensor_transform(object):
    """Convert an HWC numpy image into a CHW torch tensor."""

    def __call__(self, pic):
        # HWC -> CHW; from_numpy shares the underlying buffer.
        img = torch.from_numpy(pic.transpose((2, 0, 1)))
        # Byte images are promoted to float (values are NOT rescaled).
        return img.float() if isinstance(img, torch.ByteTensor) else img
|
| 22 |
+
|
| 23 |
+
class BGR2RGB_transform(object):
    """Swap a CHW tensor's channel order between BGR and RGB."""

    def __call__(self, tensor):
        # Reversing the channel axis maps 0 <-> 2 and leaves 1 fixed.
        return tensor[[2, 1, 0], :, :]
|
| 26 |
+
|
| 27 |
+
def flip_back(output_flipped, matched_parts):
    '''
    Undo a horizontal test-time flip of joint heatmaps.

    output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
    matched_parts: iterable of (left_idx, right_idx) joint pairs to swap

    NOTE: the swap writes through a view, so the caller's array buffer
    is modified as well.
    '''
    assert output_flipped.ndim == 4,\
        'output_flipped should be [batch_size, num_joints, height, width]'

    # Mirror horizontally, then swap each left/right joint channel pair.
    output_flipped = output_flipped[:, :, :, ::-1]
    for left, right in matched_parts:
        tmp = output_flipped[:, left, :, :].copy()
        output_flipped[:, left, :, :] = output_flipped[:, right, :, :]
        output_flipped[:, right, :, :] = tmp
    return output_flipped
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def fliplr_joints(joints, joints_vis, width, matched_parts):
    """
    flip coords

    Mirrors joint x-coordinates about the vertical center line of an
    image of the given *width*, then swaps each left/right joint pair
    (both coordinates and visibility). Mutates the inputs in place and
    returns ``(joints * joints_vis, joints_vis)``.
    """
    # Mirror x about the image's vertical axis.
    joints[:, 0] = width - joints[:, 0] - 1

    # Exchange each matched left/right pair of rows.
    for left, right in matched_parts:
        joints[left, :], joints[right, :] = \
            joints[right, :], joints[left, :].copy()
        joints_vis[left, :], joints_vis[right, :] = \
            joints_vis[right, :], joints_vis[left, :].copy()

    # Zero out coordinates of invisible joints.
    return joints * joints_vis, joints_vis
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def transform_preds(coords, center, scale, input_size):
    """Map network-space point coordinates back to original-image space.

    Applies the inverse of the affine crop used at inference time to
    each row's (x, y); any extra columns stay zero in the output.
    """
    trans = get_affine_transform(center, scale, 0, input_size, inv=1)
    target_coords = np.zeros(coords.shape)
    for idx in range(coords.shape[0]):
        target_coords[idx, 0:2] = affine_transform(coords[idx, 0:2], trans)
    return target_coords
|
| 67 |
+
|
| 68 |
+
def transform_parsing(pred, center, scale, width, height, input_size):
    """Warp a predicted parsing map back to the original image size.

    Inverts the inference-time affine crop; nearest-neighbor sampling
    keeps label values intact (never blended), with 0 used for pixels
    outside the crop.
    """
    trans = get_affine_transform(center, scale, 0, input_size, inv=1)
    target_pred = cv2.warpAffine(
        pred,
        trans,
        (int(width), int(height)),
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0))
    return target_pred
|
| 80 |
+
|
| 81 |
+
def transform_logits(logits, center, scale, width, height, input_size):
    """Warp an HWC logits volume back to the original image size.

    Each channel is warped independently with bilinear sampling (logits
    may be blended, unlike hard labels) and re-stacked along axis 2.
    """
    trans = get_affine_transform(center, scale, 0, input_size, inv=1)
    num_channels = logits.shape[2]
    warped = [
        cv2.warpAffine(
            logits[:, :, ch],
            trans,
            (int(width), int(height)),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0))
        for ch in range(num_channels)
    ]
    return np.stack(warped, axis=2)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def get_affine_transform(center,
                         scale,
                         rot,
                         output_size,
                         shift=np.array([0, 0], dtype=np.float32),
                         inv=0):
    """Build the 2x3 affine matrix for a rotated, scaled crop.

    Args:
        center: (x, y) crop center in source-image coordinates.
        scale: crop extent; a scalar is broadcast to (scale, scale).
        rot: rotation angle in degrees.
        output_size: (h, w) of the destination image.
        shift: fractional shift of the crop, in units of ``scale``.
        inv: when truthy, return the inverse mapping (dst -> src).

    Returns:
        A 2x3 float matrix suitable for ``cv2.warpAffine``.
    """
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        # NOTE(review): a tuple scale would also fall into this branch and
        # be wrapped a second time — confirm callers only pass scalars,
        # lists or ndarrays here.
        scale = np.array([scale, scale])

    scale_tmp = scale

    src_w = scale_tmp[0]
    dst_w = output_size[1]
    dst_h = output_size[0]

    # Direction vectors give the rotated second reference point.
    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, (dst_w - 1) * -0.5], np.float32)

    # Three point correspondences fully determine an affine transform:
    # the crop center, a rotated offset point, and a perpendicular third.
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [(dst_w - 1) * 0.5, (dst_h - 1) * 0.5]
    dst[1, :] = np.array([(dst_w - 1) * 0.5, (dst_h - 1) * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def affine_transform(pt, t):
    """Apply the 2x3 affine matrix *t* to the 2-D point *pt*."""
    homogeneous = np.array([pt[0], pt[1], 1.]).T
    return np.dot(t, homogeneous)[:2]
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def get_3rd_point(a, b):
    """Return a third, non-collinear point for cv2.getAffineTransform.

    Computed as *b* plus the 90-degree rotation of the vector (a - b).
    """
    delta = a - b
    return b + np.array([-delta[1], delta[0]], dtype=np.float32)
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def get_dir(src_point, rot_rad):
    """Rotate the 2-D point *src_point* by *rot_rad* radians (CCW).

    Returns a plain two-element list [x', y'].
    """
    sin_r, cos_r = np.sin(rot_rad), np.cos(rot_rad)
    return [
        src_point[0] * cos_r - src_point[1] * sin_r,
        src_point[0] * sin_r + src_point[1] * cos_r,
    ]
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def crop(img, center, scale, output_size, rot=0):
    """Cut an affine crop of *img* around *center*.

    The crop covers ``scale`` source pixels, optionally rotated by
    *rot* degrees, and is resampled bilinearly to ``output_size``
    (h, w).
    """
    trans = get_affine_transform(center, scale, rot, output_size)
    return cv2.warpAffine(img,
                          trans,
                          (int(output_size[1]), int(output_size[0])),
                          flags=cv2.INTER_LINEAR)
|