venbab committed on
Commit
2411f32
·
verified ·
1 Parent(s): e2172de

Create preprocess_onepair.py

Browse files
Files changed (1) hide show
  1. preprocess_onepair.py +136 -0
preprocess_onepair.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # preprocess_onepair.py
2
+ # Build a one-pair VITON-like dataset from a user photo + garment.
3
+ # Creates: /<root>/test/{image,cloth,edge,image-parse,pose,warp_feat} + pairs.txt
4
+ #
5
+ # NOTES:
6
+ # - Cloth edge is made with rembg (background removal).
7
+ # - Human mask uses MediaPipe Selfie Segmentation (coarse but works).
8
+ # - pose, image-parse, warp_feat are stubbed for now (zeros/placeholders).
9
+ # You can replace them later with SCHP/OpenPose/PF-AFN for higher quality.
10
+
11
+ from __future__ import annotations
12
+ import os
13
+ import io
14
+ import shutil
15
+ from pathlib import Path
16
+ from typing import Optional, Tuple
17
+
18
+ import numpy as np
19
+ from PIL import Image
20
+ import cv2
21
+
22
+ # Background removal for cloth
23
+ from rembg import remove
24
+
25
+ # Coarse human mask (person vs background)
26
+ import mediapipe as mp
27
+ _mp_seg = mp.solutions.selfie_segmentation.SelfieSegmentation(model_selection=1)
28
+
29
+
30
def _to_pil(img) -> Image.Image:
    """Coerce *img* into a PIL image.

    Accepts an existing PIL image (returned unchanged), a filesystem path
    (str / os.PathLike), or raw encoded bytes; paths and bytes are opened
    and converted to RGB.

    Raises:
        TypeError: if *img* is none of the supported types.
    """
    if isinstance(img, Image.Image):
        return img
    if isinstance(img, bytes):
        source = io.BytesIO(img)
    elif isinstance(img, (str, os.PathLike)):
        source = img
    else:
        raise TypeError("Unsupported image type")
    return Image.open(source).convert("RGB")
38
+
39
+
40
def _resize_pad(im: Image.Image, size: Tuple[int, int] = (512, 512)) -> Image.Image:
    """Letterbox *im* onto a white canvas of *size*, preserving aspect ratio.

    The image is scaled (bicubic) so it fits entirely inside the target,
    then centered on a white RGB canvas; leftover margins stay white.
    """
    target_w, target_h = size
    src_w, src_h = im.size
    ratio = min(target_w / src_w, target_h / src_h)
    new_w = int(src_w * ratio)
    new_h = int(src_h * ratio)
    scaled = im.resize((new_w, new_h), Image.BICUBIC)
    canvas = Image.new("RGB", (target_w, target_h), (255, 255, 255))
    offset = ((target_w - new_w) // 2, (target_h - new_h) // 2)
    canvas.paste(scaled, offset)
    return canvas
50
+
51
+
52
def _cloth_edge(garment_rgb: Image.Image) -> Image.Image:
    """Background-remove the garment and return a binary mask image.

    Runs rembg over the RGB array; the resulting alpha channel is
    thresholded so pixels where cloth exists become white (255) and the
    rest black (0).
    """
    rgb = np.array(garment_rgb)
    cutout = remove(rgb)  # rembg yields an RGBA array
    if cutout.shape[2] == 4:
        alpha = cutout[:, :, 3]
    else:
        # No alpha channel came back — treat the whole frame as cloth.
        alpha = np.full(rgb.shape[:2], 255, dtype=np.uint8)

    # White wherever the alpha says "cloth", black elsewhere.
    mask = np.where(alpha > 10, 255, 0).astype(np.uint8)
    return Image.fromarray(mask)
65
+
66
+
67
def _human_mask(human_rgb: Image.Image) -> Image.Image:
    """Coarse person mask via MediaPipe (white=person, black=bg).

    Thresholds the selfie-segmentation confidence at 0.5 and returns a
    single-channel uint8 mask image (255 = person, 0 = background).

    Fix: the original converted RGB→BGR→RGB with two cv2.cvtColor calls —
    a pixel-value no-op that only wasted a copy. PIL's np.array output is
    already RGB, which is what MediaPipe expects, so it is passed directly.
    """
    arr = np.array(human_rgb)  # PIL → RGB ndarray, the layout MediaPipe wants
    res = _mp_seg.process(arr)
    mask = (res.segmentation_mask > 0.5).astype(np.uint8) * 255
    return Image.fromarray(mask)
75
+
76
+
77
def build_temp_dataset(
    person_img,
    garment_img,
    root: Path | str
) -> str:
    """Build a one-pair VITON-like dataset under <root>/test.

    Args:
        person_img: user photo — PIL image, path, or raw bytes.
        garment_img: garment photo — PIL image, path, or raw bytes.
        root: dataset root; the test split is written to <root>/test.

    Returns:
        The dataroot as a string (str(root)).

    Fixes vs. original: removed the two dead `.parent.mkdir(...)` calls
    that recreated directories already made above, and replaced fragile
    `str.replace(".jpg", ".png")` filename munging with stem-based names
    (identical output names for the fixed names used here).
    """
    root = Path(root)
    test_root = root / "test"

    # Clean slate: wipe any previous run, then create all split subdirs.
    if test_root.exists():
        shutil.rmtree(test_root)
    for sub in ("image", "cloth", "edge", "image-parse", "pose", "warp_feat"):
        (test_root / sub).mkdir(parents=True, exist_ok=True)

    # Normalize both inputs to 512×512 white-letterboxed canvases.
    person_pil = _resize_pad(_to_pil(person_img), (512, 512))
    garment_pil = _resize_pad(_to_pil(garment_img), (512, 512))

    # Fixed one-pair names; stems reused for the derived artifacts below.
    person_name = "user_0001.jpg"
    cloth_name = "cloth_0001.jpg"
    person_stem = Path(person_name).stem
    cloth_stem = Path(cloth_name).stem

    person_pil.save(test_root / "image" / person_name, quality=95)
    garment_pil.save(test_root / "cloth" / cloth_name, quality=95)

    # Cloth edge (white cloth mask on black), stored as PNG.
    edge_pil = _cloth_edge(garment_pil).convert("L").resize((512, 512), Image.NEAREST)
    edge_pil.save(test_root / "edge" / f"{cloth_stem}.png")

    # Human parse (stub = coarse person mask). Most CP datasets expect a
    # PNG with label IDs; this is a soft body mask used as an inpaint mask.
    parse_pil = _human_mask(person_pil).convert("L")
    parse_pil.save(test_root / "image-parse" / f"{person_stem}.png")

    # Pose (stub): zero image placeholder; many repos read JSON keypoints,
    # but the downstream CPDataset may not require pose with an inpaint mask.
    dummy_pose = np.zeros((512, 512, 3), dtype=np.uint8)
    Image.fromarray(dummy_pose).save(test_root / "pose" / f"{person_stem}_keypoints.png")

    # Warp feat (stub): zero tile for loaders expecting precomputed
    # warped-feature images; harmless if ignored at test time.
    wfeat = np.zeros((256, 256, 3), dtype=np.uint8)
    Image.fromarray(wfeat).save(test_root / "warp_feat" / f"{person_stem}_{cloth_stem}.png")

    # pairs.txt: single "<person> <cloth>" line driving the one-pair run.
    with open(test_root / "pairs.txt", "w") as f:
        f.write(f"{person_name} {cloth_name}\n")

    return str(root)