Spaces:
Runtime error
Runtime error
加注释
Browse files- .gitignore +2 -1
- app.py +43 -19
.gitignore
CHANGED
|
@@ -1 +1,2 @@
|
|
| 1 |
-
*.pyc
|
|
|
|
|
|
| 1 |
+
*.pyc
|
| 2 |
+
.idea
|
app.py
CHANGED
|
@@ -124,16 +124,31 @@ pipe = TryonPipeline.from_pretrained(
|
|
| 124 |
pipe.unet_encoder = UNet_Encoder
|
| 125 |
|
| 126 |
@spaces.GPU
|
| 127 |
-
def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_steps,seed,category):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
device = "cuda"
|
| 129 |
-
|
| 130 |
openpose_model.preprocessor.body_estimation.model.to(device)
|
| 131 |
pipe.to(device)
|
| 132 |
pipe.unet_encoder.to(device)
|
| 133 |
|
| 134 |
-
|
|
|
|
| 135 |
human_img_orig = dict["background"].convert("RGB")
|
| 136 |
|
|
|
|
| 137 |
if is_checked_crop:
|
| 138 |
width, height = human_img_orig.size
|
| 139 |
target_width = int(min(width, height * (3 / 4)))
|
|
@@ -148,38 +163,44 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
|
|
| 148 |
else:
|
| 149 |
human_img = human_img_orig.resize((768,1024))
|
| 150 |
|
| 151 |
-
|
| 152 |
if is_checked:
|
|
|
|
|
|
|
| 153 |
keypoints = openpose_model(human_img.resize((384,512)))
|
|
|
|
| 154 |
model_parse, _ = parsing_model(human_img.resize((384,512)))
|
|
|
|
| 155 |
mask, mask_gray = get_mask_location('hd', category, model_parse, keypoints)
|
| 156 |
mask = mask.resize((768,1024))
|
| 157 |
else:
|
|
|
|
| 158 |
mask = pil_to_binary_mask(dict['layers'][0].convert("RGB").resize((768, 1024)))
|
| 159 |
-
|
| 160 |
-
|
| 161 |
mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
|
| 162 |
mask_gray = to_pil_image((mask_gray+1.0)/2.0)
|
| 163 |
|
| 164 |
-
|
|
|
|
| 165 |
human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
|
| 166 |
human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
|
| 167 |
|
| 168 |
-
|
| 169 |
-
|
| 170 |
args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
|
| 171 |
-
# verbosity = getattr(args, "verbosity", None)
|
| 172 |
pose_img = args.func(args,human_img_arg)
|
| 173 |
pose_img = pose_img[:,:,::-1]
|
| 174 |
pose_img = Image.fromarray(pose_img).resize((768,1024))
|
| 175 |
|
|
|
|
| 176 |
with torch.no_grad():
|
| 177 |
-
# Extract the images
|
| 178 |
with torch.cuda.amp.autocast():
|
| 179 |
with torch.no_grad():
|
|
|
|
| 180 |
prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), the model is wearing " + garment_des
|
| 181 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
|
| 182 |
with torch.inference_mode():
|
|
|
|
| 183 |
(
|
| 184 |
prompt_embeds,
|
| 185 |
negative_prompt_embeds,
|
|
@@ -192,6 +213,7 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
|
|
| 192 |
negative_prompt=negative_prompt,
|
| 193 |
)
|
| 194 |
|
|
|
|
| 195 |
prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), a photo of " + garment_des
|
| 196 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
|
| 197 |
if not isinstance(prompt, List):
|
|
@@ -211,11 +233,12 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
|
|
| 211 |
negative_prompt=negative_prompt,
|
| 212 |
)
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
garm_tensor = tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
|
| 218 |
generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
|
|
|
|
|
|
|
| 219 |
images = pipe(
|
| 220 |
prompt_embeds=prompt_embeds.to(device,torch.float16),
|
| 221 |
negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
|
|
@@ -223,18 +246,19 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
|
|
| 223 |
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
|
| 224 |
num_inference_steps=denoise_steps,
|
| 225 |
generator=generator,
|
| 226 |
-
strength
|
| 227 |
-
pose_img
|
| 228 |
text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
|
| 229 |
-
cloth
|
| 230 |
mask_image=mask,
|
| 231 |
image=human_img,
|
| 232 |
height=1024,
|
| 233 |
width=768,
|
| 234 |
-
ip_adapter_image
|
| 235 |
guidance_scale=2.0,
|
| 236 |
)[0]
|
| 237 |
|
|
|
|
| 238 |
if is_checked_crop:
|
| 239 |
out_img = images[0].resize(crop_size)
|
| 240 |
human_img_orig.paste(out_img, (int(left), int(top)))
|
|
|
|
| 124 |
pipe.unet_encoder = UNet_Encoder
|
| 125 |
|
| 126 |
@spaces.GPU
|
| 127 |
+
def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed, category):
|
| 128 |
+
"""虚拟试衣主函数
|
| 129 |
+
Args:
|
| 130 |
+
dict: 输入图像字典,包含背景和图层信息
|
| 131 |
+
garm_img: 服装图片
|
| 132 |
+
garment_des: 服装描述文本
|
| 133 |
+
is_checked: 是否启用自动检测模式
|
| 134 |
+
is_checked_crop: 是否启用图像裁剪
|
| 135 |
+
denoise_steps: 去噪步数
|
| 136 |
+
seed: 随机种子
|
| 137 |
+
category: 服装类别
|
| 138 |
+
Returns:
|
| 139 |
+
生成的试衣结果图像和灰度遮罩
|
| 140 |
+
"""
|
| 141 |
+
# 1. 初始化和设备设置 - 使用GPU进行处理
|
| 142 |
device = "cuda"
|
|
|
|
| 143 |
openpose_model.preprocessor.body_estimation.model.to(device)
|
| 144 |
pipe.to(device)
|
| 145 |
pipe.unet_encoder.to(device)
|
| 146 |
|
| 147 |
+
# 2. 图像预处理 - 调整服装和人物图像大小
|
| 148 |
+
garm_img = garm_img.convert("RGB").resize((768,1024))
|
| 149 |
human_img_orig = dict["background"].convert("RGB")
|
| 150 |
|
| 151 |
+
# 2.1 如果启用裁剪,按3:4比例裁剪人物图像
|
| 152 |
if is_checked_crop:
|
| 153 |
width, height = human_img_orig.size
|
| 154 |
target_width = int(min(width, height * (3 / 4)))
|
|
|
|
| 163 |
else:
|
| 164 |
human_img = human_img_orig.resize((768,1024))
|
| 165 |
|
| 166 |
+
# 3. 生成遮罩
|
| 167 |
if is_checked:
|
| 168 |
+
# 3.1 使用自动检测模式
|
| 169 |
+
# 使用OpenPose检测人体关键点
|
| 170 |
keypoints = openpose_model(human_img.resize((384,512)))
|
| 171 |
+
# 使用解析模型生成人体部位解析
|
| 172 |
model_parse, _ = parsing_model(human_img.resize((384,512)))
|
| 173 |
+
# 根据类别和关键点生成遮罩
|
| 174 |
mask, mask_gray = get_mask_location('hd', category, model_parse, keypoints)
|
| 175 |
mask = mask.resize((768,1024))
|
| 176 |
else:
|
| 177 |
+
# 3.2 使用手动提供的遮罩
|
| 178 |
mask = pil_to_binary_mask(dict['layers'][0].convert("RGB").resize((768, 1024)))
|
| 179 |
+
|
| 180 |
+
# 3.3 生成灰度遮罩
|
| 181 |
mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
|
| 182 |
mask_gray = to_pil_image((mask_gray+1.0)/2.0)
|
| 183 |
|
| 184 |
+
# 4. 姿态处理
|
| 185 |
+
# 4.1 调整图像方向并转换格式
|
| 186 |
human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
|
| 187 |
human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
|
| 188 |
|
| 189 |
+
# 4.2 使用DensePose生成姿态信息
|
|
|
|
| 190 |
args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
|
|
|
|
| 191 |
pose_img = args.func(args,human_img_arg)
|
| 192 |
pose_img = pose_img[:,:,::-1]
|
| 193 |
pose_img = Image.fromarray(pose_img).resize((768,1024))
|
| 194 |
|
| 195 |
+
# 5. AI生成过程
|
| 196 |
with torch.no_grad():
|
|
|
|
| 197 |
with torch.cuda.amp.autocast():
|
| 198 |
with torch.no_grad():
|
| 199 |
+
# 5.1 生成正面提示词嵌入
|
| 200 |
prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), the model is wearing " + garment_des
|
| 201 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
|
| 202 |
with torch.inference_mode():
|
| 203 |
+
# 编码提示词
|
| 204 |
(
|
| 205 |
prompt_embeds,
|
| 206 |
negative_prompt_embeds,
|
|
|
|
| 213 |
negative_prompt=negative_prompt,
|
| 214 |
)
|
| 215 |
|
| 216 |
+
# 5.2 生成服装相关的提示词嵌入
|
| 217 |
prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), a photo of " + garment_des
|
| 218 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
|
| 219 |
if not isinstance(prompt, List):
|
|
|
|
| 233 |
negative_prompt=negative_prompt,
|
| 234 |
)
|
| 235 |
|
| 236 |
+
# 5.3 准备输入张量
|
| 237 |
+
pose_img = tensor_transfrom(pose_img).unsqueeze(0).to(device,torch.float16)
|
| 238 |
+
garm_tensor = tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
|
|
|
|
| 239 |
generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
|
| 240 |
+
|
| 241 |
+
# 6. 使用Stable Diffusion XL管道生成图像
|
| 242 |
images = pipe(
|
| 243 |
prompt_embeds=prompt_embeds.to(device,torch.float16),
|
| 244 |
negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
|
|
|
|
| 246 |
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
|
| 247 |
num_inference_steps=denoise_steps,
|
| 248 |
generator=generator,
|
| 249 |
+
strength=1.0,
|
| 250 |
+
pose_img=pose_img.to(device,torch.float16),
|
| 251 |
text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
|
| 252 |
+
cloth=garm_tensor.to(device,torch.float16),
|
| 253 |
mask_image=mask,
|
| 254 |
image=human_img,
|
| 255 |
height=1024,
|
| 256 |
width=768,
|
| 257 |
+
ip_adapter_image=garm_img.resize((768,1024)),
|
| 258 |
guidance_scale=2.0,
|
| 259 |
)[0]
|
| 260 |
|
| 261 |
+
# 7. 后处理 - 处理裁剪情况并返回结果
|
| 262 |
if is_checked_crop:
|
| 263 |
out_img = images[0].resize(crop_size)
|
| 264 |
human_img_orig.paste(out_img, (int(left), int(top)))
|