import contextlib
import io
import math
import os
from typing import Tuple, Dict, Any

import numpy as np
import torch
from PIL import Image, ImageOps

# The img2img ControlNet pipeline is needed here: it takes both an init image
# (the rough garment composite) and a separate pose conditioning image.
from diffusers import (
    StableDiffusionControlNetImg2ImgPipeline,
    ControlNetModel,
    UniPCMultistepScheduler,
)
from transformers import logging as hf_logging

# Pose-estimation preprocessor used to build the ControlNet conditioning image.
from controlnet_aux import OpenposeDetector

# Background removal for the garment image.
from rembg import remove

hf_logging.set_verbosity_error()

MODEL_ID = "runwayml/stable-diffusion-v1-5"
CONTROLNET_ID = "lllyasviel/sd-controlnet-openpose"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Lazily-initialized singletons so the heavy models load only once per process.
_PIPELINE = None
_OP_DETECTOR = None


def get_openpose_detector():
    """Return a cached OpenposeDetector, loading it on first use."""
    global _OP_DETECTOR
    if _OP_DETECTOR is None:
        _OP_DETECTOR = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
    return _OP_DETECTOR


def load_pipeline():
    """
    Load the ControlNet + Stable Diffusion pipeline (half precision when possible).
    """
    global _PIPELINE
    if _PIPELINE is not None:
        return _PIPELINE

    dtype = torch.float16 if DEVICE == "cuda" else torch.float32
    controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=dtype)

    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        MODEL_ID,
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=dtype,
    )

    # UniPC converges in fewer steps than the default scheduler.
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    if DEVICE == "cuda":
        # Attention slicing trades a little speed for a lower VRAM peak.
        pipe.enable_attention_slicing()
    pipe.to(DEVICE)

    _PIPELINE = pipe
    return _PIPELINE
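

# Optional, hedged sketch: on small GPUs the pipeline can be made lighter by
# calling this on the pipe returned by load_pipeline(). Both calls are
# standard diffusers APIs, but they assume the optional xformers / accelerate
# packages are installed, so failures are swallowed here.
def enable_memory_optimizations(pipe) -> None:
    try:
        pipe.enable_xformers_memory_efficient_attention()  # requires xformers
    except Exception:
        pass
    try:
        # Requires accelerate; streams submodules to the GPU on demand.
        pipe.enable_model_cpu_offload()
    except Exception:
        pass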


def remove_background(pil_img: Image.Image) -> Image.Image:
    """
    Remove the background from the garment image using rembg
    (returns RGBA with an alpha channel).
    """
    # Round-trip through PNG bytes; recent rembg versions also accept a PIL
    # image directly, but bytes keep compatibility with older releases.
    buf = io.BytesIO()
    pil_img.convert("RGBA").save(buf, format="PNG")
    out = remove(buf.getvalue())
    return Image.open(io.BytesIO(out)).convert("RGBA")


def simple_align_garment_to_model(model_img: Image.Image, garment_rgba: Image.Image, pose_keypoints=None) -> Image.Image:
    """
    Perform a simple alignment: scale the garment by the (estimated) shoulder
    distance and paste it over the model roughly at the torso. Returns an RGBA
    image (with the model). This is only the initialization; SD + ControlNet
    does the refinement.
    """
    model = model_img.convert("RGBA")
    g = garment_rgba

    Wm, Hm = model.size
    Wg, Hg = g.size

    if pose_keypoints is None:
        # No pose information: assume the garment spans about half the image
        # width and sits a little below the top, roughly at chest height.
        target_w = int(Wm * 0.5)
        scale = target_w / Wg
        new_size = (max(1, int(Wg * scale)), max(1, int(Hg * scale)))
        g_resized = g.resize(new_size, resample=Image.LANCZOS)
        pos = ((Wm - new_size[0]) // 2, int(Hm * 0.28))
        canvas = model.copy()
        canvas.paste(g_resized, pos, g_resized)
        return canvas

    try:
        ls = pose_keypoints.get("left_shoulder")
        rs = pose_keypoints.get("right_shoulder")
        if ls and rs:
            shoulder_dist = math.hypot(rs[0] - ls[0], rs[1] - ls[1])
            # Garments are somewhat wider than the shoulder span itself.
            target_w = int(shoulder_dist * 1.4)
            scale = max(0.1, target_w / Wg)
            new_size = (max(1, int(Wg * scale)), max(1, int(Hg * scale)))
            g_resized = g.resize(new_size, resample=Image.LANCZOS)

            # Center horizontally between the shoulders; anchor slightly above
            # the shoulder line so the collar lands near the neck.
            center_x = int((ls[0] + rs[0]) / 2)
            top_y = int((ls[1] + rs[1]) / 1.8)
            pos = (max(0, center_x - new_size[0] // 2), max(0, top_y - new_size[1] // 6))
            canvas = model.copy()
            canvas.paste(g_resized, pos, g_resized)
            return canvas
    except Exception:
        pass

    # Keypoints were missing or unusable: fall back to the heuristic placement.
    return simple_align_garment_to_model(model_img, garment_rgba, pose_keypoints=None)


def extract_pose_and_keypoints(model_img: Image.Image) -> Tuple[Image.Image, Dict[str, Tuple[int, int]]]:
    """
    Use controlnet_aux.OpenposeDetector to generate the pose map (image) and try
    to return useful keypoints (shoulders). keypoints dict = {"left_shoulder": (x, y), ...}
    """
    detector = get_openpose_detector()
    try:
        pose_image = detector(model_img)
        pose_image = pose_image.convert("RGB")

        # Calling the detector only yields the rendered pose map; raw keypoint
        # coordinates are not extracted here, so the dict stays empty and
        # simple_align_garment_to_model falls back to its heuristic placement.
        # A hedged sketch of raw-keypoint extraction follows this function.
        keypoints = {}

        return pose_image, keypoints
    except Exception:
        # Pose detection failed: return a blank conditioning image so the rest
        # of the pipeline can still run.
        blank = Image.new("RGB", model_img.size, (255, 255, 255))
        return blank, {}
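

# Hedged sketch of the keypoint extraction left unimplemented above. Newer
# controlnet_aux releases expose a detect_poses() method returning PoseResult
# objects with normalized body keypoints; the method name, result layout, and
# COCO index order (2 = right shoulder, 5 = left shoulder) are assumptions
# about that API and may need adjusting to the installed version.
def try_extract_shoulder_keypoints(detector, model_img: Image.Image) -> Dict[str, Tuple[int, int]]:
    keypoints = {}
    try:
        poses = detector.detect_poses(np.array(model_img))  # assumed API
        if poses and poses[0].body.keypoints:
            body = poses[0].body.keypoints
            W, H = model_img.size
            rs, ls = body[2], body[5]  # assumed COCO ordering
            if rs is not None:
                keypoints["right_shoulder"] = (int(rs.x * W), int(rs.y * H))
            if ls is not None:
                keypoints["left_shoulder"] = (int(ls.x * W), int(ls.y * H))
    except Exception:
        pass
    return keypoints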


def run_pipeline(model_image: Image.Image, garment_image: Image.Image, prompt_extra: str = "") -> Tuple[Image.Image, Dict[str, Any]]:
    """
    Main entry point that:
      1) extracts the pose (pose_map)
      2) removes the garment background and aligns it simplistically
      3) builds an initial image (init_image) with the garment over the model (RGBA)
      4) calls Stable Diffusion + ControlNet (image2image) using pose_map as the conditioning image
    Returns: pil_image_result, info_dict
    """
    # Downscale so the longest side is at most 768 px; SD 1.5 behaves best
    # near its training resolution and larger inputs only cost memory.
    max_side = 768
    model_img = model_image.convert("RGB")
    W, H = model_img.size
    scale = max_side / max(W, H) if max(W, H) > max_side else 1.0
    if scale != 1.0:
        model_img = model_img.resize((int(W * scale), int(H * scale)), Image.LANCZOS)

    # Cut the garment out of its background.
    garment_rgba = remove_background(garment_image)

    # Pose map for ControlNet conditioning (keypoints are typically empty; see
    # extract_pose_and_keypoints).
    pose_map, keypoints = extract_pose_and_keypoints(model_img)

    # Rough initial composite of the garment over the model.
    init_composite = simple_align_garment_to_model(model_img, garment_rgba, pose_keypoints=keypoints)

    pipe = load_pipeline()

    prompt = ("photo-realistic fashion try-on, ultra detailed, high resolution, realistic lighting. "
              + (prompt_extra or "garment applied on person, preserve texture and zippers, realistic folds."))

    init_image = init_composite.convert("RGB")
    control_image = pose_map.convert("RGB")

    # Moderate steps/guidance; strength < 1 keeps the composite's layout while
    # letting the model repaint the garment region.
    num_inference_steps = 20
    guidance_scale = 7.5
    strength = 0.75

    # Fresh random seed per call; record the seed if reproducibility matters.
    generator = torch.Generator(device=DEVICE).manual_seed(torch.randint(0, 2**31 - 1, (1,)).item())

    pipe.to(DEVICE)

    try:
        # Autocast only on CUDA; on CPU run in full precision.
        ctx = torch.autocast(device_type="cuda") if DEVICE == "cuda" else contextlib.nullcontext()
        with ctx:
            out = pipe(
                prompt=prompt,
                image=init_image,
                control_image=control_image,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                strength=strength,
                generator=generator,
            )
        result_img = out.images[0]
    except TypeError:
        # Fallback for older diffusers releases that used different argument names.
        out = pipe(
            prompt=prompt,
            init_image=init_image,
            controlnet_conditioning_image=control_image,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            strength=strength,
            generator=generator,
        )
        result_img = out.images[0]

    info = {
        "model_id": MODEL_ID,
        "controlnet_id": CONTROLNET_ID,
        "steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        "strength": strength,
    }
    return result_img, info
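

# Minimal usage sketch: the file names below are hypothetical placeholders,
# not assets that ship with this module.
if __name__ == "__main__":
    model = Image.open("model.jpg")      # hypothetical photo of a person
    garment = Image.open("garment.png")  # hypothetical product shot of a garment
    result, info = run_pipeline(model, garment, prompt_extra="red satin jacket, studio lighting")
    print(info)
    result.save("tryon_result.png")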