Create utils/editor.py
utils/editor.py +258 -0
utils/editor.py
ADDED
@@ -0,0 +1,258 @@
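# Dependencies implied by the imports below (a sketch of the install line; pin versions as needed):
#   pip install torch diffusers transformers controlnet_aux rembg pillow numpy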
# utils/editor.py
import io
import math
from contextlib import nullcontext
from typing import Tuple, Dict, Any
from PIL import Image
import numpy as np

import torch
from diffusers import StableDiffusionControlNetImg2ImgPipeline, ControlNetModel, UniPCMultistepScheduler
from transformers import logging as hf_logging
hf_logging.set_verbosity_error()

# auxiliary detector used to generate an OpenPose-style pose map
from controlnet_aux import OpenposeDetector

# background removal for the garment image (extracts RGBA)
from rembg import remove

# default parameters (adjust as needed)
MODEL_ID = "runwayml/stable-diffusion-v1-5"          # SD v1.5 base
CONTROLNET_ID = "lllyasviel/sd-controlnet-openpose"  # OpenPose ControlNet
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# pipeline cache globals
_PIPELINE = None
_OP_DETECTOR = None

def get_openpose_detector():
    global _OP_DETECTOR
    if _OP_DETECTOR is None:
        # OpenposeDetector is constructed via from_pretrained, which downloads the annotator weights
        _OP_DETECTOR = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
    return _OP_DETECTOR

def load_pipeline():
    """
    Load the ControlNet + Stable Diffusion img2img pipeline (half precision when possible).
    """
    global _PIPELINE
    if _PIPELINE is not None:
        return _PIPELINE

    dtype = torch.float16 if DEVICE == "cuda" else torch.float32
    # load the ControlNet
    controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=dtype)
    # load the SD + ControlNet img2img pipeline (takes an init image plus a control image)
    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        MODEL_ID,
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=dtype,
    )
    # use the UniPC scheduler, which improves the speed/quality trade-off
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    if DEVICE == "cuda":
        pipe.enable_attention_slicing()  # saves VRAM
    pipe.to(DEVICE)

    # autocast is handled at inference time in run_pipeline
    _PIPELINE = pipe
    return _PIPELINE

def remove_background(pil_img: Image.Image) -> Image.Image:
    """
    Remove the background of the garment image using rembg (returns RGBA with alpha).
    """
    # rembg expects bytes
    img_bytes = io.BytesIO()
    pil_img.convert("RGBA").save(img_bytes, format="PNG")
    img_bytes = img_bytes.getvalue()
    out = remove(img_bytes)
    # out is PNG bytes with an alpha channel
    out_img = Image.open(io.BytesIO(out)).convert("RGBA")
    return out_img

def simple_align_garment_to_model(model_img: Image.Image, garment_rgba: Image.Image, pose_keypoints=None) -> Image.Image:
    """
    Simple alignment: scales the garment by the (estimated) shoulder distance and
    pastes it onto the model roughly over the torso. Returns an RGBA image (with
    the model). This is only the initialization; SD + ControlNet does the refinement.
    """
    model = model_img.convert("RGBA")
    g = garment_rgba

    Wm, Hm = model.size
    Wg, Hg = g.size

    # fallback: center the garment when there are no keypoints
    if pose_keypoints is None:
        # scale to half the model's width
        target_w = int(Wm * 0.5)
        scale = target_w / Wg
        new_size = (max(1, int(Wg * scale)), max(1, int(Hg * scale)))
        g_resized = g.resize(new_size, resample=Image.LANCZOS)
        pos = ((Wm - new_size[0]) // 2, int(Hm * 0.28))  # 28% from the top as a rough torso position
        canvas = model.copy()
        canvas.paste(g_resized, pos, g_resized)
        return canvas

    # if we have keypoints, try to size the garment from the shoulders
    try:
        # keypoints: dict of name -> (x, y) in pixel coords (as returned below)
        ls = pose_keypoints.get("left_shoulder")
        rs = pose_keypoints.get("right_shoulder")
        if ls and rs:
            shoulder_dist = math.hypot(rs[0] - ls[0], rs[1] - ls[1])
            # the garment should cover ~1.4x the shoulder width (tune per garment)
            target_w = int(shoulder_dist * 1.4)
            scale = max(0.1, target_w / Wg)
            new_size = (max(1, int(Wg * scale)), max(1, int(Hg * scale)))
            g_resized = g.resize(new_size, resample=Image.LANCZOS)
            # center horizontally between the shoulders, anchored slightly below them
            center_x = int((ls[0] + rs[0]) / 2)
            top_y = int((ls[1] + rs[1]) / 1.8)  # nudges the anchor up/down
            pos = (max(0, center_x - new_size[0] // 2), max(0, top_y - new_size[1] // 6))
            canvas = model.copy()
            canvas.paste(g_resized, pos, g_resized)
            return canvas
    except Exception:
        pass

    # fallback
    return simple_align_garment_to_model(model_img, garment_rgba, pose_keypoints=None)

def extract_pose_and_keypoints(model_img: Image.Image) -> Tuple[Image.Image, Dict[str, Tuple[int, int]]]:
    """
    Uses controlnet_aux.OpenposeDetector to generate the pose map (image) and tries
    to return useful keypoints (shoulders). keypoints dict = {"left_shoulder": (x, y), ...}
    """
    detector = get_openpose_detector()
    try:
        # calling the detector returns a PIL image of the pose map
        pose_image = detector(model_img)
        # try to get structured keypoints; detect_poses exists in recent
        # controlnet_aux releases, but the API varies by version, so stay defensive
        keypoints = {}
        try:
            poses = detector.detect_poses(np.array(model_img))
            if poses and poses[0].body is not None:
                body = poses[0].body.keypoints  # normalized (x, y), COCO ordering
                W, H = model_img.size
                # COCO ordering: index 2 = right shoulder, index 5 = left shoulder;
                # entries are None when the joint was not detected
                rs, ls = body[2], body[5]
                if rs is not None:
                    keypoints["right_shoulder"] = (int(rs.x * W), int(rs.y * H))
                if ls is not None:
                    keypoints["left_shoulder"] = (int(ls.x * W), int(ls.y * H))
            return pose_image.convert("RGB"), keypoints
        except Exception:
            # no structured keypoints available: return the pose image and an empty dict
            return pose_image.convert("RGB"), {}
    except Exception:
        # last fallback: return a blank white pose map and empty keypoints
        blank = Image.new("RGB", model_img.size, (255, 255, 255))
        return blank, {}

def run_pipeline(model_image: Image.Image, garment_image: Image.Image, prompt_extra: str = "") -> Tuple[Image.Image, Dict[str, Any]]:
    """
    Main entry point:
      1) extracts the pose (pose_map)
      2) removes the garment background and aligns it naively
      3) builds an initial image (init_image) with the garment over the model (RGBA)
      4) runs Stable Diffusion + ControlNet (image2image) with pose_map as the conditioning image
    Returns: pil_image_result, info_dict
    """
    # resize to a consistent size (longer side capped at 768 to balance quality/VRAM);
    # dimensions are snapped to multiples of 8, which SD expects
    max_side = 768
    model_img = model_image.convert("RGB")
    W, H = model_img.size
    scale = max_side / max(W, H) if max(W, H) > max_side else 1.0
    new_w = max(64, int(W * scale) // 8 * 8)
    new_h = max(64, int(H * scale) // 8 * 8)
    if (new_w, new_h) != (W, H):
        model_img = model_img.resize((new_w, new_h), Image.LANCZOS)

    # garment: remove the background to get alpha
    garment_rgba = remove_background(garment_image)

    # get the pose map and shoulder keypoints
    pose_map, keypoints = extract_pose_and_keypoints(model_img)

    # align the garment roughly
    init_composite = simple_align_garment_to_model(model_img, garment_rgba, pose_keypoints=keypoints)

    # prepare the pipeline and the control image
    pipe = load_pipeline()

    # prompt: combine prompt_extra with a basic default garment description
    prompt = ("photo-realistic fashion try-on, ultra detailed, high resolution, realistic lighting. "
              + (prompt_extra or "garment applied on person, preserve texture and zippers, realistic folds."))

    # convert images to the expected formats; the control image must match the init image size
    init_image = init_composite.convert("RGB")
    control_image = pose_map.convert("RGB").resize(init_image.size, Image.LANCZOS)

    # inference parameters (lower steps/size if you hit OOM)
    num_inference_steps = 20
    guidance_scale = 7.5
    strength = 0.75  # image2image strength (how much to change the init image)

    generator = torch.Generator(device=DEVICE).manual_seed(torch.randint(0, 2**31 - 1, (1,)).item())

    # run under autocast for fp16 when a GPU is available; no-op context on CPU
    autocast_ctx = torch.autocast(device_type="cuda") if DEVICE == "cuda" else nullcontext()

    try:
        with autocast_ctx:
            out = pipe(
                prompt=prompt,
                image=init_image,
                control_image=control_image,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                strength=strength,
                generator=generator,
            )
        # out.images is a list
        result_img = out.images[0]
    except TypeError:
        # older diffusers releases used different keyword names; retry with those
        out = pipe(
            prompt=prompt,
            init_image=init_image,
            controlnet_conditioning_image=control_image,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            strength=strength,
            generator=generator,
        )
        result_img = out.images[0]

    info = {
        "model_id": MODEL_ID,
        "controlnet_id": CONTROLNET_ID,
        "steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        "strength": strength,
    }
    return result_img, info
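
For reference, a minimal usage sketch of the module above; the file names are hypothetical placeholders:

# example usage (illustrative only; "model.jpg" and "garment.png" are placeholder paths)
from PIL import Image
from utils.editor import run_pipeline

model = Image.open("model.jpg").convert("RGB")      # full-body photo of the person
garment = Image.open("garment.png").convert("RGB")  # product shot of the clothing item

result, info = run_pipeline(model, garment, prompt_extra="red denim jacket, realistic folds")
result.save("tryon_result.png")
print(info)  # model ids and inference parameters used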