Update dci_vton_infer.py
Browse files- dci_vton_infer.py +90 -56
dci_vton_infer.py
CHANGED
|
@@ -1,99 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
import os, glob, subprocess, tempfile
|
| 3 |
from pathlib import Path
|
| 4 |
-
from typing import Optional, Dict
|
|
|
|
| 5 |
from PIL import Image, ImageFilter
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
-
import numpy as np
|
| 8 |
|
| 9 |
-
def _pil_to_rgba(im: Image.Image): return im if im.mode=="RGBA" else im.convert("RGBA")
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
return mask
|
| 16 |
|
| 17 |
-
def _quick_blend(
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
class DciVtonPredictor:
|
| 31 |
-
def __init__(self, device="cuda"):
|
| 32 |
-
self.device
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
print("[DCI] downloading viton512_v2.ckpt …")
|
| 35 |
-
self.viton_ckpt=hf_hub_download(
|
| 36 |
print("[DCI] downloading warp_viton.pth …")
|
| 37 |
-
self.warp_pth=hf_hub_download(
|
| 38 |
-
self.ready=True; print(f"[DCI] backend ready (device={device})")
|
| 39 |
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
| 41 |
self,
|
| 42 |
person_img: Image.Image,
|
| 43 |
garment_img: Image.Image,
|
| 44 |
mask_img: Optional[Image.Image] = None,
|
| 45 |
cfg: Optional[Dict] = None
|
| 46 |
) -> Image.Image:
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
cfg = cfg or {}
|
| 49 |
-
fit
|
| 50 |
blend = float(cfg.get("blend", 0.9))
|
| 51 |
torso = tuple(cfg.get("torso", (0.30, 0.68)))
|
| 52 |
dataroot = cfg.get("dataroot")
|
| 53 |
|
| 54 |
-
# If no dataset
|
| 55 |
if not dataroot:
|
| 56 |
-
print("[DCI] PREVIEW: no dataroot provided.")
|
| 57 |
return _quick_blend(person_img, garment_img, mask_img, fit, blend, torso)
|
| 58 |
|
| 59 |
-
# Try
|
| 60 |
-
print(f"[DCI] REAL DCI: dataroot={dataroot}")
|
| 61 |
try:
|
| 62 |
outdir = Path(tempfile.mkdtemp(prefix="dci_out_"))
|
| 63 |
cmd = [
|
| 64 |
-
"python",
|
| 65 |
-
"test.py",
|
| 66 |
"--config", "dci_vton/configs/viton512_v2.yaml",
|
| 67 |
"--ckpt", self.viton_ckpt,
|
| 68 |
"--dataroot", str(dataroot),
|
| 69 |
-
"--H", "512",
|
| 70 |
-
"--W", "512",
|
| 71 |
"--n_samples", "1",
|
| 72 |
"--ddim_steps", "30",
|
| 73 |
"--outdir", str(outdir),
|
| 74 |
]
|
| 75 |
print("[DCI] running:", " ".join(cmd))
|
| 76 |
-
|
| 77 |
-
print("[DCI] test.py exit code:", p.returncode)
|
| 78 |
-
if p.stdout:
|
| 79 |
-
print("[DCI][stdout]\n", p.stdout)
|
| 80 |
-
if p.stderr:
|
| 81 |
-
print("[DCI][stderr]\n", p.stderr)
|
| 82 |
-
|
| 83 |
-
if p.returncode != 0:
|
| 84 |
-
raise RuntimeError(f"test.py failed with code {p.returncode}")
|
| 85 |
-
|
| 86 |
-
res = sorted(glob.glob(str(outdir / "result" / "*.png")))
|
| 87 |
-
if not res:
|
| 88 |
-
raise RuntimeError("No result image produced by test.py.")
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
except Exception as e:
|
| 94 |
-
|
| 95 |
-
print("[DCI] ERROR running test.py:", repr(e))
|
| 96 |
-
if strict:
|
| 97 |
-
raise
|
| 98 |
-
print("[DCI] FALLBACK to preview blend.")
|
| 99 |
return _quick_blend(person_img, garment_img, mask_img, fit, blend, torso)
|
|
|
|
| 1 |
+
# dci_vton_infer.py
|
| 2 |
+
# Try real DCI-VTON via test.py; otherwise fall back to a neat preview overlay.
|
| 3 |
+
|
| 4 |
from __future__ import annotations
|
| 5 |
import os, glob, subprocess, tempfile
|
| 6 |
from pathlib import Path
|
| 7 |
+
from typing import Optional, Dict, Tuple
|
| 8 |
+
|
| 9 |
from PIL import Image, ImageFilter
|
| 10 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 11 |
|
|
|
|
| 12 |
|
| 13 |
+
# ---------- tiny preview helpers ----------
|
| 14 |
+
def _to_rgba(im: Image.Image) -> Image.Image:
|
| 15 |
+
return im if im.mode == "RGBA" else im.convert("RGBA")
|
| 16 |
+
|
| 17 |
+
def _auto_mask_torso(human: Image.Image, top_rel: float, bot_rel: float, feather: int) -> Image.Image:
    """Return an "L"-mode mask: white over the torso band, black elsewhere.

    The band spans rows ``[height*top_rel, height*bot_rel)``; both fractions
    are clamped to [0, 1]. A positive ``feather`` softens the band edges with
    a Gaussian blur of that radius.
    """
    width, height = human.size

    def _row(rel: float) -> int:
        # Clamp the relative position before converting to a pixel row.
        return int(height * max(0.0, min(1.0, float(rel))))

    band_top = _row(top_rel)
    band_bottom = _row(bot_rel)

    mask = Image.new("L", (width, height), 0)
    # Band is at least 1 px tall so the paste never degenerates.
    band = Image.new("L", (width, max(1, band_bottom - band_top)), 255)
    mask.paste(band, (0, band_top))

    if feather > 0:
        mask = mask.filter(ImageFilter.GaussianBlur(radius=feather))
    return mask
|
| 27 |
|
| 28 |
+
def _quick_blend(
    person: Image.Image,
    garment: Image.Image,
    mask_img: Optional[Image.Image],
    fit_width: str,
    blend_strength: float,
    torso: Tuple[float, float]
) -> Image.Image:
    """Cheap preview: scale the garment, paste it over the torso band, blend.

    Args:
        person: photo of the person (any PIL mode).
        garment: garment image; its own alpha channel is used as the paste
            mask, so a transparent garment background never hides the person.
        mask_img: optional region mask; the garment is kept only where the
            mask is bright. ``None`` → garment shows wherever it was pasted.
        fit_width: "Slim (75%)" | "Relaxed (85%)" | "Wide (95%)"; any other
            value falls back to the 0.85 ratio.
        blend_strength: opacity of the dressed preview, clamped to [0, 1].
        torso: (top_rel, bot_rel) relative vertical extent of the torso band.

    Returns:
        RGB preview image, same size as *person*.
    """
    human = _to_rgba(person)
    cloth = _to_rgba(garment)

    hw, hh = human.size
    gw, gh = cloth.size

    # Scale the garment to a fraction of the person's width, keeping aspect.
    fit_ratio = {"Slim (75%)": 0.75, "Relaxed (85%)": 0.85, "Wide (95%)": 0.95}.get(fit_width, 0.85)
    target_w = max(1, int(hw * fit_ratio))
    scale = target_w / max(1, gw)
    target_h = max(1, int(gh * scale))
    cloth = cloth.resize((target_w, target_h), Image.BICUBIC)

    # Centre the garment horizontally, and vertically within the torso band.
    top_rel, bot_rel = torso
    y_top_full = int(hh * top_rel)
    y_bot_full = int(hh * bot_rel)
    torso_h = max(1, y_bot_full - y_top_full)
    x_left = (hw - target_w) // 2
    y_top = y_top_full + (torso_h - target_h) // 2

    # BUG FIX: the previous version composited a mostly-transparent RGBA
    # overlay over the person with an all-white default mask, which replaced
    # the entire photo with transparent pixels (black after .convert("RGB")).
    # Paste the garment directly onto a copy of the person, using the
    # garment's own alpha, so the background always survives.
    mixed = human.copy()
    mixed.paste(cloth, (x_left, y_top), cloth)

    # Optional region mask: keep the garment only where the mask is bright.
    if mask_img is not None:
        region = mask_img.convert("L")
        if region.size != (hw, hh):
            # Image.composite requires matching sizes; resize defensively.
            region = region.resize((hw, hh))
        mixed = Image.composite(mixed, human, region)

    # Final opacity blend between the untouched person and the dressed preview.
    alpha = max(0.0, min(1.0, float(blend_strength)))
    out = Image.blend(human, mixed, alpha)
    return out.convert("RGB")
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# ---------- main predictor ----------
|
| 71 |
class DciVtonPredictor:
    """DCI-VTON virtual try-on wrapper.

    Construction eagerly downloads the model checkpoints from the Hugging
    Face Hub. ``predict`` attempts the real pipeline by shelling out to the
    upstream ``test.py`` when a prepared ``dataroot`` is supplied; on any
    failure (or with no dataroot) it degrades to the `_quick_blend` preview
    overlay so the caller always gets an image back.
    """

    # Single source of truth for the weights repo (was duplicated inline).
    _WEIGHTS_REPO = "venbab/dci-vton-weights"

    def __init__(self, device: str = "cuda"):
        self.device = device
        self.ready = False

        # Fetch checkpoints up front so the first predict() does not stall.
        print("[DCI] downloading viton512_v2.ckpt …")
        self.viton_ckpt = hf_hub_download(repo_id=self._WEIGHTS_REPO, filename="viton512_v2.ckpt")
        print("[DCI] downloading warp_viton.pth …")
        self.warp_pth = hf_hub_download(repo_id=self._WEIGHTS_REPO, filename="warp_viton.pth")

        self.ready = True
        print(f"[DCI] backend ready (device={self.device})")

    def predict(
        self,
        person_img: Image.Image,
        garment_img: Image.Image,
        mask_img: Optional[Image.Image] = None,
        cfg: Optional[Dict] = None
    ) -> Image.Image:
        """Run try-on and return an RGB result image.

        cfg keys supported:
          - dataroot: str | None → if set, we try test.py; else preview blend
          - fit: "Slim (75%)" | "Relaxed (85%)" | "Wide (95%)"
          - blend: float (0..1+)
          - torso: (top_rel, bot_rel)
        """
        import sys  # local: only needed to locate the current interpreter

        cfg = cfg or {}
        fit = cfg.get("fit", "Relaxed (85%)")
        blend = float(cfg.get("blend", 0.9))
        torso = tuple(cfg.get("torso", (0.30, 0.68)))
        dataroot = cfg.get("dataroot")

        # If no dataset was prepared, just show the preview overlay.
        if not dataroot:
            print("[DCI] PREVIEW: no dataroot provided.")
            return _quick_blend(person_img, garment_img, mask_img, fit, blend, torso)

        # Try the repo's test.py; any failure degrades to the preview blend.
        try:
            outdir = Path(tempfile.mkdtemp(prefix="dci_out_"))
            cmd = [
                # BUG FIX: use the running interpreter rather than whatever
                # "python" happens to be on PATH (may be absent or a
                # different environment without the DCI dependencies).
                sys.executable, "test.py",
                "--config", "dci_vton/configs/viton512_v2.yaml",
                "--ckpt", self.viton_ckpt,
                "--dataroot", str(dataroot),
                "--H", "512", "--W", "512",
                "--n_samples", "1",
                "--ddim_steps", "30",
                "--outdir", str(outdir),
            ]
            print("[DCI] running:", " ".join(cmd))
            # Capture and echo output so failures are diagnosable from logs
            # (check=True alone would discard the subprocess's stderr).
            proc = subprocess.run(cmd, capture_output=True, text=True)
            if proc.stdout:
                print("[DCI][stdout]\n", proc.stdout)
            if proc.stderr:
                print("[DCI][stderr]\n", proc.stderr)
            if proc.returncode != 0:
                raise RuntimeError(f"test.py failed with code {proc.returncode}")

            res_dir = outdir / "result"
            pngs = sorted(glob.glob(str(res_dir / "*.png")))
            if not pngs:
                raise RuntimeError("No result produced by test.py")
            return Image.open(pngs[0]).convert("RGB")

        except Exception as e:
            print("[DCI] test.py failed → preview fallback. Reason:", repr(e))
            return _quick_blend(person_img, garment_img, mask_img, fit, blend, torso)
|