Update dci_vton_infer.py
Browse files- dci_vton_infer.py +60 -46
dci_vton_infer.py
CHANGED
|
@@ -1,17 +1,12 @@
|
|
| 1 |
# dci_vton_infer.py
|
| 2 |
-
# Try real DCI-VTON via test.py; otherwise fall back to a neat preview overlay.
|
| 3 |
-
|
| 4 |
from __future__ import annotations
|
| 5 |
-
import os, glob, subprocess, tempfile
|
| 6 |
from pathlib import Path
|
| 7 |
-
from typing import Optional, Dict
|
| 8 |
-
|
| 9 |
from PIL import Image, ImageFilter
|
| 10 |
from huggingface_hub import hf_hub_download
|
| 11 |
|
| 12 |
-
|
| 13 |
-
# ---------- tiny preview helpers ----------
|
| 14 |
-
def _to_rgba(im: Image.Image) -> Image.Image:
|
| 15 |
return im if im.mode == "RGBA" else im.convert("RGBA")
|
| 16 |
|
| 17 |
def _auto_mask_torso(human: Image.Image, top_rel: float, bot_rel: float, feather: int) -> Image.Image:
|
|
@@ -31,19 +26,18 @@ def _quick_blend(
|
|
| 31 |
mask_img: Optional[Image.Image],
|
| 32 |
fit_width: str,
|
| 33 |
blend_strength: float,
|
| 34 |
-
torso:
|
| 35 |
) -> Image.Image:
|
| 36 |
-
human =
|
| 37 |
-
|
| 38 |
|
| 39 |
hw, hh = human.size
|
| 40 |
-
gw, gh =
|
| 41 |
-
|
| 42 |
fit_ratio = {"Slim (75%)": 0.75, "Relaxed (85%)": 0.85, "Wide (95%)": 0.95}.get(fit_width, 0.85)
|
| 43 |
target_w = int(hw * fit_ratio)
|
| 44 |
scale = target_w / max(1, gw)
|
| 45 |
target_h = int(gh * scale)
|
| 46 |
-
|
| 47 |
|
| 48 |
top_rel, bot_rel = torso
|
| 49 |
y_top_full = int(hh * top_rel)
|
|
@@ -54,9 +48,9 @@ def _quick_blend(
|
|
| 54 |
y_top = y_top_full + (torso_h - target_h) // 2
|
| 55 |
|
| 56 |
overlay = Image.new("RGBA", (hw, hh), (0, 0, 0, 0))
|
| 57 |
-
if
|
| 58 |
-
|
| 59 |
-
overlay.paste(
|
| 60 |
|
| 61 |
if mask_img is None:
|
| 62 |
mask_img = Image.new("L", (hw, hh), 255)
|
|
@@ -66,21 +60,28 @@ def _quick_blend(
|
|
| 66 |
out = Image.blend(human, mixed, alpha)
|
| 67 |
return out.convert("RGB")
|
| 68 |
|
| 69 |
-
|
| 70 |
-
# ---------- main predictor ----------
|
| 71 |
class DciVtonPredictor:
|
| 72 |
def __init__(self, device: str = "cuda"):
|
| 73 |
self.device = device
|
| 74 |
self.ready = False
|
| 75 |
|
| 76 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
print("[DCI] downloading viton512_v2.ckpt …")
|
| 78 |
-
self.viton_ckpt = hf_hub_download(repo_id=
|
| 79 |
print("[DCI] downloading warp_viton.pth …")
|
| 80 |
-
self.warp_pth = hf_hub_download(repo_id=
|
| 81 |
|
| 82 |
-
self.ready = True
|
| 83 |
print(f"[DCI] backend ready (device={self.device})")
|
|
|
|
| 84 |
|
| 85 |
def predict(
|
| 86 |
self,
|
|
@@ -89,43 +90,56 @@ class DciVtonPredictor:
|
|
| 89 |
mask_img: Optional[Image.Image] = None,
|
| 90 |
cfg: Optional[Dict] = None
|
| 91 |
) -> Image.Image:
|
| 92 |
-
|
| 93 |
-
cfg keys supported:
|
| 94 |
-
- dataroot: str | None → if set, we try test.py; else do preview blend
|
| 95 |
-
- fit: str ("Slim (75%)" | "Relaxed (85%)" | "Wide (95%)")
|
| 96 |
-
- blend: float (0..1+)
|
| 97 |
-
- torso: (top_rel, bot_rel)
|
| 98 |
-
"""
|
| 99 |
cfg = cfg or {}
|
| 100 |
-
fit
|
| 101 |
blend = float(cfg.get("blend", 0.9))
|
| 102 |
torso = tuple(cfg.get("torso", (0.30, 0.68)))
|
| 103 |
dataroot = cfg.get("dataroot")
|
| 104 |
|
| 105 |
-
# If
|
| 106 |
if not dataroot:
|
| 107 |
return _quick_blend(person_img, garment_img, mask_img, fit, blend, torso)
|
| 108 |
|
| 109 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
try:
|
| 111 |
-
|
| 112 |
-
cmd = [
|
| 113 |
-
"python", "test.py",
|
| 114 |
-
"--config", "dci_vton/configs/viton512_v2.yaml",
|
| 115 |
-
"--ckpt", self.viton_ckpt,
|
| 116 |
-
"--dataroot", str(dataroot),
|
| 117 |
-
"--H", "512", "--W", "512",
|
| 118 |
-
"--n_samples", "1",
|
| 119 |
-
"--ddim_steps", "30",
|
| 120 |
-
"--outdir", str(outdir),
|
| 121 |
-
]
|
| 122 |
print("[DCI] running:", " ".join(cmd))
|
| 123 |
-
subprocess.run(cmd, check=True)
|
| 124 |
|
| 125 |
res_dir = outdir / "result"
|
| 126 |
pngs = sorted(glob.glob(str(res_dir / "*.png")))
|
| 127 |
if not pngs:
|
| 128 |
-
raise RuntimeError("No result produced by test.py")
|
| 129 |
return Image.open(pngs[0]).convert("RGB")
|
| 130 |
|
| 131 |
except Exception as e:
|
|
|
|
| 1 |
# dci_vton_infer.py
|
|
|
|
|
|
|
| 2 |
from __future__ import annotations
|
| 3 |
+
import os, glob, subprocess, tempfile, sys
|
| 4 |
from pathlib import Path
|
| 5 |
+
from typing import Optional, Dict
|
|
|
|
| 6 |
from PIL import Image, ImageFilter
|
| 7 |
from huggingface_hub import hf_hub_download
|
| 8 |
|
| 9 |
+
def _pil_to_rgba(im: Image.Image) -> Image.Image:
|
|
|
|
|
|
|
| 10 |
return im if im.mode == "RGBA" else im.convert("RGBA")
|
| 11 |
|
| 12 |
def _auto_mask_torso(human: Image.Image, top_rel: float, bot_rel: float, feather: int) -> Image.Image:
|
|
|
|
| 26 |
mask_img: Optional[Image.Image],
|
| 27 |
fit_width: str,
|
| 28 |
blend_strength: float,
|
| 29 |
+
torso: tuple[float, float]
|
| 30 |
) -> Image.Image:
|
| 31 |
+
human = _pil_to_rgba(person)
|
| 32 |
+
garment = _pil_to_rgba(garment)
|
| 33 |
|
| 34 |
hw, hh = human.size
|
| 35 |
+
gw, gh = garment.size
|
|
|
|
| 36 |
fit_ratio = {"Slim (75%)": 0.75, "Relaxed (85%)": 0.85, "Wide (95%)": 0.95}.get(fit_width, 0.85)
|
| 37 |
target_w = int(hw * fit_ratio)
|
| 38 |
scale = target_w / max(1, gw)
|
| 39 |
target_h = int(gh * scale)
|
| 40 |
+
garment_resized = garment.resize((target_w, target_h), Image.BICUBIC)
|
| 41 |
|
| 42 |
top_rel, bot_rel = torso
|
| 43 |
y_top_full = int(hh * top_rel)
|
|
|
|
| 48 |
y_top = y_top_full + (torso_h - target_h) // 2
|
| 49 |
|
| 50 |
overlay = Image.new("RGBA", (hw, hh), (0, 0, 0, 0))
|
| 51 |
+
if garment_resized.mode != "RGBA":
|
| 52 |
+
garment_resized = garment_resized.convert("RGBA")
|
| 53 |
+
overlay.paste(garment_resized, (x_left, y_top), garment_resized)
|
| 54 |
|
| 55 |
if mask_img is None:
|
| 56 |
mask_img = Image.new("L", (hw, hh), 255)
|
|
|
|
| 60 |
out = Image.blend(human, mixed, alpha)
|
| 61 |
return out.convert("RGB")
|
| 62 |
|
|
|
|
|
|
|
| 63 |
class DciVtonPredictor:
|
| 64 |
    def __init__(self, device: str = "cuda"):
        """Resolve the DCI-VTON script/config paths and download weights.

        Args:
            device: Device string to run inference on (e.g. "cuda");
                stored as-is and only echoed in the ready message here.
        """
        self.device = device
        # Flipped to True only after both weight files have been fetched.
        self.ready = False

        # Resolve repo paths
        self.repo_root = Path(__file__).parent.resolve()
        # Prefer dci_vton/test.py; fallback to root if needed
        self.test_py = (self.repo_root / "dci_vton" / "test.py")
        if not self.test_py.exists():
            self.test_py = self.repo_root / "test.py"
        self.config_yaml = (self.repo_root / "dci_vton" / "configs" / "viton512_v2.yaml")

        # Download weights
        # NOTE: hf_hub_download caches locally, so repeated construction is
        # cheap after the first run; first use requires network access and
        # raises if the Hub is unreachable.
        repo_id = "venbab/dci-vton-weights"
        print("[DCI] downloading viton512_v2.ckpt …")
        self.viton_ckpt = hf_hub_download(repo_id=repo_id, filename="viton512_v2.ckpt")
        print("[DCI] downloading warp_viton.pth …")
        self.warp_pth = hf_hub_download(repo_id=repo_id, filename="warp_viton.pth")

        print(f"[DCI] backend ready (device={self.device})")
        self.ready = True
|
| 85 |
|
| 86 |
def predict(
|
| 87 |
self,
|
|
|
|
| 90 |
mask_img: Optional[Image.Image] = None,
|
| 91 |
cfg: Optional[Dict] = None
|
| 92 |
) -> Image.Image:
|
| 93 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
cfg = cfg or {}
|
| 95 |
+
fit = cfg.get("fit", "Relaxed (85%)")
|
| 96 |
blend = float(cfg.get("blend", 0.9))
|
| 97 |
torso = tuple(cfg.get("torso", (0.30, 0.68)))
|
| 98 |
dataroot = cfg.get("dataroot")
|
| 99 |
|
| 100 |
+
# If we don't have a dataset root, return preview
|
| 101 |
if not dataroot:
|
| 102 |
return _quick_blend(person_img, garment_img, mask_img, fit, blend, torso)
|
| 103 |
|
| 104 |
+
# Ensure paths exist
|
| 105 |
+
if not self.test_py.exists():
|
| 106 |
+
print(f"[DCI] test.py not found at: {self.test_py}")
|
| 107 |
+
return _quick_blend(person_img, garment_img, mask_img, fit, blend, torso)
|
| 108 |
+
if not self.config_yaml.exists():
|
| 109 |
+
print(f"[DCI] config yaml not found at: {self.config_yaml}")
|
| 110 |
+
return _quick_blend(person_img, garment_img, mask_img, fit, blend, torso)
|
| 111 |
+
|
| 112 |
+
# Build env with proper PYTHONPATH so `ldm/...` imports work
|
| 113 |
+
py_path = os.pathsep.join({
|
| 114 |
+
str(self.repo_root),
|
| 115 |
+
str(self.repo_root / "dci_vton"),
|
| 116 |
+
})
|
| 117 |
+
env = dict(os.environ)
|
| 118 |
+
env["PYTHONPATH"] = py_path + (os.pathsep + env["PYTHONPATH"] if "PYTHONPATH" in env else "")
|
| 119 |
+
|
| 120 |
+
outdir = Path(tempfile.mkdtemp(prefix="dci_out_"))
|
| 121 |
+
cmd = [
|
| 122 |
+
sys.executable,
|
| 123 |
+
str(self.test_py),
|
| 124 |
+
"--config", str(self.config_yaml),
|
| 125 |
+
"--ckpt", str(self.viton_ckpt),
|
| 126 |
+
"--dataroot", str(dataroot),
|
| 127 |
+
"--H", "512",
|
| 128 |
+
"--W", "512",
|
| 129 |
+
"--n_samples", "1",
|
| 130 |
+
"--ddim_steps", "30",
|
| 131 |
+
"--outdir", str(outdir),
|
| 132 |
+
]
|
| 133 |
+
|
| 134 |
try:
|
| 135 |
+
print("[DCI] REAL DCI: dataroot=", dataroot)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
print("[DCI] running:", " ".join(cmd))
|
| 137 |
+
subprocess.run(cmd, check=True, env=env, cwd=str(self.repo_root))
|
| 138 |
|
| 139 |
res_dir = outdir / "result"
|
| 140 |
pngs = sorted(glob.glob(str(res_dir / "*.png")))
|
| 141 |
if not pngs:
|
| 142 |
+
raise RuntimeError("No result image produced by test.py")
|
| 143 |
return Image.open(pngs[0]).convert("RGB")
|
| 144 |
|
| 145 |
except Exception as e:
|