virtual-tryon

Running on Zero

App Files Files Community

Nandha2017 committed on Mar 14

Commit

d04b5a9

verified ·

1 Parent(s): bb42d68

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app.py +100 -192
requirements.txt +8 -9

app.py CHANGED Viewed

@@ -1,9 +1,6 @@
 """
-Virtual Try-On — Powered by CatVTON + Hugging Face ZeroGPU
-===========================================================
-No local GPU or model storage needed.
-Models download once to /data on HF's servers.
-Generated images are saved to the user's local device.
 """
 import datetime
@@ -15,10 +12,10 @@ import numpy as np
 import spaces
 import torch
 from huggingface_hub import snapshot_download
-from PIL import Image
 # ---------------------------------------------------------------------------
-# Persistent storage (HF Spaces /data, else /tmp)
 # ---------------------------------------------------------------------------
 DATA_DIR   = "/data" if os.path.exists("/data") else "/tmp"
 MODELS_DIR = os.path.join(DATA_DIR, "catvton_models")
@@ -26,109 +23,73 @@ OUTPUT_DIR = os.path.join(DATA_DIR, "outputs")
 os.makedirs(MODELS_DIR, exist_ok=True)
 os.makedirs(OUTPUT_DIR, exist_ok=True)
-# Point HF cache to persistent storage
-os.environ["HF_HOME"]                = os.path.join(DATA_DIR, "hf_cache")
-os.environ["HUGGINGFACE_HUB_CACHE"]  = os.path.join(DATA_DIR, "hf_cache", "hub")
 # ---------------------------------------------------------------------------
-# Model download (runs at Space startup — on HF servers, NOT locally)
 # ---------------------------------------------------------------------------
-CATVTON_REPO   = "zhengchong/CatVTON"
-CATVTON_LOCAL  = os.path.join(MODELS_DIR, "CatVTON")
 def download_models():
-    if not os.path.exists(os.path.join(CATVTON_LOCAL, "config.json")):
-        print("Downloading CatVTON model to HF persistent storage...")
-        snapshot_download(
-            repo_id=CATVTON_REPO,
-            local_dir=CATVTON_LOCAL,
-            local_dir_use_symlinks=False,
-        )
-        print("CatVTON model ready.")
-    else:
-        print("CatVTON model already cached.")
 # ---------------------------------------------------------------------------
-# Pipeline loader (lazy — only after GPU is assigned)
 # ---------------------------------------------------------------------------
-_pipeline = None
-def load_pipeline():
-    global _pipeline
-    if _pipeline is not None:
-        return _pipeline
-    from diffusers import AutoencoderKL, UNet2DConditionModel
-    from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint import (
-        StableDiffusionInpaintPipeline,
-    )
-    from transformers import CLIPTextModel, CLIPTokenizer
-    # CatVTON uses a custom diffusers-compatible pipeline
-    # Fall back to standard diffusers inpaint if custom loader unavailable
-    try:
-        sys.path.insert(0, CATVTON_LOCAL)
-        from model.pipeline import CatVTONPipeline
-        _pipeline = CatVTONPipeline(
-            base_ckpt=CATVTON_LOCAL,
-            attn_ckpt=CATVTON_LOCAL,
-            attn_ckpt_version="mix",
-            weight_dtype=torch.float16,
-            device="cuda",
-            skip_safety_check=True,
-        )
-        print("CatVTON custom pipeline loaded.")
-    except Exception as e:
-        print(f"CatVTON custom pipeline failed ({e}), using diffusers fallback...")
-        _pipeline = StableDiffusionInpaintPipeline.from_pretrained(
-            CATVTON_LOCAL,
-            torch_dtype=torch.float16,
-            safety_checker=None,
-        ).to("cuda")
-        print("Diffusers fallback pipeline loaded.")
-    return _pipeline
 # ---------------------------------------------------------------------------
-# Mask generation utilities
 # ---------------------------------------------------------------------------
-def _resize_and_pad(img: Image.Image, size: int = 768) -> Image.Image:
-    """Resize image to square, preserving aspect ratio with padding."""
     img.thumbnail((size, size), Image.LANCZOS)
     canvas = Image.new("RGB", (size, size), (255, 255, 255))
-    x = (size - img.width) // 2
-    y = (size - img.height) // 2
-    canvas.paste(img, (x, y))
     return canvas
-def _build_mask(person_img: Image.Image, cloth_type: str) -> Image.Image:
-    """
-    Build a rough inpainting mask based on cloth_type.
-    For a proper implementation, use a segmentation model (e.g. SCHP).
-    This simple version covers standard body regions.
-    """
-    w, h = person_img.size
-    mask = Image.new("L", (w, h), 0)
-    import PIL.ImageDraw as ImageDraw
-    draw = ImageDraw.Draw(mask)
     if cloth_type == "upper":
-        # Cover torso: from ~20% to ~65% height
-        draw.rectangle([int(w * 0.1), int(h * 0.18), int(w * 0.9), int(h * 0.65)], fill=255)
     elif cloth_type == "lower":
-        # Cover legs: from ~55% to ~100% height
-        draw.rectangle([int(w * 0.05), int(h * 0.55), int(w * 0.95), int(h * 1.0)], fill=255)
     else:  # overall / dress
-        # Cover full body: from ~15% to ~100% height
-        draw.rectangle([int(w * 0.05), int(h * 0.15), int(w * 0.95), int(h * 1.0)], fill=255)
     return mask
 # ---------------------------------------------------------------------------
-# Inference (ZeroGPU)
 # ---------------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def run_tryon(
@@ -138,129 +99,80 @@ def run_tryon(
     num_steps: int,
     guidance_scale: float,
     seed: int,
-) -> tuple[list, list]:
-    """
-    Run virtual try-on inference on HF ZeroGPU.
-    Returns (gallery_images, downloadable_file_paths).
-    """
     if person_image is None or garment_image is None:
-        raise gr.Error("Please upload both a person image and a garment image.")
-    pipe = load_pipeline()
-    # Pre-process
-    size = 768
-    person_resized  = _resize_and_pad(person_image.convert("RGB"), size)
-    garment_resized = _resize_and_pad(garment_image.convert("RGB"), size)
-    mask            = _build_mask(person_resized, cloth_type)
-    generator = torch.Generator(device="cuda")
-    if seed == -1:
-        seed = torch.randint(0, 2**32, (1,)).item()
-    generator.manual_seed(int(seed))
-    # Run pipeline
-    try:
-        # CatVTON custom call signature
-        result = pipe(
-            image=person_resized,
-            condition_image=garment_resized,
-            mask=mask,
-            num_inference_steps=num_steps,
-            guidance_scale=guidance_scale,
-            generator=generator,
-        )
-        output_images = result if isinstance(result, list) else [result]
-    except TypeError:
-        # Diffusers fallback call signature
-        result = pipe(
-            prompt="a person wearing the garment, photorealistic, high quality",
-            image=person_resized,
-            mask_image=mask,
-            num_inference_steps=num_steps,
-            guidance_scale=guidance_scale,
-            generator=generator,
-        )
-        output_images = result.images
-    # Save outputs
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     saved_paths = []
-    pil_images  = []
     for i, img in enumerate(output_images):
-        if not isinstance(img, Image.Image):
-            img = Image.fromarray(np.uint8(img))
-        pil_images.append(img)
         path = os.path.join(OUTPUT_DIR, f"tryon_{timestamp}_{i}.png")
         img.save(path, format="PNG")
         saved_paths.append(path)
-    return pil_images, saved_paths
 # ---------------------------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------------------------
-EXAMPLES = []  # add example paths here if desired
-with gr.Blocks(title="Virtual Try-On — CatVTON", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         "# 👗 Virtual Try-On\n"
-        "Upload a **person photo** and a **garment image**, then click **Try On**.\n\n"
-        "> Runs on Hugging Face ZeroGPU (free A10G) — no local GPU or storage needed.  \n"
-        "> Generated images are saved to your device via the Download button."
     )
     with gr.Row():
-        with gr.Column(scale=1):
-            person_input = gr.Image(
-                label="Person Photo",
-                type="pil",
-                height=400,
             )
-            garment_input = gr.Image(
-                label="Garment Image",
-                type="pil",
-                height=400,
-            )
-        with gr.Column(scale=1):
-            output_gallery = gr.Gallery(
-                label="Result",
-                show_label=True,
-                columns=1,
-                height=400,
-            )
-            output_files = gr.File(
                 label="⬇ Download to your device",
                 file_count="multiple",
                 interactive=False,
             )
-    with gr.Row():
-        cloth_type = gr.Radio(
-            ["upper", "lower", "overall"],
-            value="upper",
-            label="Garment Type",
-            info="upper = top/shirt, lower = pants/skirt, overall = dress/full outfit",
-        )
-    with gr.Accordion("Advanced Settings", open=False):
-        with gr.Row():
-            num_steps = gr.Slider(
-                minimum=10, maximum=50, value=30, step=1,
-                label="Inference Steps",
-            )
-            guidance = gr.Slider(
-                minimum=1.0, maximum=10.0, value=2.5, step=0.5,
-                label="Guidance Scale",
-            )
-            seed_input = gr.Number(
-                label="Seed (-1 = random)", value=-1, precision=0,
-            )
-    try_btn = gr.Button("👗 Try On", variant="primary", size="lg")
     try_btn.click(
         fn=run_tryon,
         inputs=[person_input, garment_input, cloth_type, num_steps, guidance, seed_input],
@@ -269,16 +181,12 @@ with gr.Blocks(title="Virtual Try-On — CatVTON", theme=gr.themes.Soft()) as de
     gr.Markdown(
         "---\n"
-        "**Notes:**  \n"
-        "- First run downloads the model (~2-4 GB) to HF persistent storage — takes a few minutes once.  \n"
-        "- Subsequent runs start immediately (model cached).  \n"
-        "- For best results: use a front-facing photo with clear garment visibility.  \n"
-        "- Built with [CatVTON](https://github.com/zhengchong/CatVTON) + "
-        "[Gradio](https://gradio.app) + [ZeroGPU](https://huggingface.co/docs/hub/spaces-zerogpu)"
     )
-# Download model at Space startup (on HF servers, not locally)
 download_models()
 if __name__ == "__main__":

 """
+Virtual Try-On — CatVTON + Hugging Face ZeroGPU
+No local GPU or model storage needed. Generated images download to your device.
 """
 import datetime
 import spaces
 import torch
 from huggingface_hub import snapshot_download
+from PIL import Image, ImageDraw
 # ---------------------------------------------------------------------------
+# Persistent storage (/data on ZeroGPU Spaces, /tmp fallback)
 # ---------------------------------------------------------------------------
 DATA_DIR   = "/data" if os.path.exists("/data") else "/tmp"
 MODELS_DIR = os.path.join(DATA_DIR, "catvton_models")
 os.makedirs(MODELS_DIR, exist_ok=True)
 os.makedirs(OUTPUT_DIR, exist_ok=True)
+os.environ["HF_HOME"]               = os.path.join(DATA_DIR, "hf_cache")
+os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(DATA_DIR, "hf_cache", "hub")
 # ---------------------------------------------------------------------------
+# Model download — runs once at Space startup on HF servers (not locally)
 # ---------------------------------------------------------------------------
+CATVTON_REPO  = "zhengchong/CatVTON"
+CATVTON_LOCAL = os.path.join(MODELS_DIR, "CatVTON")
 def download_models():
+    if os.path.exists(os.path.join(CATVTON_LOCAL, "model_index.json")):
+        print("CatVTON already cached.")
+        return
+    print("Downloading CatVTON (~4 GB) to HF persistent storage…")
+    snapshot_download(
+        repo_id=CATVTON_REPO,
+        local_dir=CATVTON_LOCAL,
+        local_dir_use_symlinks=False,
+        ignore_patterns=["*.md", "*.txt", "*.py"],
+    )
+    print("CatVTON ready.")
 # ---------------------------------------------------------------------------
+# Pipeline (loaded lazily inside @spaces.GPU)
 # ---------------------------------------------------------------------------
+_pipe = None
+def _get_pipe():
+    global _pipe
+    if _pipe is not None:
+        return _pipe
+    from diffusers import StableDiffusionInpaintPipeline
+    _pipe = StableDiffusionInpaintPipeline.from_pretrained(
+        CATVTON_LOCAL,
+        torch_dtype=torch.float16,
+        safety_checker=None,
+        requires_safety_checker=False,
+    ).to("cuda")
+    _pipe.set_progress_bar_config(disable=True)
+    print("Pipeline loaded on CUDA.")
+    return _pipe
 # ---------------------------------------------------------------------------
+# Image helpers
 # ---------------------------------------------------------------------------
+TARGET_SIZE = 512
+def _fit_to_square(img: Image.Image, size: int = TARGET_SIZE) -> Image.Image:
+    img = img.convert("RGB")
     img.thumbnail((size, size), Image.LANCZOS)
     canvas = Image.new("RGB", (size, size), (255, 255, 255))
+    canvas.paste(img, ((size - img.width) // 2, (size - img.height) // 2))
     return canvas
+def _make_mask(size: int, cloth_type: str) -> Image.Image:
+    mask = Image.new("L", (size, size), 0)
+    d = ImageDraw.Draw(mask)
     if cloth_type == "upper":
+        d.rectangle([int(size*.10), int(size*.18), int(size*.90), int(size*.65)], fill=255)
     elif cloth_type == "lower":
+        d.rectangle([int(size*.05), int(size*.55), int(size*.95), int(size*1.0)], fill=255)
     else:  # overall / dress
+        d.rectangle([int(size*.05), int(size*.15), int(size*.95), int(size*1.0)], fill=255)
     return mask
 # ---------------------------------------------------------------------------
+# ZeroGPU inference
 # ---------------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def run_tryon(
     num_steps: int,
     guidance_scale: float,
     seed: int,
+) -> tuple:
     if person_image is None or garment_image is None:
+        raise gr.Error("Please upload both a person photo and a garment image.")
+    pipe = _get_pipe()
+    person  = _fit_to_square(person_image)
+    garment = _fit_to_square(garment_image)
+    mask    = _make_mask(TARGET_SIZE, cloth_type)
+    rng = torch.Generator(device="cuda")
+    rng.manual_seed(int(seed) if seed != -1 else torch.randint(0, 2**32, (1,)).item())
+    prompt = (
+        "a person wearing the garment in the reference image, "
+        "photorealistic, high quality, natural lighting"
+    )
+    negative = "blurry, distorted, deformed, low quality, artifacts"
+    result = pipe(
+        prompt=prompt,
+        negative_prompt=negative,
+        image=person,
+        mask_image=mask,
+        num_inference_steps=num_steps,
+        guidance_scale=guidance_scale,
+        generator=rng,
+    )
+    output_images = result.images
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     saved_paths = []
     for i, img in enumerate(output_images):
         path = os.path.join(OUTPUT_DIR, f"tryon_{timestamp}_{i}.png")
         img.save(path, format="PNG")
         saved_paths.append(path)
+    return output_images, saved_paths
 # ---------------------------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------------------------
+with gr.Blocks(title="Virtual Try-On", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         "# 👗 Virtual Try-On\n"
+        "Upload a **person photo** and a **garment image**, select the type, then click **Try On**.\n\n"
+        "> Runs entirely on **Hugging Face ZeroGPU** (free A10G) — no local GPU needed.  \n"
+        "> Models download once to HF persistent storage. Images save to your device via the Download button."
     )
     with gr.Row():
+        with gr.Column():
+            person_input  = gr.Image(label="Person Photo", type="pil", height=380)
+            garment_input = gr.Image(label="Garment Image", type="pil", height=380)
+            cloth_type = gr.Radio(
+                ["upper", "lower", "overall"],
+                value="upper",
+                label="Garment Type",
+                info="upper=top/shirt  |  lower=pants/skirt  |  overall=dress/full outfit",
             )
+            with gr.Accordion("Advanced", open=False):
+                num_steps  = gr.Slider(10, 50, value=30, step=1, label="Steps")
+                guidance   = gr.Slider(1.0, 10.0, value=7.5, step=0.5, label="Guidance Scale")
+                seed_input = gr.Number(label="Seed (-1 = random)", value=-1, precision=0)
+            try_btn = gr.Button("👗 Try On", variant="primary", size="lg")
+        with gr.Column():
+            output_gallery = gr.Gallery(label="Result", columns=1, height=380)
+            output_files   = gr.File(
                 label="⬇ Download to your device",
                 file_count="multiple",
                 interactive=False,
             )
     try_btn.click(
         fn=run_tryon,
         inputs=[person_input, garment_input, cloth_type, num_steps, guidance, seed_input],
     gr.Markdown(
         "---\n"
+        "**Tips:** front-facing photo · garment on white/neutral background · upper body for shirts\n\n"
+        "First run: ~2-5 min (model download). Subsequent runs: ~15-30s.\n\n"
+        "Built with [CatVTON](https://github.com/zhengchong/CatVTON) · "
+        "[Gradio](https://gradio.app) · [ZeroGPU](https://huggingface.co/docs/hub/spaces-zerogpu)"
     )
 download_models()
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,13 +1,12 @@
-gradio>=4.44.0
 spaces
-torch
-torchvision
-diffusers>=0.27.0
-transformers>=4.40.0
-accelerate>=0.28.0
-huggingface_hub>=0.27.0
 Pillow>=10.0.0
 numpy>=1.24.0
 safetensors>=0.4.2
-omegaconf
-einops

+gradio==4.44.0
 spaces
+torch==2.3.1
+torchvision==0.18.1
+diffusers==0.29.2
+transformers==4.44.2
+accelerate==0.33.0
+huggingface_hub>=0.24.0
 Pillow>=10.0.0
 numpy>=1.24.0
 safetensors>=0.4.2
+einops==0.8.0