virtual-tryon

Running on Zero

App Files Files Community

Nandha2017 committed on Mar 14

Commit

bb42d68

verified ·

1 Parent(s): 5d5eb22

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

README.md +46 -12
app.py +285 -0
packages.txt +6 -0
requirements.txt +13 -0

README.md CHANGED Viewed

@@ -1,12 +1,46 @@
----
-title: Virtual Tryon
-emoji: 🦀
-colorFrom: purple
-colorTo: yellow
-sdk: gradio
-sdk_version: 6.9.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Virtual Try-On (CatVTON)
+emoji: 👗
+colorFrom: pink
+colorTo: purple
+sdk: gradio
+sdk_version: "4.44.0"
+app_file: app.py
+pinned: false
+license: apache-2.0
+hardware: zero-a10g
+---
+# 👗 Virtual Try-On
+Try on garments virtually using AI — inference runs on Hugging Face ZeroGPU, so all you need on your side is a web browser.
+**No local GPU or storage needed.**
+## How to Use
+1. Upload a **person photo** (front-facing works best)
+2. Upload a **garment image** (product photo on white background works best)
+3. Select the garment type (upper / lower / overall)
+4. Click **Try On**
+5. Download the result to your device
+## Technical Details
+- **Model**: [CatVTON](https://github.com/zhengchong/CatVTON) (`zhengchong/CatVTON`)
+- **GPU**: Hugging Face ZeroGPU (A10G, free tier)
+- **Model storage**: Downloaded once to `/data` persistent storage on HF servers (the app falls back to `/tmp` when `/data` does not exist)
+- **Your device**: Only needs a web browser — no downloads, no GPU
+## Notes
+- First run takes ~2-5 minutes (model download to HF servers)
+- Subsequent runs start immediately (model cached in persistent storage)
+- For best results: clear front-facing photos, garment on white/neutral background
+- Each generation requests up to 120 seconds of ZeroGPU time
+## Built With
+- [CatVTON](https://github.com/zhengchong/CatVTON) — virtual try-on model
+- [Gradio](https://gradio.app) — web interface
+- [Hugging Face ZeroGPU](https://huggingface.co/docs/hub/spaces-zerogpu) — free GPU

app.py ADDED Viewed

	@@ -0,0 +1,285 @@

+"""
+Virtual Try-On — Powered by CatVTON + Hugging Face ZeroGPU
+===========================================================
+No local GPU or model storage needed.
+Models download once to /data on HF's servers.
+Generated images are written to an output directory on the server and offered to the user as downloads.
+"""
+import datetime
+import os
+import sys
+import gradio as gr
+import numpy as np
+import spaces
+import torch
+from huggingface_hub import snapshot_download
+from PIL import Image
+# ---------------------------------------------------------------------------
+# Persistent storage (HF Spaces /data, else /tmp)
+# ---------------------------------------------------------------------------
+# Root for everything this app writes: "/data" when that path exists, otherwise "/tmp".
+DATA_DIR   = "/data" if os.path.exists("/data") else "/tmp"
+# Where the CatVTON snapshot is stored.
+MODELS_DIR = os.path.join(DATA_DIR, "catvton_models")
+# Where generated PNGs are written before being handed back to the UI.
+OUTPUT_DIR = os.path.join(DATA_DIR, "outputs")
+# Create both directories up front; exist_ok makes restarts harmless.
+os.makedirs(MODELS_DIR, exist_ok=True)
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+# Point HF cache to persistent storage
+# NOTE(review): these variables are assigned after `huggingface_hub` has already been
+# imported at the top of the file; confirm the library still honours them when set this late.
+os.environ["HF_HOME"]                = os.path.join(DATA_DIR, "hf_cache")
+os.environ["HUGGINGFACE_HUB_CACHE"]  = os.path.join(DATA_DIR, "hf_cache", "hub")
+# ---------------------------------------------------------------------------
+# Model download (runs at Space startup — on HF servers, NOT locally)
+# ---------------------------------------------------------------------------
+CATVTON_REPO   = "zhengchong/CatVTON"
+CATVTON_LOCAL  = os.path.join(MODELS_DIR, "CatVTON")
+def download_models():
+    if not os.path.exists(os.path.join(CATVTON_LOCAL, "config.json")):
+        print("Downloading CatVTON model to HF persistent storage...")
+        snapshot_download(
+            repo_id=CATVTON_REPO,
+            local_dir=CATVTON_LOCAL,
+            local_dir_use_symlinks=False,
+        )
+        print("CatVTON model ready.")
+    else:
+        print("CatVTON model already cached.")
+# ---------------------------------------------------------------------------
+# Pipeline loader (lazy — only after GPU is assigned)
+# ---------------------------------------------------------------------------
+_pipeline = None
+def load_pipeline():
+    global _pipeline
+    if _pipeline is not None:
+        return _pipeline
+    from diffusers import AutoencoderKL, UNet2DConditionModel
+    from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint import (
+        StableDiffusionInpaintPipeline,
+    )
+    from transformers import CLIPTextModel, CLIPTokenizer
+    # CatVTON uses a custom diffusers-compatible pipeline
+    # Fall back to standard diffusers inpaint if custom loader unavailable
+    try:
+        sys.path.insert(0, CATVTON_LOCAL)
+        from model.pipeline import CatVTONPipeline
+        _pipeline = CatVTONPipeline(
+            base_ckpt=CATVTON_LOCAL,
+            attn_ckpt=CATVTON_LOCAL,
+            attn_ckpt_version="mix",
+            weight_dtype=torch.float16,
+            device="cuda",
+            skip_safety_check=True,
+        )
+        print("CatVTON custom pipeline loaded.")
+    except Exception as e:
+        print(f"CatVTON custom pipeline failed ({e}), using diffusers fallback...")
+        _pipeline = StableDiffusionInpaintPipeline.from_pretrained(
+            CATVTON_LOCAL,
+            torch_dtype=torch.float16,
+            safety_checker=None,
+        ).to("cuda")
+        print("Diffusers fallback pipeline loaded.")
+    return _pipeline
+# ---------------------------------------------------------------------------
+# Mask generation utilities
+# ---------------------------------------------------------------------------
+def _resize_and_pad(img: Image.Image, size: int = 768) -> Image.Image:
+    """Resize image to square, preserving aspect ratio with padding."""
+    img.thumbnail((size, size), Image.LANCZOS)
+    canvas = Image.new("RGB", (size, size), (255, 255, 255))
+    x = (size - img.width) // 2
+    y = (size - img.height) // 2
+    canvas.paste(img, (x, y))
+    return canvas
+def _build_mask(person_img: Image.Image, cloth_type: str) -> Image.Image:
+    """
+    Build a rough inpainting mask based on cloth_type.
+    For a proper implementation, use a segmentation model (e.g. SCHP).
+    This simple version covers standard body regions.
+    """
+    w, h = person_img.size
+    mask = Image.new("L", (w, h), 0)
+    import PIL.ImageDraw as ImageDraw
+    draw = ImageDraw.Draw(mask)
+    if cloth_type == "upper":
+        # Cover torso: from ~20% to ~65% height
+        draw.rectangle([int(w * 0.1), int(h * 0.18), int(w * 0.9), int(h * 0.65)], fill=255)
+    elif cloth_type == "lower":
+        # Cover legs: from ~55% to ~100% height
+        draw.rectangle([int(w * 0.05), int(h * 0.55), int(w * 0.95), int(h * 1.0)], fill=255)
+    else:  # overall / dress
+        # Cover full body: from ~15% to ~100% height
+        draw.rectangle([int(w * 0.05), int(h * 0.15), int(w * 0.95), int(h * 1.0)], fill=255)
+    return mask
+# ---------------------------------------------------------------------------
+# Inference (ZeroGPU)
+# ---------------------------------------------------------------------------
+@spaces.GPU(duration=120)
+def run_tryon(
+    person_image: Image.Image,
+    garment_image: Image.Image,
+    cloth_type: str,
+    num_steps: int,
+    guidance_scale: float,
+    seed: int,
+) -> tuple[list, list]:
+    """
+    Run virtual try-on inference on HF ZeroGPU.
+    Returns (gallery_images, downloadable_file_paths).
+    """
+    if person_image is None or garment_image is None:
+        raise gr.Error("Please upload both a person image and a garment image.")
+    pipe = load_pipeline()
+    # Pre-process
+    size = 768
+    person_resized  = _resize_and_pad(person_image.convert("RGB"), size)
+    garment_resized = _resize_and_pad(garment_image.convert("RGB"), size)
+    mask            = _build_mask(person_resized, cloth_type)
+    generator = torch.Generator(device="cuda")
+    if seed == -1:
+        seed = torch.randint(0, 2**32, (1,)).item()
+    generator.manual_seed(int(seed))
+    # Run pipeline
+    try:
+        # CatVTON custom call signature
+        result = pipe(
+            image=person_resized,
+            condition_image=garment_resized,
+            mask=mask,
+            num_inference_steps=num_steps,
+            guidance_scale=guidance_scale,
+            generator=generator,
+        )
+        output_images = result if isinstance(result, list) else [result]
+    except TypeError:
+        # Diffusers fallback call signature
+        result = pipe(
+            prompt="a person wearing the garment, photorealistic, high quality",
+            image=person_resized,
+            mask_image=mask,
+            num_inference_steps=num_steps,
+            guidance_scale=guidance_scale,
+            generator=generator,
+        )
+        output_images = result.images
+    # Save outputs
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    saved_paths = []
+    pil_images  = []
+    for i, img in enumerate(output_images):
+        if not isinstance(img, Image.Image):
+            img = Image.fromarray(np.uint8(img))
+        pil_images.append(img)
+        path = os.path.join(OUTPUT_DIR, f"tryon_{timestamp}_{i}.png")
+        img.save(path, format="PNG")
+        saved_paths.append(path)
+    return pil_images, saved_paths
+# ---------------------------------------------------------------------------
+# Gradio UI
+# ---------------------------------------------------------------------------
+# Placeholder list for example inputs; nothing in the visible code reads it.
+EXAMPLES = []  # add example paths here if desired
+# Build the page top to bottom: intro text, input/output columns, garment selector,
+# collapsible advanced settings, the trigger button, and a closing notes section.
+with gr.Blocks(title="Virtual Try-On — CatVTON", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        "# 👗 Virtual Try-On\n"
+        "Upload a **person photo** and a **garment image**, then click **Try On**.\n\n"
+        "> Runs on Hugging Face ZeroGPU (free A10G) — no local GPU or storage needed.  \n"
+        "> Generated images are saved to your device via the Download button."
+    )
+    with gr.Row():
+        # Left column: the two uploads, delivered to the handler as PIL images.
+        with gr.Column(scale=1):
+            person_input = gr.Image(
+                label="Person Photo",
+                type="pil",
+                height=400,
+            )
+            garment_input = gr.Image(
+                label="Garment Image",
+                type="pil",
+                height=400,
+            )
+        # Right column: result preview plus a read-only file list for downloading.
+        with gr.Column(scale=1):
+            output_gallery = gr.Gallery(
+                label="Result",
+                show_label=True,
+                columns=1,
+                height=400,
+            )
+            output_files = gr.File(
+                label="⬇ Download to your device",
+                file_count="multiple",
+                interactive=False,
+            )
+    with gr.Row():
+        # The selected string is passed to run_tryon as cloth_type.
+        cloth_type = gr.Radio(
+            ["upper", "lower", "overall"],
+            value="upper",
+            label="Garment Type",
+            info="upper = top/shirt, lower = pants/skirt, overall = dress/full outfit",
+        )
+    with gr.Accordion("Advanced Settings", open=False):
+        with gr.Row():
+            num_steps = gr.Slider(
+                minimum=10, maximum=50, value=30, step=1,
+                label="Inference Steps",
+            )
+            guidance = gr.Slider(
+                minimum=1.0, maximum=10.0, value=2.5, step=0.5,
+                label="Guidance Scale",
+            )
+            # -1 is the sentinel run_tryon interprets as "pick a random seed".
+            seed_input = gr.Number(
+                label="Seed (-1 = random)", value=-1, precision=0,
+            )
+    try_btn = gr.Button("👗 Try On", variant="primary", size="lg")
+    # The inputs list follows run_tryon's parameter order; the two outputs receive the
+    # (images, file paths) pair it returns.
+    try_btn.click(
+        fn=run_tryon,
+        inputs=[person_input, garment_input, cloth_type, num_steps, guidance, seed_input],
+        outputs=[output_gallery, output_files],
+    )
+    gr.Markdown(
+        "---\n"
+        "**Notes:**  \n"
+        "- First run downloads the model (~2-4 GB) to HF persistent storage — takes a few minutes once.  \n"
+        "- Subsequent runs start immediately (model cached).  \n"
+        "- For best results: use a front-facing photo with clear garment visibility.  \n"
+        "- Built with [CatVTON](https://github.com/zhengchong/CatVTON) + "
+        "[Gradio](https://gradio.app) + [ZeroGPU](https://huggingface.co/docs/hub/spaces-zerogpu)"
+    )
+# Download model at Space startup (on HF servers, not locally)
+# This call sits at module level, so it also runs when the file is imported, not only
+# when it is executed as a script.
+download_models()
+# Launch the app only when this file is run directly.
+if __name__ == "__main__":
+    demo.launch()

packages.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+libgl1-mesa-glx
+libglib2.0-0
+libsm6
+libxext6
+libxrender-dev
+libgomp1

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+gradio>=4.44.0
+spaces
+torch
+torchvision
+diffusers>=0.27.0
+transformers>=4.40.0
+accelerate>=0.28.0
+huggingface_hub>=0.27.0
+Pillow>=10.0.0
+numpy>=1.24.0
+safetensors>=0.4.2
+omegaconf
+einops