# Source: Hugging Face Space fanboyd13/oodtryon (status at capture: "Runtime error"; file size: 7,773 bytes; revision 22f3c7e)


import gradio as gr
import torch
import numpy as np
from PIL import Image
import requests
import os
import sys
import subprocess

# ── Auto-install heavy deps if missing ──────────────────────────────────────
def ensure_deps(pkgs=None):
    """Install any missing heavy runtime dependencies with pip.

    Each requirement string is reduced to its distribution name (the text
    before the first version specifier, extra, marker or whitespace) and
    dashes are mapped to underscores to obtain the module name to look for.
    Modules that cannot be located are installed with
    ``python -m pip install <requirement> -q``.

    Args:
        pkgs: Optional iterable of pip requirement strings. When omitted,
            the diffusers / transformers / accelerate / omegaconf set used
            by this app is checked.

    Returns:
        None. A failed installation is printed as a warning rather than
        raised, so a pip failure does not abort module import.
    """
    import importlib
    import importlib.util
    import re

    if pkgs is None:
        pkgs = ["diffusers>=0.27.0", "transformers>=4.38.0", "accelerate", "omegaconf"]

    installed_something = False
    for pkg in pkgs:
        # Handle every specifier form (">=", "==", "~=", "<", extras, markers),
        # not just ">=".
        module_name = re.split(r"[<>=!~\[;\s]", pkg.strip(), maxsplit=1)[0].replace("-", "_")
        if not module_name:
            continue

        # Locate the module without importing it: importing the heavy ML
        # packages just to test for presence is slow and has side effects.
        try:
            available = importlib.util.find_spec(module_name) is not None
        except (ImportError, ValueError):
            # ValueError means the module is already in sys.modules without a spec.
            available = module_name in sys.modules
        if available:
            continue

        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", pkg, "-q"])
            installed_something = True
        except (subprocess.CalledProcessError, OSError) as exc:
            print(f"[WARN] Could not install {pkg!r}: {exc}")

    if installed_something:
        # Make packages installed during this process visible to the import system.
        importlib.invalidate_caches()

# Executed at import time, before the pipeline loader and UI below are defined.
ensure_deps()

# ── Model loading (lazy, cached) ─────────────────────────────────────────────
# Process-wide cache for the diffusion pipeline; populated by load_pipeline().
pipe = None


def load_pipeline():
    """Return the shared diffusion pipeline, building it on first use.

    The pipeline is loaded with ``AutoPipelineForImage2Image.from_pretrained``
    in float16 on CUDA (float32 on CPU), moved to the selected device, and
    stored in the module-level ``pipe`` so later calls return it directly.

    Returns:
        The cached (or freshly loaded) pipeline object.

    Raises:
        Whatever the diffusers import or ``from_pretrained`` raises; in that
        case ``pipe`` stays ``None`` so the next call retries from scratch.
    """
    global pipe
    if pipe is not None:
        return pipe

    from diffusers import AutoPipelineForImage2Image

    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32

    print(f"[VirtualTryOn] Loading OOTDiffusion on {device} …")

    # NOTE(review): this assumes the "levihsu/OOTDiffusion" repo is laid out as
    # a diffusers pipeline that AutoPipelineForImage2Image can resolve and that
    # an fp16 variant is published — confirm against the model repository.
    loaded = AutoPipelineForImage2Image.from_pretrained(
        "levihsu/OOTDiffusion",
        torch_dtype=dtype,
        variant="fp16" if device == "cuda" else None,
        use_safetensors=True,
    ).to(device)

    loaded.enable_attention_slicing()
    if device == "cuda":
        # xformers is only an optimisation; do not fail the load when it
        # cannot be enabled.
        try:
            loaded.enable_xformers_memory_efficient_attention()
        except Exception as exc:
            print(f"[WARN] xformers attention unavailable, continuing without it: {exc}")

    # Publish to the cache only once configuration is finished, so a failure
    # above never leaves a half-configured pipeline behind.
    pipe = loaded
    return pipe


# ── Fallback: img2img blend (CPU-safe demo mode) ─────────────────────────────
def blend_tryon_fallback(person_img: Image.Image, garment_img: Image.Image,
                          strength: float = 0.55) -> Image.Image:
    """Overlay the garment semi-transparently on the person image.

    A lightweight alpha-composite 'preview' used when the diffusion model
    cannot be loaded (e.g., no GPU / OOM).  Not photorealistic but functional.

    Args:
        person_img: Base image; its size determines the garment placement.
        garment_img: Garment image, scaled to 55% of the person's width and
            45% of its height and pasted at 22% from the left and top edges.
        strength: Garment opacity, clamped to the range 0.0-1.0.

    Returns:
        A new RGB image the same size as ``person_img``.
    """
    # Keep the opacity inside the range an 8-bit alpha channel can represent.
    opacity = min(max(float(strength), 0.0), 1.0)

    pw, ph = person_img.size
    # Never request a zero-sized resize for very small person images.
    garment_size = (max(1, int(pw * 0.55)), max(1, int(ph * 0.45)))
    garment = garment_img.resize(garment_size, Image.LANCZOS).convert("RGBA")

    # Scale the garment's own alpha rather than overwriting it, so transparent
    # backgrounds stay transparent. For an opaque garment this is the same
    # uniform alpha of int(255 * strength) as before.
    alpha = garment.getchannel("A").point(lambda a: int(a * opacity))
    garment.putalpha(alpha)

    # Paste roughly on the torso, using the garment's alpha as the mask.
    out = person_img.copy().convert("RGBA")
    out.paste(garment, (int(pw * 0.22), int(ph * 0.22)), garment)
    return out.convert("RGB")


# ── Core inference function ──────────────────────────────────────────────────
def run_tryon(person_img, garment_img, category, num_steps, guidance, seed):
    """Run a try-on request and return ``(image, status_message)``.

    Both inputs are converted to RGB PIL images and resized to 768x1024 when
    CUDA is available, otherwise 512x512. The diffusion pipeline is then run
    on the person image; if loading or running it raises, the result is the
    alpha-composite preview from ``blend_tryon_fallback`` instead.

    Args:
        person_img: Person photo as a numpy array, or None.
        garment_img: Garment photo as a numpy array, or None.
        category: Garment category text inserted into the prompt.
        num_steps: Number of inference steps (converted with ``int``).
        guidance: Guidance scale (converted with ``float``).
        seed: Seed for the random generator; None falls back to 42.

    Returns:
        A tuple of the result image (None if an input is missing) and a
        human-readable status string.
    """
    if person_img is None or garment_img is None:
        return None, "⚠️ Please upload both a person image and a garment image."

    person_pil = Image.fromarray(person_img).convert("RGB")
    garment_pil = Image.fromarray(garment_img).convert("RGB")

    # Target size (HF Spaces free tier has limited VRAM)
    target_size = (768, 1024) if torch.cuda.is_available() else (512, 512)
    person_pil = person_pil.resize(target_size, Image.LANCZOS)
    garment_pil = garment_pil.resize(target_size, Image.LANCZOS)

    # A cleared number field arrives as None; use the UI default instead of
    # raising before the fallback below can take over.
    seed_value = 42 if seed is None else int(seed)
    generator = torch.Generator().manual_seed(seed_value)

    try:
        pl = load_pipeline()

        # NOTE(review): garment_pil is not passed to the pipeline; only the
        # category text describes the garment on this path — confirm intended.
        result = pl(
            prompt=(
                f"A person wearing the {category}, "
                "photorealistic, high quality, fashion photography, "
                "studio lighting, 4k, detailed"
            ),
            negative_prompt=(
                "blurry, deformed, ugly, distorted body, "
                "bad anatomy, disfigured, extra limbs"
            ),
            image=person_pil,
            strength=0.75,
            num_inference_steps=int(num_steps),
            guidance_scale=float(guidance),
            generator=generator,
        ).images[0]

    except Exception as e:
        # Boundary handler: any load/inference failure degrades to the preview.
        print(f"[WARN] Diffusion pipeline failed: {e}")
        print("[INFO] Falling back to alpha-composite preview …")
        result = blend_tryon_fallback(person_pil, garment_pil)

        if not torch.cuda.is_available():
            return result, (
                "⚠️ GPU not available – showing quick preview composite.\n"
                "For full diffusion quality, run on a GPU Space or locally."
            )
        # A GPU is present, so do not blame the hardware for the failure.
        return result, (
            f"⚠️ Diffusion pipeline failed ({type(e).__name__}) – "
            "showing quick preview composite.\n"
            "See the application logs for details."
        )

    return result, "✅ Try-on complete!"


# ── Gradio UI ────────────────────────────────────────────────────────────────
# Choices offered by the "Garment Category" dropdown; the first entry is the
# default. The selected string is interpolated into the prompt by run_tryon.
CATEGORY_OPTIONS = [
    "upper-body garment",
    "lower-body garment",
    "full-body outfit / dress",
    "jacket / outerwear",
]

# Custom stylesheet handed to gr.Blocks(css=...): dark background, lime accent,
# monospace body font, and a hidden footer.
# NOTE(review): selectors such as .gr-button-primary, .gr-image, .gr-box and
# .gr-form rely on Gradio's generated class names — confirm they match the
# Gradio version this app runs on.
css = """
:root {
    --accent: #c8f542;
    --bg: #0d0d0d;
    --surface: #1a1a1a;
    --border: #2a2a2a;
    --text: #f0f0f0;
    --muted: #888;
}
body, .gradio-container { background: var(--bg) !important; color: var(--text); font-family: 'DM Mono', monospace; }
h1 { font-size: 2.4rem; font-weight: 800; letter-spacing: -1px; color: var(--accent); margin-bottom: 0; }
.subtitle { color: var(--muted); font-size: 0.85rem; margin-bottom: 2rem; }
.gr-button-primary { background: var(--accent) !important; color: #000 !important; font-weight: 700 !important; border-radius: 6px !important; }
.gr-button-primary:hover { opacity: 0.85 !important; }
.gr-image { border: 1px solid var(--border) !important; border-radius: 8px !important; }
label { color: var(--muted) !important; font-size: 0.78rem !important; letter-spacing: 0.05em !important; text-transform: uppercase; }
.gr-box, .gr-form { background: var(--surface) !important; border: 1px solid var(--border) !important; border-radius: 10px !important; }
footer { display: none !important; }
"""

# Build the two-column UI: inputs on the left, result and controls on the right.
with gr.Blocks(css=css, title="Virtual Try-On") as demo:
    # Page header; also pulls in the web fonts referenced by the stylesheet.
    gr.HTML("""
        <link href="https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@800&display=swap" rel="stylesheet">
        <h1 style="font-family:'Syne',sans-serif;">VIRTUAL TRY·ON</h1>
        <p class="subtitle">Powered by OOTDiffusion &nbsp;·&nbsp; Upload a person + garment → get the look</p>
    """)

    with gr.Row():
        # Left column: the two uploads, delivered to run_tryon as numpy arrays.
        with gr.Column(scale=1):
            person_input  = gr.Image(label="Person Photo", type="numpy", height=380)
            garment_input = gr.Image(label="Garment / Clothing", type="numpy", height=380)

        # Right column: output image, status text, and generation settings.
        with gr.Column(scale=1):
            output_img    = gr.Image(label="Result", type="pil", height=420)
            status_box    = gr.Textbox(label="Status", interactive=False, lines=2)

            category      = gr.Dropdown(CATEGORY_OPTIONS, value=CATEGORY_OPTIONS[0], label="Garment Category")
            with gr.Row():
                steps     = gr.Slider(10, 50, value=30, step=1,  label="Inference Steps")
                guidance  = gr.Slider(1.0, 12.0, value=7.5, step=0.5, label="Guidance Scale")
            seed          = gr.Number(value=42, label="Seed", precision=0)

            run_btn = gr.Button("✦ Generate Try-On", variant="primary", size="lg")

    # Input order must match run_tryon's positional parameters; the two outputs
    # receive its (image, status) return tuple.
    run_btn.click(
        fn=run_tryon,
        inputs=[person_input, garment_input, category, steps, guidance, seed],
        outputs=[output_img, status_box],
    )

    # Footer note about the model and the CPU fallback.
    gr.HTML("""
        <p style="color:#555;font-size:0.75rem;margin-top:1.5rem;">
        Model: OOTDiffusion (levihsu/OOTDiffusion) &nbsp;|&nbsp;
        Falls back to preview composite on CPU Spaces &nbsp;|&nbsp;
        For best results use a GPU-enabled Space.
        </p>
    """)

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()