"""Gradio demo for a virtual try-on: diffusion pipeline with a composite fallback."""

import importlib
import importlib.util
import os
import subprocess
import sys

import gradio as gr
import numpy as np
import requests
import torch
from PIL import Image

# ── Auto-install heavy deps if missing ──────────────────────────────────────
REQUIRED_PACKAGES = (
    "diffusers>=0.27.0",
    "transformers>=4.38.0",
    "accelerate",
    "omegaconf",
)


def ensure_deps():
    """Pip-install any required package that cannot be found.

    Presence is probed with ``importlib.util.find_spec`` so the heavy
    packages are not imported merely to check that they exist. Only presence
    is checked; the ``>=`` version bounds are passed to pip on install but
    are not verified for packages that are already installed.

    Raises:
        subprocess.CalledProcessError: If a ``pip install`` invocation fails.
    """
    installed_any = False
    for requirement in REQUIRED_PACKAGES:
        module_name = requirement.split(">=")[0].replace("-", "_")
        if importlib.util.find_spec(module_name) is not None:
            continue
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", requirement, "-q"]
        )
        installed_any = True
    if installed_any:
        # Packages installed while the interpreter is running may be missed
        # by the import system's directory caches until they are invalidated.
        importlib.invalidate_caches()


ensure_deps()

# ── Model loading (lazy, cached) ─────────────────────────────────────────────
MODEL_ID = "levihsu/OOTDiffusion"  # best free VITON model on HF

pipe = None


def load_pipeline():
    """Return the diffusion pipeline, loading it on first use.

    The loaded pipeline is cached in the module-level ``pipe``. The global is
    assigned only after the pipeline is fully configured, so a failure part
    way through loading never leaves a half-configured object cached.

    Returns:
        The pipeline object produced by
        ``AutoPipelineForImage2Image.from_pretrained``.

    Raises:
        Exception: Whatever ``diffusers`` raises when the model cannot be
            downloaded, loaded, or moved to the target device.
    """
    global pipe
    if pipe is not None:
        return pipe

    from diffusers import AutoPipelineForImage2Image

    on_gpu = torch.cuda.is_available()
    device = "cuda" if on_gpu else "cpu"
    dtype = torch.float16 if on_gpu else torch.float32
    print(f"[VirtualTryOn] Loading OOTDiffusion on {device} …")

    # We use OOTD via the community pipeline on diffusers.
    # NOTE(review): confirm that this repository is actually loadable through
    # AutoPipelineForImage2Image; if it is not, every request ends up in the
    # composite fallback.
    loaded = AutoPipelineForImage2Image.from_pretrained(
        MODEL_ID,
        torch_dtype=dtype,
        variant="fp16" if on_gpu else None,
        use_safetensors=True,
    ).to(device)
    loaded.enable_attention_slicing()

    if on_gpu:
        # xformers is an optional optimisation; its absence must not make the
        # whole pipeline unusable.
        try:
            loaded.enable_xformers_memory_efficient_attention()
        except Exception as exc:
            print(f"[WARN] xformers attention unavailable, continuing without it: {exc}")

    pipe = loaded
    return pipe


# ── Fallback: img2img blend (CPU-safe demo mode) ─────────────────────────────
def blend_tryon_fallback(
    person_img: Image.Image,
    garment_img: Image.Image,
    strength: float = 0.55,
) -> Image.Image:
    """Overlay the garment on the person's torso as a semi-transparent preview.

    A lightweight alpha composite used when the diffusion model cannot be
    used. Not photorealistic but functional.

    Args:
        person_img: Image of the person; it is not modified.
        garment_img: Image of the garment; it is not modified.
        strength: Opacity of the garment overlay. Values outside ``[0, 1]``
            are clamped so the alpha channel stays within 0–255.

    Returns:
        A new RGB image the size of ``person_img`` with the garment pasted
        over the approximate torso region.
    """
    person_w, person_h = person_img.size

    # Resize garment to fit the torso region of the person image; keep each
    # side at least 1 px so very small inputs do not make resize() fail.
    overlay_size = (
        max(1, int(person_w * 0.55)),
        max(1, int(person_h * 0.45)),
    )
    overlay = garment_img.resize(overlay_size, Image.LANCZOS).convert("RGBA")

    opacity = min(max(strength, 0.0), 1.0)
    overlay.putalpha(int(255 * opacity))

    # Paste roughly on torso, using the overlay's own alpha as the mask.
    canvas = person_img.copy().convert("RGBA")
    top_left = (int(person_w * 0.22), int(person_h * 0.22))
    canvas.paste(overlay, top_left, overlay)
    return canvas.convert("RGB")


# ── Core inference function ──────────────────────────────────────────────────
DEFAULT_SEED = 42


def run_tryon(person_img, garment_img, category, num_steps, guidance, seed):
    """Generate a try-on image, falling back to a composite preview on failure.

    Args:
        person_img: Person photo as a numpy array, or ``None`` if not uploaded.
        garment_img: Garment photo as a numpy array, or ``None`` if not uploaded.
        category: Garment category text interpolated into the prompt.
        num_steps: Number of inference steps (converted with ``int``).
        guidance: Guidance scale (converted with ``float``).
        seed: Seed for the random generator; ``None`` (an emptied input field)
            falls back to ``DEFAULT_SEED``.

    Returns:
        A ``(image, status_message)`` tuple. ``image`` is ``None`` when either
        input image is missing.
    """
    if person_img is None or garment_img is None:
        return None, "⚠️ Please upload both a person image and a garment image."

    # Target size (HF Spaces free tier has limited VRAM)
    target_size = (768, 1024) if torch.cuda.is_available() else (512, 512)
    person_pil = (
        Image.fromarray(person_img).convert("RGB").resize(target_size, Image.LANCZOS)
    )
    garment_pil = (
        Image.fromarray(garment_img).convert("RGB").resize(target_size, Image.LANCZOS)
    )

    seed_value = DEFAULT_SEED if seed is None else int(seed)
    generator = torch.Generator().manual_seed(seed_value)

    # Broad catch is deliberate: this is the UI boundary, and any failure in
    # loading or inference should degrade to the preview instead of erroring.
    try:
        pl = load_pipeline()
        # NOTE(review): garment_pil is not passed to the pipeline here; the
        # garment only influences the result through the category text in the
        # prompt. Confirm whether the model expects a garment conditioning
        # input.
        result = pl(
            prompt=(
                f"A person wearing the {category}, "
                "photorealistic, high quality, fashion photography, "
                "studio lighting, 4k, detailed"
            ),
            negative_prompt=(
                "blurry, deformed, ugly, distorted body, "
                "bad anatomy, disfigured, extra limbs"
            ),
            image=person_pil,
            strength=0.75,
            num_inference_steps=int(num_steps),
            guidance_scale=float(guidance),
            generator=generator,
        ).images[0]
    except Exception as e:
        print(f"[WARN] Diffusion pipeline failed: {e}")
        print("[INFO] Falling back to alpha-composite preview …")
        preview = blend_tryon_fallback(person_pil, garment_pil)
        # Report the real failure type: the fallback is reached for any
        # error (download, OOM, bad arguments), not only for a missing GPU.
        return preview, (
            f"⚠️ Diffusion pipeline unavailable ({type(e).__name__}) – "
            "showing quick preview composite.\n"
            "For full diffusion quality, run on a GPU Space or locally."
        )
    return result, "✅ Try-on complete!"


# ── Gradio UI ────────────────────────────────────────────────────────────────
CATEGORY_OPTIONS = [
    "upper-body garment",
    "lower-body garment",
    "full-body outfit / dress",
    "jacket / outerwear",
]

css = (
    " :root { --accent: #c8f542; --bg: #0d0d0d; --surface: #1a1a1a; "
    "--border: #2a2a2a; --text: #f0f0f0; --muted: #888; } "
    "body, .gradio-container { background: var(--bg) !important; "
    "color: var(--text); font-family: 'DM Mono', monospace; } "
    "h1 { font-size: 2.4rem; font-weight: 800; letter-spacing: -1px; "
    "color: var(--accent); margin-bottom: 0; } "
    ".subtitle { color: var(--muted); font-size: 0.85rem; margin-bottom: 2rem; } "
    ".gr-button-primary { background: var(--accent) !important; "
    "color: #000 !important; font-weight: 700 !important; "
    "border-radius: 6px !important; } "
    ".gr-button-primary:hover { opacity: 0.85 !important; } "
    ".gr-image { border: 1px solid var(--border) !important; "
    "border-radius: 8px !important; } "
    "label { color: var(--muted) !important; font-size: 0.78rem !important; "
    "letter-spacing: 0.05em !important; text-transform: uppercase; } "
    ".gr-box, .gr-form { background: var(--surface) !important; "
    "border: 1px solid var(--border) !important; "
    "border-radius: 10px !important; } "
    "footer { display: none !important; } "
)

with gr.Blocks(css=css, title="Virtual Try-On") as demo:
    # NOTE(review): the stylesheet targets `h1` and `.subtitle`, but this
    # markup contains neither element; confirm whether header tags are missing.
    gr.HTML(
        "\nPowered by OOTDiffusion · Upload a person + garment → get the look\n"
    )

    with gr.Row():
        with gr.Column(scale=1):
            person_input = gr.Image(label="Person Photo", type="numpy", height=380)
            garment_input = gr.Image(
                label="Garment / Clothing", type="numpy", height=380
            )
        with gr.Column(scale=1):
            output_img = gr.Image(label="Result", type="pil", height=420)
            status_box = gr.Textbox(label="Status", interactive=False, lines=2)

    # NOTE(review): the controls below are placed under both columns; confirm
    # this matches the intended layout.
    category = gr.Dropdown(
        CATEGORY_OPTIONS,
        value=CATEGORY_OPTIONS[0],
        label="Garment Category",
    )

    with gr.Row():
        steps = gr.Slider(10, 50, value=30, step=1, label="Inference Steps")
        guidance = gr.Slider(1.0, 12.0, value=7.5, step=0.5, label="Guidance Scale")
        seed = gr.Number(value=DEFAULT_SEED, label="Seed", precision=0)

    run_btn = gr.Button("✦ Generate Try-On", variant="primary", size="lg")
    run_btn.click(
        fn=run_tryon,
        inputs=[person_input, garment_input, category, steps, guidance, seed],
        outputs=[output_img, status_box],
    )

    gr.HTML(
        "Model: OOTDiffusion (levihsu/OOTDiffusion) | "
        "Falls back to preview composite on CPU Spaces | "
        "For best results use a GPU-enabled Space.\n"
    )

if __name__ == "__main__":
    demo.launch()