# File size: 7,773 Bytes | 22f3c7e
import gradio as gr
import torch
import numpy as np
from PIL import Image
import requests
import os
import sys
import subprocess
# ── Auto-install heavy deps if missing ──────────────────────────────────────
def ensure_deps(pkgs=None):
    """Install any missing heavy runtime dependencies with pip.

    Each requirement is reduced to its importable module name (version
    specifier dropped, ``-`` replaced by ``_``). If that module cannot be
    located, the full requirement string is handed to
    ``python -m pip install``.

    Args:
        pkgs: Optional iterable of pip requirement strings. Defaults to the
            diffusion stack this app needs.

    Raises:
        subprocess.CalledProcessError: if a pip invocation fails.
    """
    import importlib
    import importlib.util
    import re

    if pkgs is None:
        pkgs = (
            "diffusers>=0.27.0",
            "transformers>=4.38.0",
            "accelerate",
            "omegaconf",
        )

    installed_any = False
    for pkg in pkgs:
        # Keep only the distribution name: cut at the first version operator,
        # extras bracket, environment marker or whitespace.
        dist_name = re.split(r"[<>=!~\[;\s]", pkg.strip(), maxsplit=1)[0]
        module_name = dist_name.replace("-", "_")

        # find_spec() locates the module without executing it, so startup
        # does not pay for importing the whole ML stack just to probe it.
        if importlib.util.find_spec(module_name) is not None:
            continue

        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg, "-q"])
        installed_any = True

    if installed_any:
        # Packages installed while the interpreter is running are only
        # guaranteed to be importable after the finder caches are reset.
        importlib.invalidate_caches()
# Runs at import time, before load_pipeline() lazily imports diffusers.
ensure_deps()
# ── Model loading (lazy, cached) ─────────────────────────────────────────────
pipe = None  # process-wide pipeline cache, filled by load_pipeline()


def load_pipeline():
    """Return the shared diffusion pipeline, building it on first use.

    The pipeline is created with ``AutoPipelineForImage2Image`` in float16 on
    CUDA and float32 on CPU, with attention slicing enabled. On CUDA,
    xformers attention is enabled when available. The result is stored in the
    module-level ``pipe`` only after it has been fully configured, so a
    failure part-way through never leaves a half-configured pipeline cached.

    Returns:
        The cached pipeline object.

    Raises:
        Whatever ``from_pretrained`` / ``.to`` raise when the model cannot be
        downloaded or loaded.
    """
    global pipe
    if pipe is not None:
        return pipe

    # Imported lazily so importing this module does not load diffusers.
    from diffusers import AutoPipelineForImage2Image

    on_gpu = torch.cuda.is_available()
    device = "cuda" if on_gpu else "cpu"
    dtype = torch.float16 if on_gpu else torch.float32
    print(f"[VirtualTryOn] Loading OOTDiffusion on {device} …")

    # NOTE(review): whether this repo ships a layout (and an fp16 variant)
    # that AutoPipelineForImage2Image can load is not verifiable from this
    # file; confirm against the model card.
    loaded = AutoPipelineForImage2Image.from_pretrained(
        "levihsu/OOTDiffusion",
        torch_dtype=dtype,
        variant="fp16" if on_gpu else None,
        use_safetensors=True,
    ).to(device)
    loaded.enable_attention_slicing()

    if on_gpu:
        # xformers is an optional speed-up; its absence must not abort loading.
        try:
            loaded.enable_xformers_memory_efficient_attention()
        except Exception as exc:
            print(f"[WARN] xformers attention unavailable, continuing without it: {exc}")

    # Publish only the fully configured pipeline.
    pipe = loaded
    return pipe
# ── Fallback: img2img blend (CPU-safe demo mode) ─────────────────────────────
def blend_tryon_fallback(person_img: Image.Image, garment_img: Image.Image,
                         strength: float = 0.55) -> Image.Image:
    """Overlay a semi-transparent garment on the person image as a preview.

    A lightweight composite used when the diffusion model cannot be used.
    Not photorealistic, but it always produces an image.

    Args:
        person_img: Base image; it is copied, never modified.
        garment_img: Garment image, resized to 55% of the person image's
            width and 45% of its height.
        strength: Garment opacity, clamped to the range 0.0-1.0.

    Returns:
        A new RGB image the same size as ``person_img``.
    """
    person_w, person_h = person_img.size

    # Clamp so putalpha() always receives a valid 0-255 band value.
    opacity = int(255 * min(max(float(strength), 0.0), 1.0))

    # Keep each dimension at least 1 px; a zero-sized resize target is
    # rejected by Pillow for very small person images.
    overlay_size = (
        max(1, int(person_w * 0.55)),
        max(1, int(person_h * 0.45)),
    )
    overlay = garment_img.resize(overlay_size, Image.LANCZOS).convert("RGBA")
    overlay.putalpha(opacity)

    # Paste at 22% from the left/top edge, using the overlay as its own mask
    # so its alpha band controls the blend.
    canvas = person_img.copy().convert("RGBA")
    offset = (int(person_w * 0.22), int(person_h * 0.22))
    canvas.paste(overlay, offset, overlay)
    return canvas.convert("RGB")
# ── Core inference function ──────────────────────────────────────────────────
def run_tryon(person_img, garment_img, category, num_steps, guidance, seed):
    """Generate a try-on image, falling back to a simple composite on failure.

    Args:
        person_img: Person photo as an array accepted by ``Image.fromarray``,
            or ``None`` when nothing was uploaded.
        garment_img: Garment photo in the same form, or ``None``.
        category: Garment description interpolated into the prompt.
        num_steps: Number of inference steps (converted with ``int``).
        guidance: Guidance scale (converted with ``float``).
        seed: Seed for the random generator; ``None`` is treated as 42.

    Returns:
        A ``(image, status)`` tuple. ``image`` is ``None`` when either input
        is missing; otherwise it is the pipeline output or the fallback
        composite. ``status`` is a human-readable message.
    """
    if person_img is None or garment_img is None:
        return None, "⚠️ Please upload both a person image and a garment image."

    person_pil = Image.fromarray(person_img).convert("RGB")
    garment_pil = Image.fromarray(garment_img).convert("RGB")

    # Smaller working size without CUDA to keep memory and runtime down.
    target_size = (768, 1024) if torch.cuda.is_available() else (512, 512)
    person_pil = person_pil.resize(target_size, Image.LANCZOS)
    garment_pil = garment_pil.resize(target_size, Image.LANCZOS)

    # The seed may arrive as None (e.g. a cleared numeric field); use the
    # UI default instead of crashing before the fallback can run.
    seed_value = 42 if seed is None else int(seed)
    generator = torch.Generator().manual_seed(seed_value)

    try:
        pl = load_pipeline()
        # NOTE(review): only the person image is passed to the pipeline; the
        # garment image is used solely by the fallback composite below.
        result = pl(
            prompt=(
                f"A person wearing the {category}, "
                "photorealistic, high quality, fashion photography, "
                "studio lighting, 4k, detailed"
            ),
            negative_prompt=(
                "blurry, deformed, ugly, distorted body, "
                "bad anatomy, disfigured, extra limbs"
            ),
            image=person_pil,
            strength=0.75,
            num_inference_steps=int(num_steps),
            guidance_scale=float(guidance),
            generator=generator,
        ).images[0]
    except Exception as e:
        # Deliberate best-effort boundary: any failure (download, OOM, bad
        # arguments) degrades to the composite preview instead of an error.
        print(f"[WARN] Diffusion pipeline failed: {e}")
        print("[INFO] Falling back to alpha-composite preview …")
        result = blend_tryon_fallback(person_pil, garment_pil)
        return result, (
            "⚠️ Diffusion pipeline unavailable — showing quick preview composite.\n"
            "For full diffusion quality, run on a GPU Space or locally."
        )

    return result, "✅ Try-on complete!"
# ── Gradio UI ────────────────────────────────────────────────────────────────
# Choices shown in the "Garment Category" dropdown. The selected text is
# interpolated verbatim into the prompt built by run_tryon().
CATEGORY_OPTIONS = [
    "upper-body garment",
    "lower-body garment",
    "full-body outfit / dress",
    "jacket / outerwear",
]
# Custom stylesheet passed to gr.Blocks(css=...): dark theme driven by the
# CSS variables declared in :root, with a lime accent colour.
css = """
:root {
--accent: #c8f542;
--bg: #0d0d0d;
--surface: #1a1a1a;
--border: #2a2a2a;
--text: #f0f0f0;
--muted: #888;
}
body, .gradio-container { background: var(--bg) !important; color: var(--text); font-family: 'DM Mono', monospace; }
h1 { font-size: 2.4rem; font-weight: 800; letter-spacing: -1px; color: var(--accent); margin-bottom: 0; }
.subtitle { color: var(--muted); font-size: 0.85rem; margin-bottom: 2rem; }
.gr-button-primary { background: var(--accent) !important; color: #000 !important; font-weight: 700 !important; border-radius: 6px !important; }
.gr-button-primary:hover { opacity: 0.85 !important; }
.gr-image { border: 1px solid var(--border) !important; border-radius: 8px !important; }
label { color: var(--muted) !important; font-size: 0.78rem !important; letter-spacing: 0.05em !important; text-transform: uppercase; }
.gr-box, .gr-form { background: var(--surface) !important; border: 1px solid var(--border) !important; border-radius: 10px !important; }
footer { display: none !important; }
"""
# Build the UI: header, an input column and an output column side by side,
# generation controls, the trigger button and a footer note.
with gr.Blocks(css=css, title="Virtual Try-On") as demo:
    # Header; the <link> pulls in the web fonts referenced by the stylesheet.
    gr.HTML("""
        <link href="https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@800&display=swap" rel="stylesheet">
        <h1 style="font-family:'Syne',sans-serif;">VIRTUAL TRY·ON</h1>
        <p class="subtitle">Powered by OOTDiffusion · Upload a person + garment → get the look</p>
    """)

    with gr.Row():
        # Left column: the two uploads, delivered to run_tryon as numpy arrays.
        with gr.Column(scale=1):
            person_input = gr.Image(label="Person Photo", type="numpy", height=380)
            garment_input = gr.Image(label="Garment / Clothing", type="numpy", height=380)
        # Right column: generated image plus a read-only status message.
        with gr.Column(scale=1):
            output_img = gr.Image(label="Result", type="pil", height=420)
            status_box = gr.Textbox(label="Status", interactive=False, lines=2)

    category = gr.Dropdown(CATEGORY_OPTIONS, value=CATEGORY_OPTIONS[0], label="Garment Category")

    with gr.Row():
        steps = gr.Slider(10, 50, value=30, step=1, label="Inference Steps")
        guidance = gr.Slider(1.0, 12.0, value=7.5, step=0.5, label="Guidance Scale")
        seed = gr.Number(value=42, label="Seed", precision=0)

    run_btn = gr.Button("✦ Generate Try-On", variant="primary", size="lg")
    # Input order must match run_tryon's positional parameters.
    run_btn.click(
        fn=run_tryon,
        inputs=[person_input, garment_input, category, steps, guidance, seed],
        outputs=[output_img, status_box],
    )

    gr.HTML("""
        <p style="color:#555;font-size:0.75rem;margin-top:1.5rem;">
        Model: OOTDiffusion (levihsu/OOTDiffusion) |
        Falls back to preview composite on CPU Spaces |
        For best results use a GPU-enabled Space.
        </p>
    """)

if __name__ == "__main__":
    demo.launch()
|