# oodtryon / app.py
# fanboyd13's picture
# Upload 5 files
# 22f3c7e verified
import gradio as gr
import torch
import numpy as np
from PIL import Image
import requests
import os
import sys
import subprocess
# ── Auto-install heavy deps if missing ──────────────────────────────────────
def ensure_deps():
    """Install the heavy runtime dependencies that are not yet importable.

    Each requirement is probed by importing its top-level module. When the
    import fails, the requirement string (including any version specifier)
    is handed to ``pip install`` for the running interpreter.

    Only importability is checked: an already-installed package that is
    older than the requested minimum version is left untouched.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits non-zero.
    """
    # Local imports keep this block self-contained; both are stdlib.
    import importlib
    import re

    pkgs = ["diffusers>=0.27.0", "transformers>=4.38.0", "accelerate", "omegaconf"]
    for pkg in pkgs:
        # Cut the requirement at the first specifier/extras/marker character
        # so forms such as "pkg==1.0", "pkg~=1.0" or "pkg[extra]" also work.
        # Assumes the import name equals the distribution name with dashes
        # replaced by underscores, which holds for the packages listed above.
        module_name = re.split(r"[<>=!~\[;\s]", pkg, maxsplit=1)[0].replace("-", "_")
        try:
            importlib.import_module(module_name)
        except ImportError:
            subprocess.check_call([sys.executable, "-m", "pip", "install", pkg, "-q"])
            # Packages installed after interpreter start-up may be invisible
            # to the import system until its finder caches are cleared.
            importlib.invalidate_caches()


ensure_deps()
# ── Model loading (lazy, cached) ─────────────────────────────────────────────
# Process-wide cache for the diffusion pipeline; filled by load_pipeline().
pipe = None


def load_pipeline():
    """Return the shared diffusion pipeline, building it on first use.

    The pipeline is placed on CUDA with float16 weights when a GPU is
    available, otherwise on the CPU with float32 weights. The module-level
    ``pipe`` is assigned only after setup has finished, so a failure part-way
    through never leaves a half-configured pipeline in the cache.

    Returns:
        The cached pipeline object.

    Raises:
        Exception: Whatever ``diffusers`` raises while importing, downloading
            or moving the model; callers are expected to handle it.
    """
    global pipe
    if pipe is not None:
        return pipe

    # Imported lazily so that merely importing this module stays cheap.
    from diffusers import AutoPipelineForImage2Image

    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
    print(f"[VirtualTryOn] Loading OOTDiffusion on {device} …")

    # NOTE(review): confirm that this repository is published in a layout
    # AutoPipelineForImage2Image can load and that it ships an "fp16"
    # variant; if not, this call raises and callers use their fallback.
    loaded = AutoPipelineForImage2Image.from_pretrained(
        "levihsu/OOTDiffusion",
        torch_dtype=dtype,
        variant="fp16" if device == "cuda" else None,
        use_safetensors=True,
    ).to(device)
    loaded.enable_attention_slicing()

    if device == "cuda":
        # xFormers is an optional optimisation: if it cannot be enabled the
        # pipeline is still usable, so log and carry on instead of failing.
        try:
            loaded.enable_xformers_memory_efficient_attention()
        except Exception as exc:
            print(f"[WARN] xFormers attention unavailable, continuing without it: {exc}")

    pipe = loaded
    return pipe
# ── Fallback: img2img blend (CPU-safe demo mode) ─────────────────────────────
def blend_tryon_fallback(person_img: Image.Image, garment_img: Image.Image,
                         strength: float = 0.55) -> Image.Image:
    """Overlay the garment semi-transparently on the person image.

    A lightweight alpha-composite 'preview' used when the diffusion model
    cannot be loaded or run. Not photorealistic but functional.

    Args:
        person_img: Image of the person; it is not modified.
        garment_img: Image of the garment; it is not modified.
        strength: Opacity of the garment overlay, clamped to ``[0.0, 1.0]``.

    Returns:
        A new RGB image with the same size as ``person_img``.
    """
    pw, ph = person_img.size

    # Keep the alpha byte inside 0..255 whatever the caller passes.
    alpha = int(255 * min(max(strength, 0.0), 1.0))

    # Scale the garment to a fixed fraction of the person image. The 1 px
    # floor prevents a zero-sized target, which resize() rejects, when the
    # person image is only a pixel or two across.
    garment_size = (max(1, int(pw * 0.55)), max(1, int(ph * 0.45)))
    garment_resized = garment_img.resize(garment_size, Image.LANCZOS).convert("RGBA")
    garment_resized.putalpha(alpha)

    # convert() returns a new image, so the caller's image is left untouched.
    out = person_img.convert("RGBA")

    # Fixed offset that roughly corresponds to the torso in a portrait shot.
    gx = int(pw * 0.22)
    gy = int(ph * 0.22)
    out.paste(garment_resized, (gx, gy), garment_resized)
    return out.convert("RGB")
# ── Core inference function ──────────────────────────────────────────────────
def run_tryon(person_img, garment_img, category, num_steps, guidance, seed):
    """Produce a try-on image, falling back to a composite preview on failure.

    Args:
        person_img: Person photo as an array accepted by ``Image.fromarray``,
            or ``None`` when nothing was uploaded.
        garment_img: Garment photo in the same form, or ``None``.
        category: Garment description interpolated into the text prompt.
        num_steps: Number of inference steps; converted with ``int``.
        guidance: Guidance scale; converted with ``float``.
        seed: Seed for the random generator; ``None`` is treated as 42.

    Returns:
        A ``(image, status_message)`` tuple. ``image`` is ``None`` when either
        input image is missing.
    """
    if person_img is None or garment_img is None:
        return None, "⚠️ Please upload both a person image and a garment image."

    person_pil = Image.fromarray(person_img).convert("RGB")
    garment_pil = Image.fromarray(garment_img).convert("RGB")

    # Smaller working size without CUDA to keep memory use and runtime down.
    target_size = (512, 512) if not torch.cuda.is_available() else (768, 1024)
    person_pil = person_pil.resize(target_size, Image.LANCZOS)
    garment_pil = garment_pil.resize(target_size, Image.LANCZOS)

    # A missing seed must not abort the request; use 42, the default value
    # shown in the UI. (Presumably a cleared number field arrives as None;
    # verify against the Gradio version in use.)
    seed_value = 42 if seed is None else int(seed)
    generator = torch.Generator().manual_seed(seed_value)

    try:
        pl = load_pipeline()
        # NOTE(review): garment_pil is not passed to the pipeline here; the
        # garment only influences the output through `category` in the
        # prompt. Confirm whether the model call should receive it as well.
        result = pl(
            prompt=(
                f"A person wearing the {category}, "
                "photorealistic, high quality, fashion photography, "
                "studio lighting, 4k, detailed"
            ),
            negative_prompt=(
                "blurry, deformed, ugly, distorted body, "
                "bad anatomy, disfigured, extra limbs"
            ),
            image=person_pil,
            strength=0.75,
            num_inference_steps=int(num_steps),
            guidance_scale=float(guidance),
            generator=generator,
        ).images[0]
        return result, "✅ Try-on complete!"
    except Exception as e:
        # UI boundary: any failure while loading or running the model is
        # logged and downgraded to the cheap composite preview.
        print(f"[WARN] Diffusion pipeline failed: {e}")
        print("[INFO] Falling back to alpha-composite preview …")
        result = blend_tryon_fallback(person_pil, garment_pil)
        return result, (
            "⚠️ GPU not available – showing quick preview composite.\n"
            "For full diffusion quality, run on a GPU Space or locally."
        )
# ── Gradio UI ────────────────────────────────────────────────────────────────
# Choices offered in the "Garment Category" dropdown. The selected string is
# inserted verbatim into the text prompt built by run_tryon(), so each entry
# should read naturally after "A person wearing the ...".
CATEGORY_OPTIONS = [
"upper-body garment",
"lower-body garment",
"full-body outfit / dress",
"jacket / outerwear",
]
# Custom stylesheet handed to gr.Blocks(css=...): a dark theme driven by the
# CSS variables declared in :root, with a lime accent colour, plus a rule
# that hides the page footer.
# NOTE(review): the .gr-* selectors depend on the class names Gradio emits;
# confirm they still match the installed Gradio version.
css = """
:root {
--accent: #c8f542;
--bg: #0d0d0d;
--surface: #1a1a1a;
--border: #2a2a2a;
--text: #f0f0f0;
--muted: #888;
}
body, .gradio-container { background: var(--bg) !important; color: var(--text); font-family: 'DM Mono', monospace; }
h1 { font-size: 2.4rem; font-weight: 800; letter-spacing: -1px; color: var(--accent); margin-bottom: 0; }
.subtitle { color: var(--muted); font-size: 0.85rem; margin-bottom: 2rem; }
.gr-button-primary { background: var(--accent) !important; color: #000 !important; font-weight: 700 !important; border-radius: 6px !important; }
.gr-button-primary:hover { opacity: 0.85 !important; }
.gr-image { border: 1px solid var(--border) !important; border-radius: 8px !important; }
label { color: var(--muted) !important; font-size: 0.78rem !important; letter-spacing: 0.05em !important; text-transform: uppercase; }
.gr-box, .gr-form { background: var(--surface) !important; border: 1px solid var(--border) !important; border-radius: 10px !important; }
footer { display: none !important; }
"""
# Page layout: header, a two-column row (inputs left, result right), the
# generation controls, the trigger button and a footer note.
# NOTE(review): the nesting of the `with` blocks was reconstructed; confirm
# that the controls below the image row are meant to sit at page level and
# not inside one of the columns.
with gr.Blocks(css=css, title="Virtual Try-On") as demo:
    # Header: loads the web fonts used by the stylesheet and shows the title.
    gr.HTML("""
    <link href="https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Syne:wght@800&display=swap" rel="stylesheet">
    <h1 style="font-family:'Syne',sans-serif;">VIRTUAL TRY·ON</h1>
    <p class="subtitle">Powered by OOTDiffusion &nbsp;·&nbsp; Upload a person + garment → get the look</p>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # type="numpy" because run_tryon() feeds these to Image.fromarray.
            person_input = gr.Image(label="Person Photo", type="numpy", height=380)
            garment_input = gr.Image(label="Garment / Clothing", type="numpy", height=380)
        with gr.Column(scale=1):
            output_img = gr.Image(label="Result", type="pil", height=420)
            status_box = gr.Textbox(label="Status", interactive=False, lines=2)

    category = gr.Dropdown(CATEGORY_OPTIONS, value=CATEGORY_OPTIONS[0], label="Garment Category")
    with gr.Row():
        steps = gr.Slider(10, 50, value=30, step=1, label="Inference Steps")
        guidance = gr.Slider(1.0, 12.0, value=7.5, step=0.5, label="Guidance Scale")
    seed = gr.Number(value=42, label="Seed", precision=0)

    run_btn = gr.Button("✦ Generate Try-On", variant="primary", size="lg")
    # Input order must match the parameter order of run_tryon().
    run_btn.click(
        fn=run_tryon,
        inputs=[person_input, garment_input, category, steps, guidance, seed],
        outputs=[output_img, status_box],
    )

    gr.HTML("""
    <p style="color:#555;font-size:0.75rem;margin-top:1.5rem;">
    Model: OOTDiffusion (levihsu/OOTDiffusion) &nbsp;|&nbsp;
    Falls back to preview composite on CPU Spaces &nbsp;|&nbsp;
    For best results use a GPU-enabled Space.
    </p>
    """)

if __name__ == "__main__":
    demo.launch()