# Try-Space-Tryon / CatVTON / run_max_quality.py
# Uploaded via huggingface_hub by feylur (commit e5bd8d8, verified).
# Single-shot CatVTON virtual try-on script: loads the inpainting pipeline and
# the automatic body masker, reads person.jpg / garment.jpg from the current
# directory, runs one inference pass, and writes the composite to result.jpg.
# Requires a CUDA device and the CatVTON repo checked out under /kaggle/working.
import os
import sys
import torch
from PIL import Image
import gc
# Make the vendored CatVTON package importable (Kaggle-specific path).
sys.path.insert(0, '/kaggle/working/CatVTON')
from model.pipeline import CatVTONPipeline
from model.cloth_masker import AutoMasker
from utils import init_weight_dtype, resize_and_crop, resize_and_padding
# Clear GPU memory left over from any earlier run in the same session before
# loading the (large) model weights.
torch.cuda.empty_cache()
gc.collect()
print("πŸ”„ Loading CatVTON models...")
from huggingface_hub import snapshot_download
# Download (or reuse the cached) CatVTON weight snapshot; returns its local dir.
repo_path = snapshot_download(repo_id="zhengchong/CatVTON")
pipeline = CatVTONPipeline(
base_ckpt="booksforcharlie/stable-diffusion-inpainting",
attn_ckpt=repo_path,  # attention weights come from the downloaded snapshot
attn_ckpt_version="mix",  # presumably the combined upper/lower/overall variant — confirm against CatVTON docs
weight_dtype=init_weight_dtype("fp16"),  # half precision to fit GPU memory
use_tf32=True, # CHANGED: Enable TF32 for better quality
device='cuda'
)
# AutoMasker combines DensePose (body parsing) and SCHP (human parsing)
# checkpoints from the same snapshot to produce the garment-region mask.
automasker = AutoMasker(
densepose_ckpt=os.path.join(repo_path, "DensePose"),
schp_ckpt=os.path.join(repo_path, "SCHP"),
device='cuda'
)
print("βœ… Models loaded!\n")
# Load images
print("πŸ“‚ Loading images...")
person_img = Image.open("person.jpg").convert("RGB")
cloth_img = Image.open("garment.jpg").convert("RGB")
print(f" Person: {person_img.size[0]}x{person_img.size[1]} px")
print(f" Garment: {cloth_img.size[0]}x{cloth_img.size[1]} px")
# IMPROVED: Better resolution for more natural results
# Both inputs are normalized to the same 768x1024 (w x h) canvas: the person
# photo is cropped to fill it, the garment is letterboxed to preserve shape.
target_height = 1024
target_width = 768
person_img = resize_and_crop(person_img, (target_width, target_height))
cloth_img = resize_and_padding(cloth_img, (target_width, target_height))
print("\n🎭 Generating body mask...")
# "upper" selects the upper-body region to inpaint; the masker returns a dict
# and only the 'mask' entry is used here.
mask = automasker(person_img, "upper")['mask']
# Free the masker's intermediate GPU buffers before the diffusion pass.
torch.cuda.empty_cache()
print("\nβš™οΈ Running inference with MAXIMUM NATURAL FIT settings...")
print(f" β€’ Resolution: {target_width}x{target_height} HD")
print(" β€’ Steps: 50 (optimal quality/speed balance)")
print(" β€’ Guidance: 2.5 (natural garment integration)")
print(" β€’ Seed: 42 (reproducible)")
print(" β€’ Estimated time: 2-3 minutes\n")
# IMPROVED SETTINGS for natural fit
# NOTE(review): the pipeline appears to return a list of PIL images; only the
# first result is kept — confirm against CatVTONPipeline.__call__.
result = pipeline(
image=person_img,
condition_image=cloth_img,
mask=mask,
num_inference_steps=50, # CHANGED: 50 is optimal (60 can over-process)
guidance_scale=2.5, # CHANGED: 2.5 for natural look (5.0 was too rigid)
seed=42,  # fixed seed so reruns are reproducible
height=target_height,
width=target_width
)[0]
result.save("result.jpg", quality=95) # CHANGED: 95 quality (98 can show artifacts)
print("\nβœ… SUCCESS! Result saved to result.jpg")