"""Run one CatVTON virtual try-on pass.

Reads ``person.jpg`` and ``garment.jpg`` from the working directory, masks
the upper-body region of the person image, runs the CatVTON inpainting
pipeline on the GPU and writes the composite to ``result.jpg``.

The script is intentionally flat (no ``main()``): when run as a notebook
cell, ``pipeline``, ``automasker``, ``mask`` and ``result`` stay available
to later cells.
"""
import gc
import os
import sys

import torch
from huggingface_hub import snapshot_download
from PIL import Image

# CatVTON is used from a source checkout rather than an installed package,
# so its root must be on sys.path before the project imports below.
sys.path.insert(0, '/kaggle/working/CatVTON')

from model.cloth_masker import AutoMasker  # noqa: E402
from model.pipeline import CatVTONPipeline  # noqa: E402
from utils import init_weight_dtype, resize_and_crop, resize_and_padding  # noqa: E402

# --- Run configuration -----------------------------------------------------
# Single source of truth: the status messages below are derived from these
# values so the log can never disagree with what was actually executed.
DEVICE = 'cuda'
PERSON_IMAGE_PATH = "person.jpg"
GARMENT_IMAGE_PATH = "garment.jpg"
OUTPUT_PATH = "result.jpg"

# Working resolution fed to the pipeline (width x height = 768 x 1024).
target_height = 1024
target_width = 768

# Author's tuning notes: 50 steps was preferred over 60 ("can over-process"),
# and guidance 2.5 over 5.0 ("too rigid") for a more natural-looking fit.
NUM_INFERENCE_STEPS = 50
GUIDANCE_SCALE = 2.5
SEED = 42
# Author's tuning note: 95 was preferred over 98 ("can show artifacts").
JPEG_QUALITY = 95

# --- Free GPU memory left over from earlier runs ---------------------------
# Collect Python garbage first so tensors held only by dead reference cycles
# are released, then hand the freed cached blocks back to the CUDA driver.
gc.collect()
torch.cuda.empty_cache()

# --- Load models -----------------------------------------------------------
print("🔄 Loading CatVTON models...")

# Downloads (or reuses the local cache of) the CatVTON weights and returns
# the snapshot directory.
repo_path = snapshot_download(repo_id="zhengchong/CatVTON")

pipeline = CatVTONPipeline(
    base_ckpt="booksforcharlie/stable-diffusion-inpainting",
    attn_ckpt=repo_path,
    attn_ckpt_version="mix",
    weight_dtype=init_weight_dtype("fp16"),
    # TF32 speeds up float32 matmuls on Ampere-or-newer GPUs at slightly
    # reduced precision; it is a throughput setting, not a quality one.
    use_tf32=True,
    device=DEVICE,
)

automasker = AutoMasker(
    densepose_ckpt=os.path.join(repo_path, "DensePose"),
    schp_ckpt=os.path.join(repo_path, "SCHP"),
    device=DEVICE,
)

print("✅ Models loaded!\n")

# --- Load and normalise the input images -----------------------------------
print("📂 Loading images...")

# convert() returns an independent in-memory image, so the file handle can
# be released as soon as the with-block exits.
with Image.open(PERSON_IMAGE_PATH) as person_file:
    person_img = person_file.convert("RGB")
with Image.open(GARMENT_IMAGE_PATH) as garment_file:
    cloth_img = garment_file.convert("RGB")

print(f" Person: {person_img.size[0]}x{person_img.size[1]} px")
print(f" Garment: {cloth_img.size[0]}x{cloth_img.size[1]} px")

# The person image is cropped to the target size while the garment is
# padded to it (per the helper names; see CatVTON's utils for the details).
person_img = resize_and_crop(person_img, (target_width, target_height))
cloth_img = resize_and_padding(cloth_img, (target_width, target_height))

# --- Build the inpainting mask ---------------------------------------------
print("\n🎭 Generating body mask...")
mask = automasker(person_img, "upper")['mask']

# Release whatever the masking models cached before the diffusion run.
torch.cuda.empty_cache()

# --- Inference -------------------------------------------------------------
print("\n⚙️ Running inference with MAXIMUM NATURAL FIT settings...")
print(f" • Resolution: {target_width}x{target_height} HD")
print(f" • Steps: {NUM_INFERENCE_STEPS} (optimal quality/speed balance)")
print(f" • Guidance: {GUIDANCE_SCALE} (natural garment integration)")
print(f" • Seed: {SEED} (reproducible)")
print(" • Estimated time: 2-3 minutes\n")

# NOTE(review): upstream CatVTONPipeline.__call__ draws its noise from a
# `generator` argument and swallows unknown keywords via **kwargs, so a bare
# `seed=` does not make the run reproducible. Seed an explicit generator;
# `seed` is still forwarded for pipeline variants that read it. Confirm
# against the checked-out CatVTON version.
generator = torch.Generator(device=DEVICE).manual_seed(SEED)

result = pipeline(
    image=person_img,
    condition_image=cloth_img,
    mask=mask,
    num_inference_steps=NUM_INFERENCE_STEPS,
    guidance_scale=GUIDANCE_SCALE,
    generator=generator,
    seed=SEED,
    height=target_height,
    width=target_width,
)[0]  # the pipeline returns a sequence of images; keep the first

# --- Save ------------------------------------------------------------------
result.save(OUTPUT_PATH, quality=JPEG_QUALITY)
print(f"\n✅ SUCCESS! Result saved to {OUTPUT_PATH}")