import os
import sys
import torch
from PIL import Image
import gc

sys.path.insert(0, '/kaggle/working/CatVTON')

from model.pipeline import CatVTONPipeline
from model.cloth_masker import AutoMasker
from utils import init_weight_dtype, resize_and_crop, resize_and_padding

# Both models below are created on the GPU, so fail fast with a clear message
# before any checkpoint is downloaded if CUDA is not usable.
device = 'cuda'
if not torch.cuda.is_available():
    raise RuntimeError(
        "CUDA is not available, but this script loads CatVTON on device='cuda'. "
        "Enable a GPU accelerator and re-run."
    )

# Free GPU memory left over from earlier runs in this session. Collect Python
# garbage first so tensors kept alive only by reference cycles are released,
# then return the now-unused cached blocks to the driver.
gc.collect()
torch.cuda.empty_cache()

print("🔄 Loading CatVTON models...")

# Download (or reuse the local cache of) the CatVTON checkpoint repository.
from huggingface_hub import snapshot_download
repo_path = snapshot_download(repo_id="zhengchong/CatVTON")

# Try-on pipeline: inpainting base checkpoint plus the CatVTON attention
# weights taken from the downloaded repository.
pipeline = CatVTONPipeline(
    base_ckpt="booksforcharlie/stable-diffusion-inpainting",
    attn_ckpt=repo_path,
    attn_ckpt_version="mix",
    weight_dtype=init_weight_dtype("fp16"),
    # TF32 is a speed/precision trade-off for float32 matmuls and convolutions
    # on GPUs that support it; it is a performance switch, not a quality one.
    # NOTE(review): presumably forwarded to torch.backends.* — confirm in
    # model/pipeline.py.
    use_tf32=True,
    device=device,
)

# Mask generator; its DensePose and SCHP checkpoints live inside the same
# downloaded repository.
automasker = AutoMasker(
    densepose_ckpt=os.path.join(repo_path, "DensePose"),
    schp_ckpt=os.path.join(repo_path, "SCHP"),
    device=device,
)

print("✅ Models loaded!\n")

# Load the person and garment photos. Image.open() is lazy and keeps the
# underlying file open, so each image is converted inside a `with` block:
# convert() reads the pixel data and returns an independent image, after which
# the file handle is closed deterministically.
print("📂 Loading images...")
with Image.open("person.jpg") as person_file:
    person_img = person_file.convert("RGB")
with Image.open("garment.jpg") as garment_file:
    cloth_img = garment_file.convert("RGB")

print(f"   Person: {person_img.size[0]}x{person_img.size[1]} px")
print(f"   Garment: {cloth_img.size[0]}x{cloth_img.size[1]} px")

# Working resolution (width x height) shared by both inputs and the output.
target_height = 1024
target_width = 768

# Bring both images to the working resolution using the project helpers.
# NOTE(review): going by the helper names, the person photo is cropped and the
# garment photo is padded to reach the target size — confirm in utils.
person_img = resize_and_crop(person_img, (target_width, target_height))
cloth_img = resize_and_padding(cloth_img, (target_width, target_height))

print("\n🎭 Generating body mask...")
# "upper" is the garment category passed to the masker (presumably upper-body
# clothing — verify against AutoMasker); only the 'mask' entry of the returned
# mapping is used.
mask = automasker(person_img, "upper")['mask']

# Release cached GPU blocks held after masking before the diffusion run.
torch.cuda.empty_cache()

# Sampling settings, defined once so the printed summary and the actual
# pipeline call cannot drift apart.
num_inference_steps = 50
guidance_scale = 2.5
seed = 42
jpeg_quality = 95

print("\n⚙️  Running inference with MAXIMUM NATURAL FIT settings...")
print(f"   • Resolution: {target_width}x{target_height} HD")
print(f"   • Steps: {num_inference_steps} (optimal quality/speed balance)")
print(f"   • Guidance: {guidance_scale} (natural garment integration)")
print(f"   • Seed: {seed} (reproducible)")
print("   • Estimated time: 2-3 minutes\n")

# The seed must reach the sampler as a seeded torch.Generator.
# NOTE(review): the upstream CatVTON pipeline's __call__ appears to accept
# `generator=` and to swallow unknown keywords through **kwargs, so a bare
# `seed=42` argument would be silently ignored and runs would not be
# reproducible — confirm against model/pipeline.py.
generator = torch.Generator(device="cuda").manual_seed(seed)

# The pipeline returns a sequence of images; a single person/garment pair is
# submitted here, so the first element is the try-on result.
result = pipeline(
    image=person_img,
    condition_image=cloth_img,
    mask=mask,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    generator=generator,
    height=target_height,
    width=target_width,
)[0]

# Save as JPEG using the quality setting defined above.
result.save("result.jpg", quality=jpeg_quality)
print("\n✅ SUCCESS! Result saved to result.jpg")