import argparse
import json
from pathlib import Path
from datetime import datetime
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from torchvision.utils import make_grid
from diffusers import StableDiffusionXLPipeline, AutoencoderKL

try:
    from pytorch_msssim import ssim, ms_ssim
except ImportError:
    print("Installing pytorch-msssim...")
    import subprocess
    subprocess.check_call(["pip", "install", "pytorch-msssim"])
    from pytorch_msssim import ssim, ms_ssim


def add_caption_to_image(image, caption, font_size=20):
    """Add caption to image and return as tensor"""
    # Convert tensor to PIL Image if needed
    if isinstance(image, torch.Tensor):
        image = (image * 255).clamp(0, 255).to(torch.uint8)
        image = image.permute(1, 2, 0).cpu().numpy()
        image = Image.fromarray(image)
    
    # Create new image with space for caption
    margin = 10
    width = image.width
    height = image.height + font_size + 2*margin
    new_image = Image.new('RGB', (width, height), 'white')
    new_image.paste(image, (0, 0))
    
    # Add caption
    draw = ImageDraw.Draw(new_image)
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)
    except:
        font = ImageFont.load_default()
    
    # Center the text
    text_width = draw.textlength(caption, font=font)
    x = (width - text_width) // 2
    y = height - font_size - margin
    
    draw.text((x, y), caption, fill='black', font=font)
    
    # Convert back to tensor
    new_image = torch.from_numpy(np.array(new_image)).permute(2, 0, 1).float() / 255.0
    return new_image


def create_image_grid(images, prompts, images_per_prompt, font_size=20):
    """Create a grid of images with captions"""
    # First add captions to all images
    captioned_images = []
    for i, img in enumerate(images):
        prompt_idx = i // images_per_prompt
        img_idx = i % images_per_prompt + 1
        caption = f"{prompts[prompt_idx]} ({img_idx}/{images_per_prompt})"
        img_tensor = torch.from_numpy(np.array(img)).permute(2, 0, 1).float() / 255.0
        captioned_img = add_caption_to_image(img_tensor, caption, font_size)
        captioned_images.append(captioned_img)
    
    # Convert to tensor and create grid
    image_tensor = torch.stack(captioned_images)
    grid = make_grid(image_tensor, nrow=images_per_prompt, padding=10)
    
    return grid


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--output_path", type=str, required=True, help="path to save the images"
    )
    parser.add_argument(
        "--content_LoRA", type=str, default=None, help="path for the content LoRA"
    )
    parser.add_argument(
        "--content_alpha", type=float, default=1.0, help="scale factor for content LoRA weights"
    )
    parser.add_argument(
        "--style_LoRA", type=str, default=None, help="path for the style LoRA"
    )
    parser.add_argument(
        "--style_alpha", type=float, default=1.0, help="scale factor for style LoRA weights"
    )
    parser.add_argument(
        "--num_images_per_prompt", type=int, default=4, help="number of images per prompt"
    )
    parser.add_argument(
        "--evaluation_prompt_file", type=str, required=True, help="path to evaluation prompts file"
    )
    parser.add_argument(
        "--placeholder_style", type=str, required=True, help="placeholder for the style prompt"
    )
    parser.add_argument(
        "--placeholder_content", type=str, required=True, help="placeholder for the content prompt"
    )
    parser.add_argument(
        "--name_concept", type=str, required=True, help="name of the concept being evaluated"
    )
    parser.add_argument(
        "--font_size", type=int, default=20, help="font size for image captions"
    )
    return parser.parse_args()


def process_prompts(pipeline, prompts, output_dir, args, prompt_type, lora_type, start_idx=0):
    """Process a set of prompts and save results"""
    all_images = []
    current_idx = start_idx
    
    for prompt in prompts:
        formatted_prompt = prompt.replace("{}", args.placeholder_style if lora_type == "style" else args.placeholder_content)
        
        # Update config to use new argument names
        config = {
            "gen_prompt": formatted_prompt,
            "content_LoRA": args.content_LoRA if lora_type == "content" else None,
            "content_alpha": args.content_alpha if lora_type == "content" else None,
            "style_LoRA": args.style_LoRA if lora_type == "style" else None,
            "style_alpha": args.style_alpha if lora_type == "style" else None
        }
        
        # Save config with consecutive numbering
        config_path = output_dir / f'prompt_{current_idx}_params.json'
        with open(config_path, 'w') as f:
            json.dump(config, f, indent=4)

        # Generate images
        images = pipeline(formatted_prompt, num_images_per_prompt=args.num_images_per_prompt).images
        all_images.extend(images)

        # Save individual images with consecutive numbering
        prompt_dir = output_dir / 'output' / 'ours' / f'prompt_{current_idx}_{prompt_type}'
        prompt_dir.mkdir(parents=True, exist_ok=True)
        
        for img_idx, img in enumerate(images):
            img.save(prompt_dir / f'{img_idx:03d}.jpg')
            
        current_idx += 1
    
    return all_images, [p.replace("{}", args.placeholder_style if lora_type == "style" else args.placeholder_content) for p in prompts], current_idx


if __name__ == '__main__':
    args = parse_args()
    
    # Create timestamped output directory
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    result_dir = Path(args.output_path) / f'{args.name_concept}_{timestamp}'
    result_dir.mkdir(parents=True, exist_ok=True)

    # Load benchmark prompts
    with open(args.evaluation_prompt_file, 'r') as f:
        benchmark_prompts = json.load(f)

    # Initialize pipeline
    vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
    pipeline = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        vae=vae,
        torch_dtype=torch.float16
    ).to("cuda")

    current_prompt_idx = 0

    # Process content prompts if content LoRA is provided
    if args.content_LoRA is not None:
        print("Loading content LoRA...")
        pipeline.load_lora_weights(args.content_LoRA, scale=args.content_alpha)

        for category, prompts in benchmark_prompts["content"].items():
            print(f"Processing content {category} prompts...")
            images, formatted_prompts, current_prompt_idx = process_prompts(
                pipeline, prompts, result_dir, args, f"content_{category}", "content",
                start_idx=current_prompt_idx
            )
            
            grid = create_image_grid(images, formatted_prompts, args.num_images_per_prompt, args.font_size)
            grid_image = Image.fromarray((grid.permute(1, 2, 0).numpy() * 255).astype(np.uint8))
            grid_path = result_dir / f'grid_content_{category}.png'
            grid_image.save(grid_path)

        # Unload content LoRA
        pipeline.unload_lora_weights()

    # Process style prompts if style LoRA is provided
    if args.style_LoRA is not None:
        print("Loading style LoRA...")
        pipeline.load_lora_weights(args.style_LoRA, scale=args.style_alpha)

        print("Processing style prompts...")
        images, formatted_prompts, _ = process_prompts(
            pipeline, benchmark_prompts["style"], result_dir, args, "style", "style",
            start_idx=current_prompt_idx
        )
        
        grid = create_image_grid(images, formatted_prompts, args.num_images_per_prompt, args.font_size)
        grid_image = Image.fromarray((grid.permute(1, 2, 0).numpy() * 255).astype(np.uint8))
        grid_path = result_dir / 'grid_style.png'
        grid_image.save(grid_path)

    print(f"Results saved to {result_dir}")