import gradio as gr
import numpy as np
import random
import json
import spaces  # required when running on ZeroGPU hardware
from diffusers import (
    AutoencoderKL,
    StableDiffusionXLPipeline,
)
from huggingface_hub import login, hf_hub_download
from PIL import Image
from SVDNoiseUnet import NPNet64, NPNet128
import functools
from free_lunch_utils import register_free_upblock2d, register_free_crossattn_upblock2d
import torch
import torch.nn as nn
from einops import rearrange
from torchvision.utils import make_grid
import time
from pytorch_lightning import seed_everything
from torch import autocast
from contextlib import contextmanager, nullcontext
import accelerate
import torchsde
from tqdm import tqdm, trange
from itertools import islice
from sampler import UniPCSampler

device = "cuda" if torch.cuda.is_available() else "cpu"
model_repo_id = "Lykon/dreamshaper-xl-1-0"  # Replace with the model you would like to use
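# Gradio demo for "Hyperparameters are all you need": renders an SDXL image
# with a few-step UniPC sampler and, for comparison, a 50-step baseline run.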

precision_scope = autocast

def chunk(it, size):
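    """Split an iterable into successive tuples of at most ``size`` elements."""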
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def numpy_to_pil(images):
    """
    Convert a numpy image or a batch of images to a PIL image.
    """
    if images.ndim == 3:
        images = images[None, ...]
    images = (images * 255).round().astype("uint8")
    pil_images = [Image.fromarray(image) for image in images]

    return pil_images


def load_replacement(x):
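    """Swap a flagged image for assets/rick.jpeg, resized to match its shape
    (the conventional Stable Diffusion safety-checker placeholder); if the
    replacement cannot be loaded, the original array is returned unchanged."""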
    try:
        hwc = x.shape
        y = Image.open("assets/rick.jpeg").convert("RGB").resize((hwc[1], hwc[0]))
        y = (np.array(y) / 255.0).astype(x.dtype)
        assert y.shape == x.shape
        return y
    except Exception:
        return x


# Adapted from pipelines.StableDiffusionPipeline.encode_prompt
def encode_prompt(prompt_batch, text_encoder, tokenizer, proportion_empty_prompts, is_train=True):
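    """Tokenize a batch of captions and return the text encoder's hidden states,
    dropping each caption to "" with probability ``proportion_empty_prompts``."""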
    captions = []
    for caption in prompt_batch:
        if random.random() < proportion_empty_prompts:
            captions.append("")
        elif isinstance(caption, str):
            captions.append(caption)
        elif isinstance(caption, (list, np.ndarray)):
            # take a random caption if there are multiple
            captions.append(random.choice(caption) if is_train else caption[0])

    with torch.no_grad():
        text_inputs = tokenizer(
            captions,
            padding="max_length",
            max_length=tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt",
        )
        text_input_ids = text_inputs.input_ids
        prompt_embeds = text_encoder(text_input_ids.to(text_encoder.device))[0]

    return prompt_embeds

def convert_caption_json_to_str(caption_json):
    # Extract the plain-text caption from a caption JSON record.
    return caption_json["caption"]

def prepare_sdxl_pipeline_step_parameter(pipe, prompts, need_cfg, device, negative_prompts, W=1024, H=1024):
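    """Encode positive/negative prompts and build the SDXL ``added_cond_kwargs``
    (pooled text embeddings plus original-size/crop/target-size time ids).
    When ``need_cfg`` is set, negative and positive conditioning are
    concatenated along the batch dimension for classifier-free guidance."""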
    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt(
        prompt=prompts,
        negative_prompt=negative_prompts,
        device=device,
        do_classifier_free_guidance=need_cfg,
    )
    prompt_embeds = prompt_embeds.to(device)
    add_text_embeds = pooled_prompt_embeds.to(device)
    original_size = (W, H)
    crops_coords_top_left = (0, 0)
    target_size = (W, H)
    text_encoder_projection_dim = None
    add_time_ids = list(original_size + crops_coords_top_left + target_size)
    if pipe.text_encoder_2 is None:
        text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
    else:
        text_encoder_projection_dim = pipe.text_encoder_2.config.projection_dim
    passed_add_embed_dim = (
        pipe.unet.config.addition_time_embed_dim * len(add_time_ids) + text_encoder_projection_dim
    )
    expected_add_embed_dim = pipe.unet.add_embedding.linear_1.in_features
    if expected_add_embed_dim != passed_add_embed_dim:
        raise ValueError(
            f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
        )
    add_time_ids = torch.tensor([add_time_ids], dtype=prompt_embeds.dtype)
    add_time_ids = add_time_ids.to(device)
    negative_add_time_ids = add_time_ids

    if need_cfg:
        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
    ret_dict = {
        "text_embeds": add_text_embeds,
        "time_ids": add_time_ids
    }
    return prompt_embeds, ret_dict


def model_closure(pipe):
    """Wrap the pipeline's UNet as a ``model_fn(x, t, c)`` callable for the sampler."""
    def model_fn(x, t, c):
        # c carries the prompt embeddings and, optionally, the SDXL added-cond kwargs.
        prompt_embeds = c[0]
        cond_kwargs = c[1] if len(c) > 1 else None
        return pipe.unet(
            x,
            t,
            encoder_hidden_states=prompt_embeds.to(device=x.device, dtype=x.dtype),
            added_cond_kwargs=cond_kwargs,
        ).sample

    return model_fn


torch_dtype = torch.float16
vae_repo_id = "madebyollin/sdxl-vae-fp16-fix"  # fp16-safe SDXL VAE
vae = AutoencoderKL.from_pretrained(vae_repo_id, torch_dtype=torch_dtype)
vae.to(device)

pipe = StableDiffusionXLPipeline.from_pretrained(
    "John6666/illustrij-evo-lvl3-sdxl", torch_dtype=torch_dtype, vae=vae
)
# pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, vae=vae)
pipe.to(device)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

accelerator = accelerate.Accelerator()

def generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps):
    """Generate one image with the given number of UniPC inference steps."""
    seed_everything(seed)  # apply the seed so both comparison runs are reproducible
    prompts = [prompt]
    sampler = UniPCSampler(pipe, model_closure=model_closure, steps=num_inference_steps, guidance_scale=guidance_scale)
    c = prompts
    # A fixed quality-tag negative prompt is used whenever CFG is active
    # (the UI's hidden negative-prompt field is ignored in favor of this list).
    uc = ['(worst quality:2), (low quality:2), (normal quality:2), bad anatomy, bad proportions, poorly drawn face, poorly drawn hands, missing fingers, extra limbs, blurry, pixelated, distorted, lowres, jpeg artifacts, watermark, signature, text, (deformed:1.5), (bad hands:1.3), overexposed, underexposed, censored, mutated, extra fingers, cloned face, bad eyes'] * len(c) if guidance_scale != 1.0 else None
    # Latents are [C, H/8, W/8] for the SDXL VAE (downsample factor 8).
    shape = [4, height // 8, width // 8]
    samples, _ = sampler.sample(
        conditioning=c,
        batch_size=1,
        shape=shape,
        unconditional_conditioning=uc,
        x_T=None,
        start_free_u_step=6 if num_inference_steps == 8 else 4,
        xl_preprocess_closure=prepare_sdxl_pipeline_step_parameter,
        # npnet=npn_net,
        use_corrector=True,
    )

    # Decode latents to pixel space and map from [-1, 1] to [0, 1].
    x_samples = pipe.vae.decode(samples / pipe.vae.config.scaling_factor).sample
    x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
    x_samples = x_samples.cpu().permute(0, 2, 3, 1).numpy()

    # batch_size is 1, so return the single decoded sample as a PIL image.
    img = Image.fromarray((255.0 * x_samples[0]).astype(np.uint8))
    return img

@spaces.GPU  # required when running on ZeroGPU hardware
def infer(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    resolution,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
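    """Gradio callback: render the prompt with the selected few-step sampler
    and again with a 50-step run for comparison; returns both images and the
    seed that was used."""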
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    
    # Parse resolution string into width and height
    width, height = map(int, resolution.split('x'))
    
    # Generate image with selected steps
    image_quick = generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps)
    
    # Generate image with 50 steps for high quality
    image_50_steps = generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, 50)

    return image_quick, image_50_steps, seed


examples = [
    "Astronaut in a jungle, cold color, muted colors, detailed, 8k",
    "a painting of a virus monster playing guitar",
    "a painting of a squirrel eating a burger",
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Hyperparameters are all you need")

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )

            run_button = gr.Button("Run", scale=0, variant="primary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### Our fast inference Result")
                result = gr.Image(label="Quick Result", show_label=False)
            with gr.Column():
                gr.Markdown("### Original 50 steps Result")
                result_50_steps = gr.Image(label="50 Steps Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            resolution = gr.Dropdown(
                choices=[
                    "1024x1024",
                    "1216x832",
                    "832x1216"
                ],
                value="1024x1024",
                label="Resolution",
            )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=7.5,  # Replace with defaults that work for your model
                )

                num_inference_steps = gr.Dropdown(
                    choices=[6, 8],
                    value=8,
                    label="Number of inference steps",
                )

        gr.Examples(examples=examples, inputs=[prompt])
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            resolution,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, result_50_steps, seed],
    )

if __name__ == "__main__":
    demo.launch()