import gradio as gr
import numpy as np
import random
import json
import spaces  # required for the @spaces.GPU decorator on ZeroGPU Spaces
from diffusers import (
    AutoencoderKL,
    StableDiffusionXLPipeline,
)
from huggingface_hub import login, hf_hub_download
from PIL import Image
from SVDNoiseUnet import NPNet64, NPNet128
import functools
from free_lunch_utils import register_free_upblock2d, register_free_crossattn_upblock2d
import torch
import torch.nn as nn
from einops import rearrange
from torchvision.utils import make_grid
import time
from pytorch_lightning import seed_everything
from torch import autocast
from contextlib import contextmanager, nullcontext
import accelerate
import torchsde
from tqdm import tqdm, trange
from itertools import islice

from sampler import UniPCSampler

device = "cuda" if torch.cuda.is_available() else "cpu"
model_repo_id = "Lykon/dreamshaper-xl-1-0"  # Replace with the model you would like to use (currently unused; the pipeline below loads its own checkpoint)

precision_scope = autocast


def chunk(it, size):
    """Yield successive `size`-element tuples from an iterable."""
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def numpy_to_pil(images):
    """Convert a numpy image or a batch of images to a list of PIL images."""
    if images.ndim == 3:
        images = images[None, ...]
    images = (images * 255).round().astype("uint8")
    pil_images = [Image.fromarray(image) for image in images]
    return pil_images


def load_replacement(x):
    try:
        hwc = x.shape
        y = Image.open("assets/rick.jpeg").convert("RGB").resize((hwc[1], hwc[0]))
        y = (np.array(y) / 255.0).astype(x.dtype)
        assert y.shape == x.shape
        return y
    except Exception:
        return x


# Adapted from pipelines.StableDiffusionPipeline.encode_prompt
def encode_prompt(prompt_batch, text_encoder, tokenizer, proportion_empty_prompts, is_train=True):
    captions = []
    for caption in prompt_batch:
        if random.random() < proportion_empty_prompts:
            captions.append("")
        elif isinstance(caption, str):
            captions.append(caption)
        elif isinstance(caption, (list, np.ndarray)):
            # take a random caption if there are multiple
            captions.append(random.choice(caption) if is_train else caption[0])
    with torch.no_grad():
        text_inputs = tokenizer(
            captions,
            padding="max_length",
            max_length=tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt",
        )
        text_input_ids = text_inputs.input_ids
        prompt_embeds = text_encoder(text_input_ids.to(text_encoder.device))[0]
    return prompt_embeds


def convert_caption_json_to_str(caption_json):
    caption = caption_json["caption"]
    return caption


def prepare_sdxl_pipeline_step_parameter(pipe, prompts, need_cfg, device, negative_prompts, W=1024, H=1024):
    # Encode the positive and (optionally) negative prompts with both SDXL text encoders.
    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt(
        prompt=prompts,
        negative_prompt=negative_prompts,
        device=device,
        do_classifier_free_guidance=need_cfg,
    )
    # timesteps = pipe.scheduler.timesteps
    prompt_embeds = prompt_embeds.to(device)
    add_text_embeds = pooled_prompt_embeds.to(device)

    # SDXL micro-conditioning: original size, crop top-left corner, target size.
    original_size = (W, H)
    crops_coords_top_left = (0, 0)
    target_size = (W, H)
    text_encoder_projection_dim = None
    add_time_ids = list(original_size + crops_coords_top_left + target_size)
    if pipe.text_encoder_2 is None:
        text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
    else:
        text_encoder_projection_dim = pipe.text_encoder_2.config.projection_dim

    passed_add_embed_dim = (
        pipe.unet.config.addition_time_embed_dim * len(add_time_ids) + text_encoder_projection_dim
    )
    expected_add_embed_dim = pipe.unet.add_embedding.linear_1.in_features
    if expected_add_embed_dim != passed_add_embed_dim:
        raise ValueError(
            f"Model expects an added time embedding vector of length {expected_add_embed_dim}, "
            f"but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. "
            f"Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
        )

    add_time_ids = torch.tensor([add_time_ids], dtype=prompt_embeds.dtype)
    add_time_ids = add_time_ids.to(device)
    negative_add_time_ids = add_time_ids

    if need_cfg:
        # Stack the unconditional embeddings in front of the conditional ones so a
        # single batched UNet forward pass covers both CFG branches.
        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)

    ret_dict = {
        "text_embeds": add_text_embeds,
        "time_ids": add_time_ids,
    }
    return prompt_embeds, ret_dict
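
# Shape reference (an assumption based on typical SDXL-base-style checkpoints, for a
# single prompt with CFG enabled): `prepare_sdxl_pipeline_step_parameter` returns
# `prompt_embeds` of shape [2, 77, 2048] (unconditional + conditional rows) plus a
# dict with "text_embeds" [2, 1280] and "time_ids" [2, 6], which the UNet consumes
# via `added_cond_kwargs` in `model_fn` below.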

def model_closure(pipe):
    def model_fn(x, t, c):
        # `c` is (prompt_embeds, added_cond_kwargs) as produced by
        # prepare_sdxl_pipeline_step_parameter.
        prompt = c[0]
        cond_kwargs = c[1] if len(c) > 1 else None
        # prompt_embeds, cond_kwargs = prepare_sdxl_pipeline_step_parameter(pipe=pipe, prompts=prompt, need_cfg=True, device=pipe.device, negative_prompts=negative_prompt)
        # prompt_embeds, cond_kwargs = c
        return pipe.unet(
            x,
            t,
            encoder_hidden_states=prompt.to(device=x.device, dtype=x.dtype),
            added_cond_kwargs=cond_kwargs,
        ).sample

    return model_fn


torch_dtype = torch.float16
repo_id = "madebyollin/sdxl-vae-fp16-fix"
vae = AutoencoderKL.from_pretrained(repo_id, torch_dtype=torch_dtype)  # from_single_file(downloaded_path, torch_dtype=torch_dtype)
vae.to(device)
pipe = StableDiffusionXLPipeline.from_pretrained("John6666/illustrij-evo-lvl3-sdxl", torch_dtype=torch_dtype, vae=vae)
# pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, vae=vae)
pipe.to(device)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

accelerator = accelerate.Accelerator()


def generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps):
    """Generate one image with the given number of UniPC sampling steps."""
    seed_everything(seed)  # seed all RNGs so runs with the same seed share initial noise
    prompts = [prompt]
    sampler = UniPCSampler(pipe, model_closure=model_closure, steps=num_inference_steps, guidance_scale=guidance_scale)
    c = prompts
    # A fixed quality-focused negative prompt is used; the UI's `negative_prompt`
    # field is not forwarded to the sampler.
    uc = [
        "(worst quality:2), (low quality:2), (normal quality:2), bad anatomy, "
        "bad proportions, poorly drawn face, poorly drawn hands, missing fingers, "
        "extra limbs, blurry, pixelated, distorted, lowres, jpeg artifacts, "
        "watermark, signature, text, (deformed:1.5), (bad hands:1.3), overexposed, "
        "underexposed, censored, mutated, extra fingers, cloned face, bad eyes"
    ] * len(c) if guidance_scale != 1.0 else None
    # Latent shape is (channels, height, width); SDXL's VAE downsamples by a factor of 8.
    shape = [4, height // 8, width // 8]
    # if opt.method == "dpm_solver_v3":
    # batch_size, shape, conditioning, x_T, unconditional_conditioning
    samples, _ = sampler.sample(
        conditioning=c,
        batch_size=1,
        shape=shape,
        unconditional_conditioning=uc,
        x_T=None,
        start_free_u_step=6 if num_inference_steps == 8 else 4,
        xl_preprocess_closure=prepare_sdxl_pipeline_step_parameter,
        # npnet=npn_net,
        use_corrector=True,
    )

    # Decode the latents back to pixel space and map from [-1, 1] to [0, 1].
    x_samples = pipe.vae.decode(samples / pipe.vae.config.scaling_factor).sample
    x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
    x_samples = x_samples.cpu().permute(0, 2, 3, 1).numpy()
    x_image_torch = torch.from_numpy(x_samples).permute(0, 3, 1, 2)

    for x_sample in x_image_torch:
        x_sample = 255.0 * rearrange(x_sample.cpu().numpy(), "c h w -> h w c")
        img = Image.fromarray(x_sample.astype(np.uint8))
        return img  # batch_size is 1, so the first sample is the only one


@spaces.GPU  # request a GPU per call on ZeroGPU Spaces
def infer(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    resolution,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Parse the resolution string ("WxH") into width and height
    width, height = map(int, resolution.split("x"))

    # Generate the image with the selected number of steps
    image_quick = generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps)

    # Generate a reference image with 50 steps for comparison
    image_50_steps = generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, 50)

    return image_quick, image_50_steps, seed
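
# Illustrative direct invocation (an assumption, not wired into the app): `infer`
# can be exercised outside the Gradio UI, e.g. from a REPL, to smoke-test the
# sampling path:
#     img_fast, img_ref, used_seed = infer(
#         "a painting of a squirrel eating a burger",  # prompt
#         "",           # negative_prompt (not forwarded to the sampler)
#         0,            # seed
#         False,        # randomize_seed
#         "1024x1024",  # resolution
#         7.5,          # guidance_scale
#         8,            # num_inference_steps (6 or 8)
#     )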

examples = [
    "Astronaut in a jungle, cold color, muted colors, detailed, 8k",
    "a painting of a virus monster playing guitar",
    "a painting of a squirrel eating a burger",
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Hyperparameters are all you need")

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0, variant="primary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### Our fast-inference result")
                result = gr.Image(label="Quick Result", show_label=False)
            with gr.Column():
                gr.Markdown("### Original 50-step result")
                result_50_steps = gr.Image(label="50 Steps Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            resolution = gr.Dropdown(
                choices=[
                    "1024x1024",
                    "1216x832",
                    "832x1216",
                ],
                value="1024x1024",
                label="Resolution",
            )
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=7.5,  # Replace with defaults that work for your model
                )
                num_inference_steps = gr.Dropdown(
                    choices=[6, 8],
                    value=8,
                    label="Number of inference steps",
                )

        gr.Examples(examples=examples, inputs=[prompt])

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            resolution,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, result_50_steps, seed],
    )

if __name__ == "__main__":
    demo.launch()
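
# Deployment options (assumptions, not part of the original launch call): on a busy
# Space, Gradio's request queue serializes GPU work, e.g. `demo.queue(max_size=20).launch()`;
# for local testing, `demo.launch(share=True)` creates a temporary public URL.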