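"""Gradio demo (Hugging Face Space, ZeroGPU): fast SDXL inference with UniPC.

Renders the same prompt twice -- once with a small step count (6 or 8) and once
with the standard 50 steps -- so the two results can be compared side by side.
"""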
import functools
import json
import random
import time
from contextlib import contextmanager, nullcontext
from itertools import islice

import accelerate
import gradio as gr
import numpy as np
import spaces  # required for ZeroGPU Spaces
import torch
import torch.nn as nn
import torchsde
from diffusers import (
    AutoencoderKL,
    StableDiffusionXLPipeline,
)
from einops import rearrange
from huggingface_hub import hf_hub_download, login
from PIL import Image
from pytorch_lightning import seed_everything
from torch import autocast
from torchvision.utils import make_grid
from tqdm import tqdm, trange

from free_lunch_utils import register_free_upblock2d, register_free_crossattn_upblock2d
from sampler import UniPCSampler
from SVDNoiseUnet import NPNet64, NPNet128

device = "cuda" if torch.cuda.is_available() else "cpu"
model_repo_id = "Lykon/dreamshaper-xl-1-0"  # Replace with the model you would like to use
precision_scope = autocast
def chunk(it, size):
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())
def numpy_to_pil(images):
    """
    Convert a numpy image or a batch of images to a PIL image.
    """
    if images.ndim == 3:
        images = images[None, ...]
    images = (images * 255).round().astype("uint8")
    pil_images = [Image.fromarray(image) for image in images]
    return pil_images
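# Safety-checker replacement helper: swaps a flagged image for assets/rick.jpeg,
# falling back to the original array if the asset is unavailable.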
def load_replacement(x):
    try:
        hwc = x.shape
        y = Image.open("assets/rick.jpeg").convert("RGB").resize((hwc[1], hwc[0]))
        y = (np.array(y) / 255.0).astype(x.dtype)
        assert y.shape == x.shape
        return y
    except Exception:
        return x
# Adapted from pipelines.StableDiffusionPipeline.encode_prompt
def encode_prompt(prompt_batch, text_encoder, tokenizer, proportion_empty_prompts, is_train=True):
    captions = []
    for caption in prompt_batch:
        if random.random() < proportion_empty_prompts:
            captions.append("")
        elif isinstance(caption, str):
            captions.append(caption)
        elif isinstance(caption, (list, np.ndarray)):
            # take a random caption if there are multiple
            captions.append(random.choice(caption) if is_train else caption[0])

    with torch.no_grad():
        text_inputs = tokenizer(
            captions,
            padding="max_length",
            max_length=tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt",
        )
        text_input_ids = text_inputs.input_ids
        prompt_embeds = text_encoder(text_input_ids.to(text_encoder.device))[0]

    return prompt_embeds
def convert_caption_json_to_str(caption_json):
    # Parameter renamed from `json` to avoid shadowing the json module.
    return caption_json["caption"]
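# SDXL conditions its UNet on prompt embeddings plus "micro-conditioning" time
# IDs (original size + crop coordinates + target size). The helper below builds
# both, concatenating the negative and positive halves when classifier-free
# guidance is enabled so cond and uncond run in a single batch.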
def prepare_sdxl_pipeline_step_parameter(pipe, prompts, need_cfg, device, negative_prompts, W=1024, H=1024):
    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt(
        prompt=prompts,
        negative_prompt=negative_prompts,
        device=device,
        do_classifier_free_guidance=need_cfg,
    )
    prompt_embeds = prompt_embeds.to(device)
    add_text_embeds = pooled_prompt_embeds.to(device)

    # SDXL expects (height, width) ordering for the size conditioning;
    # the original code passed (W, H), which is wrong for non-square outputs.
    original_size = (H, W)
    crops_coords_top_left = (0, 0)
    target_size = (H, W)
    add_time_ids = list(original_size + crops_coords_top_left + target_size)

    if pipe.text_encoder_2 is None:
        text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
    else:
        text_encoder_projection_dim = pipe.text_encoder_2.config.projection_dim

    passed_add_embed_dim = (
        pipe.unet.config.addition_time_embed_dim * len(add_time_ids) + text_encoder_projection_dim
    )
    expected_add_embed_dim = pipe.unet.add_embedding.linear_1.in_features
    if expected_add_embed_dim != passed_add_embed_dim:
        raise ValueError(
            f"Model expects an added time embedding vector of length {expected_add_embed_dim}, "
            f"but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. "
            "Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
        )

    add_time_ids = torch.tensor([add_time_ids], dtype=prompt_embeds.dtype).to(device)
    negative_add_time_ids = add_time_ids

    if need_cfg:
        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)

    ret_dict = {
        "text_embeds": add_text_embeds,
        "time_ids": add_time_ids,
    }
    return prompt_embeds, ret_dict
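# Adapter exposing the diffusers UNet through the sampler's model_fn(x, t, c)
# interface; c packs (prompt_embeds, added_cond_kwargs) built by the helper above.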
def model_closure(pipe):
    def model_fn(x, t, c):
        prompt = c[0]
        cond_kwargs = c[1] if len(c) > 1 else None
        return pipe.unet(
            x,
            t,
            encoder_hidden_states=prompt.to(device=x.device, dtype=x.dtype),
            added_cond_kwargs=cond_kwargs,
        ).sample

    return model_fn
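# Load the fp16-fixed SDXL VAE (avoids NaN artifacts when decoding in fp16)
# together with the SDXL checkpoint.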
torch_dtype = torch.float16
vae_repo_id = "madebyollin/sdxl-vae-fp16-fix"
vae = AutoencoderKL.from_pretrained(vae_repo_id, torch_dtype=torch_dtype)
vae.to(device)

pipe = StableDiffusionXLPipeline.from_pretrained(
    "John6666/illustrij-evo-lvl3-sdxl", torch_dtype=torch_dtype, vae=vae
)
# pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, vae=vae)
pipe.to(device)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024
accelerator = accelerate.Accelerator()
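# Generation helper: runs UniPC sampling; start_free_u_step appears to control
# when the FreeU tweak (see free_lunch_utils) kicks in, and use_corrector adds
# the sampler's corrector step.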
def generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps):
    """Generate an image with a specific number of sampler steps."""
    seed_everything(seed)  # make the run reproducible; the seed was previously unused
    prompts = [prompt]
    sampler = UniPCSampler(pipe, model_closure=model_closure, steps=num_inference_steps, guidance_scale=guidance_scale)
    c = prompts
    default_negative = (
        "(worst quality:2), (low quality:2), (normal quality:2), bad anatomy, bad proportions, "
        "poorly drawn face, poorly drawn hands, missing fingers, extra limbs, blurry, pixelated, "
        "distorted, lowres, jpeg artifacts, watermark, signature, text, (deformed:1.5), "
        "(bad hands:1.3), overexposed, underexposed, censored, mutated, extra fingers, "
        "cloned face, bad eyes"
    )
    # Fall back to the default negative prompt when the user supplies none.
    uc = [negative_prompt or default_negative] * len(c) if guidance_scale != 1.0 else None
    # Latent shape is (channels, height, width); the original swapped width and height.
    shape = [4, height // 8, width // 8]
    samples, _ = sampler.sample(
        conditioning=c,
        batch_size=1,
        shape=shape,
        unconditional_conditioning=uc,
        x_T=None,
        start_free_u_step=6 if num_inference_steps == 8 else 4,
        xl_preprocess_closure=prepare_sdxl_pipeline_step_parameter,
        # npnet=npn_net,
        use_corrector=True,
    )
    # Decode the latents to pixel space and convert the single sample to a PIL image.
    x_samples = pipe.vae.decode(samples / pipe.vae.config.scaling_factor).sample
    x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
    x_samples = x_samples.cpu().permute(0, 2, 3, 1).numpy()  # (B, H, W, C)
    x_sample = 255.0 * x_samples[0]
    return Image.fromarray(x_sample.astype(np.uint8))
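# Gradio callback: resolves the seed, parses the resolution, then renders the
# fast result next to the 50-step baseline for side-by-side comparison.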
@spaces.GPU  # ZeroGPU: request a GPU for the duration of this call
def infer(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    resolution,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Parse the resolution string (e.g. "1216x832") into width and height.
    width, height = map(int, resolution.split("x"))
    # Generate with the selected (fast) step count, then with 50 steps for high quality.
    image_quick = generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps)
    image_50_steps = generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, 50)
    return image_quick, image_50_steps, seed
examples = [
    "Astronaut in a jungle, cold color, muted colors, detailed, 8k",
    "a painting of a virus monster playing guitar",
    "a painting of a squirrel eating a burger",
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Hyperparameters are all you need")
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0, variant="primary")
        with gr.Row():
            with gr.Column():
                gr.Markdown("### Our Fast Inference Result")
                result = gr.Image(label="Quick Result", show_label=False)
            with gr.Column():
                gr.Markdown("### Original 50-Step Result")
                result_50_steps = gr.Image(label="50 Steps Result", show_label=False)
        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            resolution = gr.Dropdown(
                choices=[
                    "1024x1024",
                    "1216x832",
                    "832x1216",
                ],
                value="1024x1024",
                label="Resolution",
            )
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=7.5,  # Replace with defaults that work for your model
                )
                num_inference_steps = gr.Dropdown(
                    choices=[6, 8],
                    value=8,
                    label="Number of inference steps",
                )
        gr.Examples(examples=examples, inputs=[prompt])
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            resolution,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, result_50_steps, seed],
    )

if __name__ == "__main__":
    demo.launch()