import gradio as gr
import numpy as np
import random
import json
import spaces  # [uncomment to use ZeroGPU]
from diffusers import (
    AutoencoderKL,
    StableDiffusionXLPipeline,
    DPMSolverMultistepScheduler,
)
from PIL import Image
from free_lunch_utils import register_free_upblock2d, register_free_crossattn_upblock2d
import torch
from einops import rearrange
from torch import autocast
import accelerate
from tqdm import tqdm
from itertools import islice

from customed_unipc_scheduler import CustomedUniPCMultistepScheduler

device = "cuda" if torch.cuda.is_available() else "cpu"

precision_scope = autocast


def extract_into_tensor(a, t, x_shape):
    b, *_ = t.shape
    out = a.gather(-1, t)
    return out.reshape(b, *((1,) * (len(x_shape) - 1)))


def append_zero(x):
    return torch.cat([x, x.new_zeros([1])])


def prepare_sdxl_pipeline_step_parameter(
    pipe: StableDiffusionXLPipeline,
    prompts,
    need_cfg,
    device,
    negative_prompt=None,
    W=1024,
    H=1024,
):
    # Encode the prompts; with CFG enabled this also returns the negative embeddings.
    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt(
        prompt=prompts,
        negative_prompt=negative_prompt,
        device=device,
        do_classifier_free_guidance=need_cfg,
    )
    prompt_embeds = prompt_embeds.to(device)
    add_text_embeds = pooled_prompt_embeds.to(device)

    # SDXL micro-conditioning: original size, crop coordinates, and target size.
    original_size = (W, H)
    crops_coords_top_left = (0, 0)
    target_size = (W, H)
    add_time_ids = list(original_size + crops_coords_top_left + target_size)
    if pipe.text_encoder_2 is None:
        text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
    else:
        text_encoder_projection_dim = pipe.text_encoder_2.config.projection_dim
    passed_add_embed_dim = (
        pipe.unet.config.addition_time_embed_dim * len(add_time_ids)
        + text_encoder_projection_dim
    )
    expected_add_embed_dim = pipe.unet.add_embedding.linear_1.in_features
    if expected_add_embed_dim != passed_add_embed_dim:
        raise ValueError(
            f"Model expects an added time embedding vector of length {expected_add_embed_dim}, "
            f"but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. "
            f"Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
        )
    add_time_ids = torch.tensor([add_time_ids], dtype=prompt_embeds.dtype).to(device)
    negative_add_time_ids = add_time_ids

    if need_cfg:
        # Stack the negative embeddings first so a later chunk(2) yields (uncond, cond).
        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
        add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)

    return prompt_embeds, {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
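# ---------------------------------------------------------------------------
# Usage sketch for prepare_sdxl_pipeline_step_parameter (illustrative only;
# nothing here is called by the app, and the prompts are hypothetical).
# With need_cfg=True the helper returns batch-doubled tensors, negative
# embeddings first, which matches the torch.cat([latents] * 2) duplication
# done in the sampling loop further down.
# ---------------------------------------------------------------------------
def _demo_prepare_step_parameter(pipe, demo_device="cuda"):
    prompt_embeds, cond_kwargs = prepare_sdxl_pipeline_step_parameter(
        pipe,
        prompts=["a watercolor fox"],        # hypothetical prompt
        need_cfg=True,
        device=demo_device,
        negative_prompt=["blurry, lowres"],  # hypothetical negative prompt
        W=1024,
        H=1024,
    )
    # CFG doubles the batch: index 0 is unconditional, index 1 is conditional.
    assert prompt_embeds.shape[0] == 2
    assert cond_kwargs["time_ids"].shape == (2, 6)
    return prompt_embeds, cond_kwargs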
# New helper to load a list-of-dicts preference JSON.
# JSON schema: [ { 'human_preference': [int], 'prompt': str, 'file_path': [str] }, ... ]
def load_preference_json(json_path: str) -> list[dict]:
    """Load records from a JSON file formatted as a list of preference dicts."""
    with open(json_path, 'r') as f:
        data = json.load(f)
    return data


# New helper to extract just the prompts from the preference JSON.
# Returns a flat list of all 'prompt' values.
def extract_prompts_from_pref_json(json_path: str) -> list[str]:
    """Load a JSON of preference records and return only the prompts."""
    records = load_preference_json(json_path)
    return [rec['prompt'] for rec in records]


# Example usage:
# prompts = extract_prompts_from_pref_json("path/to/preference.json")
# print(prompts)


def get_sigmas_karras(n, sigma_min, sigma_max, rho=7.0, device='cpu', need_append_zero=True):
    """Constructs the noise schedule of Karras et al. (2022)."""
    ramp = torch.linspace(0, 1, n)
    min_inv_rho = sigma_min ** (1 / rho)
    max_inv_rho = sigma_max ** (1 / rho)
    sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
    return append_zero(sigmas).to(device) if need_append_zero else sigmas.to(device)


def append_dims(x, target_dims):
    """Appends dimensions to the end of a tensor until it has target_dims dimensions."""
    dims_to_append = target_dims - x.ndim
    if dims_to_append < 0:
        raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less')
    return x[(...,) + (None,) * dims_to_append]


def chunk(it, size):
    """Yield successive `size`-sized tuples from an iterable."""
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def convert_caption_json_to_str(record):
    """Return the 'caption' field of a caption record."""
    return record["caption"]


torch_dtype = torch.float16

# fp16-safe SDXL VAE to avoid NaNs when decoding in half precision.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype)
vae.to('cuda')
pipe = StableDiffusionXLPipeline.from_pretrained(
    "John6666/nova-anime-xl-il-v120-sdxl", torch_dtype=torch_dtype, vae=vae
)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

accelerator = accelerate.Accelerator()
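# ---------------------------------------------------------------------------
# Illustrative check of get_sigmas_karras (not called by the app; the sigma
# bounds below are assumed, roughly SDXL-like). For n steps the schedule
# starts at sigma_max, decays polynomially with rho=7, and need_append_zero
# adds a trailing 0.0, giving n + 1 values.
# ---------------------------------------------------------------------------
def _demo_karras_schedule(n=8, sigma_min=0.03, sigma_max=14.6):
    sigmas = get_sigmas_karras(n, sigma_min, sigma_max)
    assert len(sigmas) == n + 1 and sigmas[-1] == 0.0
    assert torch.all(sigmas[:-1] > sigmas[1:])  # strictly decreasing
    return sigmas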
def generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps):
    """Generate an image with the customized UniPC scheduler and a specific number of steps."""
    scheduler = CustomedUniPCMultistepScheduler.from_config(
        pipe.scheduler.config,
        solver_order=2 if num_inference_steps == 8 else 1,
        denoise_to_zero=False,
        use_afs=True,
    )
    pipe.scheduler = scheduler
    pipe.to('cuda')

    with torch.no_grad():
        with precision_scope("cuda"):
            prompts = [prompt]
            # Seed the initial latents so results are reproducible.
            generator = torch.Generator(device=device).manual_seed(seed)
            latents = torch.randn(
                (1, pipe.unet.config.in_channels, height // 8, width // 8),
                generator=generator,
                device=device,
            )
            latents = latents * pipe.scheduler.init_noise_sigma
            pipe.scheduler.set_timesteps(num_inference_steps)

            # FreeU is registered with neutral parameters (b=s=1.0), i.e. effectively disabled.
            register_free_upblock2d(pipe, b1=1.0, b2=1.0, s1=1.0, s2=1.0)
            register_free_crossattn_upblock2d(pipe, b1=1.0, b2=1.0, s1=1.0, s2=1.0)

            negative_prompts = ['(worst quality:2), (low quality:2), (normal quality:2), bad anatomy, bad proportions, poorly drawn face, poorly drawn hands, missing fingers, extra limbs, blurry, pixelated, distorted, lowres, jpeg artifacts, watermark, signature, text, (deformed:1.5), (bad hands:1.3), overexposed, underexposed, censored, mutated, extra fingers, cloned face, bad eyes']
            # AFS (analytic first step) replaces the first UNet call when the step
            # budget is small; the free predictor for the final step is disabled.
            use_afs = num_inference_steps < 7
            use_free_predictor = False

            # Prompt embeddings are constant across steps, so encode them once.
            prompt_embeds, cond_kwargs = prepare_sdxl_pipeline_step_parameter(
                pipe,
                prompts,
                need_cfg=True,
                device=pipe.device,
                negative_prompt=negative_prompts,
                W=width,
                H=height,
            )

            for idx, t in enumerate(tqdm(pipe.scheduler.timesteps)):
                # Dead branch kept for experimentation: re-register FreeU with
                # non-neutral parameters partway through sampling. `idx == -1`
                # never matches, so this is currently disabled.
                if idx == -1:  # (6 if num_inference_steps == 8 else 4):
                    register_free_upblock2d(pipe, b1=1.2, b2=1.2, s1=0.9, s2=0.9)
                    register_free_crossattn_upblock2d(pipe, b1=1.2, b2=1.2, s1=0.9, s2=0.9)

                latent_model_input = torch.cat([latents] * 2)
                latent_model_input = pipe.scheduler.scale_model_input(latent_model_input, timestep=t)

                if idx == 0 and use_afs:
                    # Analytic first step: approximate the first model output
                    # directly from the scaled input instead of calling the UNet.
                    noise_pred = latent_model_input * 0.975
                elif idx == len(pipe.scheduler.timesteps) - 1 and use_free_predictor:
                    # Free final step: the customized scheduler is expected to
                    # handle a None model output.
                    noise_pred = None
                else:
                    noise_pred = pipe.unet(
                        latent_model_input,
                        t,
                        encoder_hidden_states=prompt_embeds.to(device=latents.device, dtype=latents.dtype),
                        added_cond_kwargs=cond_kwargs,
                    ).sample

                if noise_pred is not None:
                    # Classifier-free guidance: push the prediction away from the
                    # unconditional branch toward the conditional one.
                    uncond, cond = noise_pred.chunk(2)
                    noise_pred = uncond + (cond - uncond) * guidance_scale

                latents = pipe.scheduler.step(noise_pred, t, latents).prev_sample

            # Decode the latents to pixel space and convert to a PIL image.
            x_samples_ddim = pipe.vae.decode(latents / pipe.vae.config.scaling_factor).sample
            x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
            x_sample = 255. * rearrange(x_samples_ddim[0].cpu().numpy(), 'c h w -> h w c')
            return Image.fromarray(x_sample.astype(np.uint8))
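# ---------------------------------------------------------------------------
# Standalone sketch of the classifier-free-guidance update used in the loop
# above (dummy tensors; the scale value is illustrative, not called by the
# app). Guidance is linear extrapolation from the unconditional toward the
# conditional prediction:
#   guided = uncond + scale * (cond - uncond)
# so scale = 1 recovers the conditional prediction exactly.
# ---------------------------------------------------------------------------
def _demo_cfg_combine(scale=5.5):
    stacked = torch.randn(2, 4, 128, 128)  # (uncond, cond) stacked on the batch dim
    uncond, cond = stacked.chunk(2)
    guided = uncond + (cond - uncond) * scale
    assert torch.allclose(uncond + (cond - uncond) * 1.0, cond)  # sanity check at scale=1
    return guided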
@spaces.GPU  # [uncomment to use ZeroGPU]
def infer(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    resolution,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Parse the resolution string (e.g. "1216x832") into width and height.
    width, height = map(int, resolution.split('x'))

    # Generate the quick image with the selected number of steps.
    image_quick = generate_image_with_steps(prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps)

    # Generate the 20-step reference image with the stock DPM-Solver++ scheduler.
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, final_sigmas_type="sigma_min")
    negative_prompts = ['(worst quality:2), (low quality:2), (normal quality:2), bad anatomy, bad proportions, poorly drawn face, poorly drawn hands, missing fingers, extra limbs, blurry, pixelated, distorted, lowres, jpeg artifacts, watermark, signature, text, (deformed:1.5), (bad hands:1.3), overexposed, underexposed, censored, mutated, extra fingers, cloned face, bad eyes']
    image_20_steps = pipe(
        prompt=[prompt],
        negative_prompt=negative_prompts,
        num_inference_steps=20,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
    ).images[0]

    return image_quick, image_20_steps, seed


examples = [
    "Astronaut in a jungle, cold color, muted colors, detailed, 8k",
    "a painting of a virus monster playing guitar",
    "a painting of a squirrel eating a burger",
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Hyperparameters are all you need")

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0, variant="primary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### Our fast inference result, using AFS and the uni-predictor to get 2 free steps")
                result = gr.Image(label="Quick Result", show_label=False)
            with gr.Column():
                gr.Markdown("### Original 20-step result")
                result_20_steps = gr.Image(label="20 Steps Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            resolution = gr.Dropdown(
                choices=["1024x1024", "1216x832", "832x1216"],
                value="1024x1024",
                label="Resolution",
            )
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=6.0,
                    step=0.1,
                    value=5.5,  # replace with a default that works for your model
                )
                num_inference_steps = gr.Dropdown(
                    choices=[5, 6, 7, 8],
                    value=8,
                    label="Number of inference steps",
                )

        gr.Examples(examples=examples, inputs=[prompt])

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            resolution,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, result_20_steps, seed],
    )

if __name__ == "__main__":
    demo.launch()