# (Hugging Face page chrome accidentally captured with the file:
#  "Respair's picture" / "Upload folder using huggingface_hub" / "b386992 verified")
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import time
from typing import Dict, List
import torch
import torch.profiler
from diffusers import AutoencoderKL
from torch import nn
class VAEGenerator:
    """
    Generate and search different Variational Autoencoder (VAE) configurations.

    Given an input resolution and a spatial compression ratio, this class
    enumerates candidate ``AutoencoderKL`` configurations, profiles each one
    (parameter count, peak CUDA memory, encode latency), and selects the design
    that best matches a parameter and/or GPU-memory budget.
    """

    def __init__(self, input_resolution: int = 1024, compression_ratio: int = 16) -> None:
        """
        Args:
            input_resolution (int): Input image resolution; 1024 or 2048.
            compression_ratio (int): Spatial compression ratio; must be one of
                [8, 16] for 1024 inputs and [8, 16, 32] for 2048 inputs.

        Raises:
            NotImplementedError: If ``input_resolution`` is not 1024 or 2048.
            AssertionError: If ``compression_ratio`` is unsupported for the given
                resolution (kept as ``assert`` for backward compatibility; note
                asserts are stripped under ``python -O``).
        """
        if input_resolution == 1024:
            assert compression_ratio in [8, 16]
        elif input_resolution == 2048:
            assert compression_ratio in [8, 16, 32]
        else:
            # Bug fix: the message previously said "2028" instead of "2048".
            raise NotImplementedError("Higher resolution than 2048 is not implemented yet!")
        self._input_resolution = input_resolution
        self._compression_ratio = compression_ratio

    def _generate_input(self) -> torch.Tensor:
        """
        Create a random (1, 3, H, H) input batch, where H is the configured
        input resolution, in half precision on the GPU.

        NOTE(review): requires a CUDA-capable device.

        Returns:
            torch.Tensor: Random float16 CUDA tensor of shape (1, 3, H, H).
        """
        random_tensor = torch.rand(1, 3, self.input_resolution, self.input_resolution)
        return random_tensor.to(dtype=torch.float16, device="cuda")

    def _count_parameters(self, model: nn.Module = None) -> int:
        """
        Count the number of trainable parameters in a given model.

        Args:
            model (nn.Module): The model for which to count parameters.

        Returns:
            int: The number of parameters with ``requires_grad=True``.
        """
        assert model is not None, "Please provide a nn.Module to count the parameters."
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    def _load_base_json_skeleton(self) -> Dict:
        """
        Load a base configuration skeleton for the VAE.

        Placeholder values (-1 and empty lists) are filled in later by
        ``_assign_attributes``.

        Returns:
            dict: A dictionary representing the base configuration JSON skeleton.
        """
        skeleton = {
            "_class_name": "AutoencoderKL",
            "_diffusers_version": "0.20.0.dev0",
            "_name_or_path": "../sdxl-vae/",
            "act_fn": "silu",
            "block_out_channels": [],
            "down_block_types": [],
            "force_upcast": False,
            "in_channels": 3,
            "latent_channels": -1,  # placeholder, e.g. 16
            "layers_per_block": -1,  # placeholder, e.g. 2
            "norm_num_groups": 32,
            "out_channels": 3,
            "sample_size": 1024,  # resolution size
            "scaling_factor": 0.13025,
            "up_block_types": [],
        }
        return skeleton

    def _generate_all_combinations(self, attr: Dict) -> List[Dict]:
        """
        Generate all possible combinations from a search space dictionary
        (Cartesian product over every attribute's value list).

        Args:
            attr (dict): A dictionary where each key has a list of possible values.

        Returns:
            List[Dict]: One dictionary per unique combination of attributes.
        """
        keys = list(attr.keys())
        choices = [attr[key] for key in keys]
        return [dict(zip(keys, combination)) for combination in itertools.product(*choices)]

    def _assign_attributes(self, choice: Dict) -> Dict:
        """
        Assign a chosen set of attributes to the base VAE configuration skeleton.

        Args:
            choice (dict): A dictionary of attributes to assign to the skeleton.

        Returns:
            dict: A dictionary representing the updated VAE configuration.
        """
        config = self._load_base_json_skeleton()  # renamed from misspelled "skleton"
        for key in (
            "down_block_types",
            "up_block_types",
            "block_out_channels",
            "layers_per_block",
            "latent_channels",
        ):
            config[key] = choice[key]
        return config

    def _search_space_16x1024(self) -> Dict:
        """
        Define the search space for a 16x compression ratio at 1024 resolution
        (5 encoder/decoder stages).

        Returns:
            dict: A dictionary defining lists of possible attribute values.
        """
        attr = {}
        attr["down_block_types"] = [["DownEncoderBlock2D"] * 5]
        attr["up_block_types"] = [["UpDecoderBlock2D"] * 5]
        attr["block_out_channels"] = [
            [128, 256, 512, 512, 512],
            [128, 256, 512, 512, 1024],
            [128, 256, 512, 1024, 2048],
            [64, 128, 256, 512, 512],
        ]
        attr["layers_per_block"] = [1, 2, 3]
        attr["latent_channels"] = [4, 16, 32, 64]
        return attr

    def _search_space_8x1024(self) -> Dict:
        """
        Define the search space for an 8x compression ratio at 1024 resolution
        (4 encoder/decoder stages).

        Returns:
            dict: A dictionary defining lists of possible attribute values.
        """
        attr = {}
        attr["down_block_types"] = [["DownEncoderBlock2D"] * 4]
        attr["up_block_types"] = [["UpDecoderBlock2D"] * 4]
        attr["block_out_channels"] = [[128, 256, 512, 512], [128, 256, 512, 1024], [64, 128, 256, 512]]
        attr["layers_per_block"] = [1, 2, 3]
        attr["latent_channels"] = [4, 16, 32, 64]
        return attr

    def _sort_data_in_place(self, data: List[Dict], mode: str) -> None:
        """
        Sort the list of design configurations in place based on a chosen mode.

        Args:
            data (List[Dict]): Design configurations, each carrying 'param_diff'
                and 'cuda_mem_diff' keys.
            mode (str): The sorting criterion. Can be 'abs_param_diff',
                'abs_cuda_mem_diff', or 'mse'.

        Raises:
            ValueError: If ``mode`` is not one of the recognized criteria.
        """
        if mode == 'abs_param_diff':
            data.sort(key=lambda x: abs(x['param_diff']))
        elif mode == 'abs_cuda_mem_diff':
            data.sort(key=lambda x: abs(x['cuda_mem_diff']))
        elif mode == 'mse':
            # Mean of the two squared budget gaps.
            data.sort(key=lambda x: (x['param_diff'] ** 2 + x['cuda_mem_diff'] ** 2) / 2)
        else:
            raise ValueError("Invalid mode. Choose from 'abs_param_diff', 'abs_cuda_mem_diff', 'mse'.")

    def _print_table(self, data: List[Dict], headers: List[str], col_widths: List[int]) -> None:
        """
        Print a formatted table of the design choices.

        Args:
            data (List[Dict]): The data to print, each entry a design configuration.
            headers (List[str]): Column headers.
            col_widths (List[int]): Widths for each column.
        """
        # Create header row
        header_row = ""
        for header, width in zip(headers, col_widths):
            header_row += f"{header:<{width}}"
        print(header_row)
        print("-" * sum(col_widths))
        # Print each data row
        for item in data:
            row = f"{item['param_diff']:<{col_widths[0]}}"
            row += f"{item['cuda_mem_diff']:<{col_widths[1]}}"
            print(row)

    def search_for_target_vae(self, parameters_budget: float = 0, cuda_max_mem: float = 0):
        """
        Search through available VAE design choices to find one that best matches
        the given parameter and memory budgets.

        Profiles every candidate design with a forward encode pass on CUDA
        (requires a GPU and the ``diffusers`` package at call time).

        Args:
            parameters_budget (float, optional): The target number of parameters (in millions).
            cuda_max_mem (float, optional): The target maximum GPU memory usage (in MB).

        Returns:
            AutoencoderKL: The chosen VAE configuration that best matches the provided budgets.

        Raises:
            ValueError: If neither budget is positive.
            NotImplementedError: If no search space exists for the configured
                resolution/compression-ratio pair (previously this fell through
                to an opaque IndexError on the empty result list).
        """
        if parameters_budget <= 0 and cuda_max_mem <= 0:
            raise ValueError("Please specify a valid parameter budget or cuda max memory budget")
        if self.input_resolution == 1024 and self.compression_ratio == 8:
            search_space = self._search_space_8x1024()
        elif self.input_resolution == 1024 and self.compression_ratio == 16:
            search_space = self._search_space_16x1024()
        else:
            raise NotImplementedError(
                "No search space implemented for "
                f"{self.compression_ratio}x at {self.input_resolution} resolution."
            )
        search_space_choices = self._generate_all_combinations(search_space)
        # _generate_input already returns a float16 CUDA tensor; the second
        # redundant .to(...) conversion was removed.
        inp_tensor = self._generate_input()
        design_choices = []
        for choice in search_space_choices:
            curt_design_json = self._assign_attributes(choice)
            print("-" * 20)
            print(choice)
            vae = AutoencoderKL.from_config(curt_design_json)
            vae = vae.to(dtype=torch.float16, device="cuda")
            # Report parameters in millions.
            total_params = self._count_parameters(vae) / 10**6
            # Reset peak memory statistics so each design is measured in isolation.
            torch.cuda.reset_peak_memory_stats()
            torch.cuda.synchronize()
            with torch.profiler.profile(
                activities=[
                    torch.profiler.ProfilerActivity.CPU,
                    torch.profiler.ProfilerActivity.CUDA,
                ],
                profile_memory=True,  # Enables memory profiling
                record_shapes=True,  # Records tensor shapes
                with_stack=True,  # Records stack traces
            ) as prof:
                # Perform forward pass
                start_time = time.perf_counter()
                with torch.no_grad():
                    _ = vae.encode(inp_tensor).latent_dist.sample()
                # Synchronize so the wall-clock time covers the full GPU work.
                torch.cuda.synchronize()
                end_time = time.perf_counter()
                total_execution_time_ms = (end_time - start_time) * 1000
            # Get maximum memory allocated, in MB.
            max_memory_allocated = torch.cuda.max_memory_allocated() / (1024**2)
            parameters_budget_diff = parameters_budget - total_params
            cuda_max_mem_diff = cuda_max_mem - max_memory_allocated
            design_choices.append(
                {"param_diff": parameters_budget_diff, "cuda_mem_diff": cuda_max_mem_diff, "design": curt_design_json}
            )
            print(f" Total params: {total_params}")
            print(f" Max GPU Memory Usage: {max_memory_allocated} MB")
            print(f" Total Execution Time: {total_execution_time_ms:.2f} ms")
            print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))
            print("-" * 20)
        # Pick the ranking criterion from whichever budgets were provided.
        if parameters_budget == 0:
            sort_mode = "abs_cuda_mem_diff"
        elif cuda_max_mem == 0:
            sort_mode = "abs_param_diff"
        else:
            sort_mode = "mse"
        print("#" * 20)
        self._sort_data_in_place(design_choices, sort_mode)
        headers = ["param_diff (M)", "cuda_mem_diff (MB)"]
        col_widths = [12, 15]
        self._print_table(design_choices, headers, col_widths)
        # Best match is first after sorting.
        vae = AutoencoderKL.from_config(design_choices[0]["design"])
        return vae

    @property
    def input_resolution(self) -> int:
        """
        Get the input resolution for the VAE.

        Returns:
            int: The input resolution.
        """
        return self._input_resolution

    @property
    def compression_ratio(self) -> int:
        """
        Get the compression ratio for the VAE.

        Returns:
            int: The compression ratio (stored as an int by __init__).
        """
        return self._compression_ratio