uncond-sd2-base-complex-4 / vae /laps.model.med_vae_wrapper.py

Rename vae/pips.model.med_vae_wrapper.py to vae/laps.model.med_vae_wrapper.py

5c18113 verified 22 days ago

2.61 kB

	"""
	Wrapper that adapts a MedVAE autoencoder to the interface of the Stable Diffusion (SD) VAE.
	"""

	from types import SimpleNamespace

	import numpy as np
	import torch
	from diffusers.configuration_utils import ConfigMixin, register_to_config
	from diffusers.models.modeling_utils import ModelMixin
	from medvae.models import AutoencoderKL_2D
	from medvae.utils.factory import (
	FILE_DICT_ASSOCIATIONS,
	create_model,
	download_model_weights,
	)
	from omegaconf import OmegaConf


	class LatentDist:
	    """Deterministic stand-in for a latent distribution object.

	    Wraps any object that provides a zero-argument ``mode()`` method and
	    exposes it through both ``mode()`` and ``sample()``. Both accessors
	    return the wrapped distribution's mode, so no random draw is made here.
	    """

	    def __init__(self, dist):
	        # Kept under the name ``latent_dist`` so the wrapped object stays
	        # reachable by callers that want more than the mode.
	        self.latent_dist = dist

	    def sample(self, generator=None):
	        """Return the mode of the wrapped distribution.

	        This is an alias of :meth:`mode`. ``generator`` is accepted so that
	        callers written against a ``sample(generator)`` style API do not
	        fail with ``TypeError``; it is ignored because nothing is sampled.
	        """
	        return self.latent_dist.mode()

	    def mode(self):
	        """Return ``mode()`` of the wrapped distribution."""
	        return self.latent_dist.mode()


	class _DecodeResult(tuple):
	    """One-element tuple that also exposes its item as ``.sample``.

	    Used for ``MedVAEWrapper.decode(..., return_dict=True)`` so the result
	    can be consumed either as ``out[0]`` or as ``out.sample``.
	    """

	    @property
	    def sample(self):
	        return self[0]


	class MedVAEWrapper(ModelMixin, ConfigMixin):
	    """Expose a MedVAE 2D autoencoder through an SD-VAE-like interface.

	    Args:
	        vae: An already constructed autoencoder. When ``None`` an
	            ``AutoencoderKL_2D`` is built from a MedVAE config with
	            ``embed_dim``/``z_channels`` forced to 4 and the input/output
	            channel counts forced to 2.
	        scaling_factor: Stored in the model config unchanged.
	        downsampling_factor: Spatial reduction of the VAE; must be 4 or 8.

	    Raises:
	        ValueError: If ``downsampling_factor`` is neither 4 nor 8.
	    """

	    config_name = "config.json"
	    # ``vae`` is a module instance rather than a setting, so it is listed
	    # here to keep it out of the registered config.
	    ignore_for_config = ["vae"]

	    @register_to_config
	    def __init__(self, vae=None, scaling_factor=1.0, downsampling_factor=4):
	        super().__init__()
	        # Raise explicitly instead of using ``assert``: assertions are
	        # removed under ``python -O``, which would let an unsupported
	        # factor fall through to the 8x branch below.
	        if downsampling_factor not in (4, 8):
	            raise ValueError("Only 4x and 8x downsampling are currently supported")

	        if vae is None:
	            model_name = (
	                "medvae_4_4_2d_c" if downsampling_factor == 4 else "medvae_8_4_2d_c"
	            )
	            config_fpath = download_model_weights(
	                FILE_DICT_ASSOCIATIONS[model_name]["config"]
	            )
	            if model_name == "medvae_8_4_2d_c":
	                # NOTE(review): machine-specific absolute path that replaces
	                # the config fetched just above. The 8x default model can
	                # only be built where this file exists -- TODO make this
	                # configurable or ship the YAML with the model.
	                config_fpath = "/data/yurman/repos/fast-mri-ldm/submodules/medvae/configs/ours-8x1-new.yaml"

	            # Override the loaded config: 4 latent channels, 2 image channels.
	            conf = OmegaConf.load(config_fpath)
	            conf.embed_dim = 4
	            conf.ddconfig.z_channels = 4
	            conf["ddconfig"]["in_channels"] = 2
	            conf["ddconfig"]["out_ch"] = 2

	            vae = AutoencoderKL_2D(
	                ddconfig=conf.ddconfig,
	                embed_dim=conf.embed_dim,
	            )

	        self.vae = vae
	        # When using SD pipeline it uses `block_out_channels` to determine the size of the image based on
	        # 2 ** (len(block_out_channels) - 1), so register a placeholder list
	        # whose length encodes the downsampling factor (4 -> 3, 8 -> 4).
	        n_blocks = int(np.log2(downsampling_factor)) + 1
	        self.register_to_config(
	            block_out_channels=[1] * n_blocks,
	            in_channels=2,
	            scaling_factor=scaling_factor,
	            downsampling_factor=downsampling_factor,
	        )

	    def encode(self, x):
	        """Encode ``x`` with the wrapped VAE.

	        Returns:
	            A namespace whose ``latent_dist`` attribute is a ``LatentDist``
	            wrapping whatever ``self.vae.encode`` returned.
	        """
	        dist = self.vae.encode(x)

	        return SimpleNamespace(latent_dist=LatentDist(dist))

	    def decode(self, x, return_dict=False, generator=None):
	        """Decode latents ``x`` with the wrapped VAE.

	        Args:
	            x: Latents passed straight to ``self.vae.decode``.
	            return_dict: When true, the returned one-element tuple also
	                exposes the decoded output as ``.sample``. When false, a
	                plain ``(decoded,)`` tuple is returned.
	            generator: Accepted for call compatibility; unused.

	        Returns:
	            A one-element tuple holding the decoded output.
	        """
	        # CUDA autocast is switched off for the decoder call.
	        # NOTE(review): ``device_type`` is hard-coded to "cuda"; autocast on
	        # other device types is not disabled by this guard.
	        with torch.amp.autocast(device_type="cuda", enabled=False):
	            x = self.vae.decode(x)
	        if return_dict:
	            return _DecodeResult((x,))
	        return (x,)