ltx-i2v / ComfyUI /comfy /supported_models_base.py

Upload folder using huggingface_hub

34cae42 verified 11 months ago

4.3 kB

	"""
	This file is part of ComfyUI.
	Copyright (C) 2024 Comfy

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <https://www.gnu.org/licenses/>.
	"""

	import torch
	from . import model_base
	from . import utils
	from . import latent_formats

	class ClipTarget:
	def __init__(self, tokenizer, clip):
	self.clip = clip
	self.tokenizer = tokenizer
	self.params = {}

	class BASE:
	unet_config = {}
	unet_extra_config = {
	"num_heads": -1,
	"num_head_channels": 64,
	}

	required_keys = {}

	clip_prefix = []
	clip_vision_prefix = None
	noise_aug_config = None
	sampling_settings = {}
	latent_format = latent_formats.LatentFormat
	vae_key_prefix = ["first_stage_model."]
	text_encoder_key_prefix = ["cond_stage_model."]
	supported_inference_dtypes = [torch.float16, torch.bfloat16, torch.float32]

	memory_usage_factor = 2.0

	manual_cast_dtype = None
	custom_operations = None
	scaled_fp8 = None
	optimizations = {"fp8": False}

	@classmethod
	def matches(s, unet_config, state_dict=None):
	for k in s.unet_config:
	if k not in unet_config or s.unet_config[k] != unet_config[k]:
	return False
	if state_dict is not None:
	for k in s.required_keys:
	if k not in state_dict:
	return False
	return True

	def model_type(self, state_dict, prefix=""):
	return model_base.ModelType.EPS

	def inpaint_model(self):
	return self.unet_config["in_channels"] > 4

	def __init__(self, unet_config):
	self.unet_config = unet_config.copy()
	self.sampling_settings = self.sampling_settings.copy()
	self.latent_format = self.latent_format()
	self.optimizations = self.optimizations.copy()
	for x in self.unet_extra_config:
	self.unet_config[x] = self.unet_extra_config[x]

	def get_model(self, state_dict, prefix="", device=None):
	if self.noise_aug_config is not None:
	out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device)
	else:
	out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device)
	if self.inpaint_model():
	out.set_inpaint()
	return out

	def process_clip_state_dict(self, state_dict):
	state_dict = utils.state_dict_prefix_replace(state_dict, {k: "" for k in self.text_encoder_key_prefix}, filter_keys=True)
	return state_dict

	def process_unet_state_dict(self, state_dict):
	return state_dict

	def process_vae_state_dict(self, state_dict):
	return state_dict

	def process_clip_state_dict_for_saving(self, state_dict):
	replace_prefix = {"": self.text_encoder_key_prefix[0]}
	return utils.state_dict_prefix_replace(state_dict, replace_prefix)

	def process_clip_vision_state_dict_for_saving(self, state_dict):
	replace_prefix = {}
	if self.clip_vision_prefix is not None:
	replace_prefix[""] = self.clip_vision_prefix
	return utils.state_dict_prefix_replace(state_dict, replace_prefix)

	def process_unet_state_dict_for_saving(self, state_dict):
	replace_prefix = {"": "model.diffusion_model."}
	return utils.state_dict_prefix_replace(state_dict, replace_prefix)

	def process_vae_state_dict_for_saving(self, state_dict):
	replace_prefix = {"": self.vae_key_prefix[0]}
	return utils.state_dict_prefix_replace(state_dict, replace_prefix)

	def set_inference_dtype(self, dtype, manual_cast_dtype):
	self.unet_config['dtype'] = dtype
	self.manual_cast_dtype = manual_cast_dtype