# File size: 4,809 Bytes (snapshot 618f472)
import os
from pathlib import Path
from typing import Any, Dict, Tuple
import torch
from shared.utils import files_locator as fl
class family_handler:
    """Static hooks describing the "ovi" model type.

    Every method is a ``@staticmethod``; the class is used as a namespace and
    is never instantiated in this file. ``query_model_family`` reports the
    "wan" family, and file resolution is delegated to the Wan handler.
    """

    @staticmethod
    def query_supported_types():
        """Return the list of base model types served by this handler."""
        return ["ovi"]

    @staticmethod
    def query_family_maps() -> Tuple[Dict[str, str], Dict[str, list]]:
        """Return two empty mappings (this handler declares no family maps)."""
        return {}, {}

    @staticmethod
    def query_model_family():
        """Return the name of the model family this handler belongs to."""
        return "wan"

    @staticmethod
    def query_family_infos():
        """Return an empty mapping (no family-level info is declared here)."""
        return {}

    @staticmethod
    def get_wan_text_encoder_filename(text_encoder_quantization):
        """Locate the umt5-xxl text encoder file for the given quantization.

        Args:
            text_encoder_quantization: When equal to ``"int8"`` the
                ``quanto_int8`` variant of the file name is used; any other
                value selects the ``bf16`` file.

        Returns:
            Whatever ``fl.locate_file(<relative name>, True)`` returns.
        """
        text_encoder_filename = "umt5-xxl/models_t5_umt5-xxl-enc-bf16.safetensors"
        if text_encoder_quantization == "int8":
            text_encoder_filename = text_encoder_filename.replace("bf16", "quanto_int8")
        return fl.locate_file(text_encoder_filename, True)

    @staticmethod
    def query_model_def(base_model_type: str, model_def: Dict[str, Any]):
        """Return the handler defaults overlaid with ``model_def``.

        A fresh dict is built on every call, so callers may mutate the result.

        Args:
            base_model_type: Not used by this implementation.
            model_def: Entries that override (or extend) the defaults below.

        Returns:
            A new dict holding the defaults updated with ``model_def``.
        """
        cfg = {
            "wan_5B_class": True,
            "profiles_dir": ["wan_2_2_ovi"],
            "group": "wan2_2",
            "fps": 24,
            "frames_minimum": 121,
            "frames_steps": 120,
            # Single entry on purpose: a dict literal keeps only the last
            # value given for a key, and True is the value that must apply.
            "sliding_window": True,
            "multiple_submodels": False,
            "guidance_max_phases": 1,
            "skip_layer_guidance": True,
            "returns_audio": True,
            "sample_solvers": [
                ("unipc", "unipc"),
                ("dpm++", "dpm++"),
                ("euler", "euler"),
            ],
            "audio_guidance": True,
            "image_prompt_types_allowed": "TSVL",
            "sliding_window_size_locked": True,
            "sliding_window_defaults": {
                "overlap_min": 1,
                "overlap_max": 1,
                "overlap_step": 0,
                "overlap_default": 1,
            },
            "compile": ["transformer", "transformer2"],
        }
        cfg.update(model_def)
        return cfg

    @staticmethod
    def query_model_files(computeList, base_model_type, model_filename, text_encoder_quantization):
        """Return the download definitions needed for this model.

        The Wan handler is queried with the fixed base type ``"ti2v_2_2"``
        (``base_model_type`` is not forwarded), and one extra entry for the
        ``mmaudio`` folder is appended.

        Returns:
            A new list: the Wan handler's definition(s) followed by the
            ``mmaudio`` entry. The object returned by the Wan handler is not
            modified.
        """
        # Aliased so the imported class does not hide this class's own name.
        from .wan_handler import family_handler as wan_family_handler

        wan_defs = wan_family_handler.query_model_files(
            computeList, "ti2v_2_2", model_filename, text_encoder_quantization
        )
        # The delegate may hand back a single definition rather than a list.
        base_defs = wan_defs if isinstance(wan_defs, list) else [wan_defs]
        mmaudio_def = {
            "repoId": "DeepBeepMeep/Wan2.1",
            "sourceFolderList": ["mmaudio"],
            "fileList": [["v1-16.pth", "best_netG.pt"]],
        }
        return [*base_defs, mmaudio_def]

    @staticmethod
    def load_model(
        model_filename,
        model_type,
        base_model_type,
        model_def,
        quantizeTransformer=False,
        text_encoder_quantization=None,
        dtype=torch.bfloat16,
        VAE_dtype=torch.float32,
        mixed_precision_transformer=False,
        save_quantized=False,
        submodel_no_list=None,
        override_text_encoder=None,
    ):
        """Build an ``OviFusionEngine`` and describe its sub-models.

        Only ``model_filename``, ``model_def``, ``text_encoder_quantization``
        and ``dtype`` are read; the remaining parameters are not used by this
        implementation.

        Returns:
            A ``(engine, info)`` tuple where ``info`` has two keys:
            ``"pipe"`` (named sub-models taken from the engine) and
            ``"coTenantsMap"`` (pairs ``transformer`` with ``transformer2``).
        """
        from .ovi_fusion_engine import OviFusionEngine

        # NOTE(review): override_text_encoder is accepted but never consulted;
        # the text encoder path always comes from the quantization setting.
        # Confirm whether that is intended.
        text_encoder_filename = family_handler.get_wan_text_encoder_filename(
            text_encoder_quantization
        )
        ovi_model = OviFusionEngine(
            config=None,
            checkpoint_dir="ckpts",
            model_def=model_def,
            model_filename=model_filename,
            text_encoder_filename=text_encoder_filename,
            dtype=dtype,
        )

        pipe = {
            "transformer": ovi_model.model.video_model,
            "transformer2": ovi_model.model.audio_model,
            "text_encoder": ovi_model.text_encoder.model,
            "vae": ovi_model.vae.model,
            "vae2": ovi_model.audio_vae,
        }
        cotenants_map = {
            "transformer": ["transformer2"],
            "transformer2": ["transformer"],
        }
        # Named pipe_config rather than ``dict`` to keep the builtin usable.
        pipe_config = {"pipe": pipe, "coTenantsMap": cotenants_map}
        return ovi_model, pipe_config

    @staticmethod
    def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
        """Do nothing; no settings migration is implemented for this handler."""
        pass

    @staticmethod
    def update_default_settings(base_model_type, model_def, ui_defaults):
        """Write this handler's default UI settings into ``ui_defaults``.

        ``ui_defaults`` is updated in place (existing values for the same keys
        are overwritten); nothing is returned.
        """
        ui_defaults.update(
            {
                "sample_solver": "unipc",
                "flow_shift": 5.0,
                "guidance_scale": 4.0,
                "audio_guidance_scale": 3.0,
                "num_inference_steps": 50,
                "slg_switch": 1,
                "sliding_window_size": 121,
                "video_length": 121,
                "slg_layers": [11],
            }
        )

    @staticmethod
    def get_vae_block_size(base_model_type):
        """Return the VAE block size, which is 32 for every input."""
        return 32

    @staticmethod
    def get_rgb_factors(base_model_type):
        """Return the RGB factors registered for ``("wan", "ti2v_2_2")``.

        ``base_model_type`` is not used; the lookup keys are fixed.
        """
        from shared.RGB_factors import get_rgb_factors

        return get_rgb_factors("wan", "ti2v_2_2")