| import os |
| import shutil |
| import sys |
| import torch |
| from shared.utils import files_locator as fl |
| from shared.utils.hf import build_hf_url |
| from shared.utils.loras_mutipliers import parse_loras_multipliers |
| import gradio as gr |
| from pathlib import Path |
|
|
| from .infos import LTX2_INFOS |
| from .lora_utils import control_video_phase2_message |
|
|
# Gemma text-encoder folder (name and download URL) plus the two weight files
# derived from the folder name.
_GEMMA_FOLDER = "gemma-3-12b-it-qat-q4_0-unquantized"
_GEMMA_FOLDER_URL = "https://huggingface.co/DeepBeepMeep/LTX-2/resolve/main/gemma-3-12b-it-qat-q4_0-unquantized/"
_GEMMA_FILENAME = _GEMMA_FOLDER + ".safetensors"
_GEMMA_QUANTO_FILENAME = _GEMMA_FOLDER + "_quanto_bf16_int8.safetensors"

# Set to True once _migrate_loras() has completed, so it runs at most once.
_LORAS_MIGRATED = False

# Short system-LoRA name -> key of the matching filename in an arch spec.
_SYSTEM_LORA_SPEC_KEYS = dict(
    distilled="distilled_lora",
    distilled_1_1="distilled_1_1_lora",
    union_control="union_control_lora",
    id="id_lora",
    outpaint="outpaint_lora",
    hdr="hdr_lora",
)

# Arch-spec keys that hold LoRA filenames, in the same order as the mapping above.
_LORA_SPEC_KEYS = tuple(_SYSTEM_LORA_SPEC_KEYS.values())

# Extra model-definition entries merged in for EditAnything models.
_EDITANYTHING_MODEL_DEF = dict(
    ltx2_edit_anything=True,
    ltx2_edit_anything_ref=True,
    ltx2_edit_anything_ref_start_block=12,
    ltx2_edit_anything_ref_end_block=35,
    ltx2_edit_anything_ref_context_scale=0.01,
    ltx2_edit_anything_ref_token_scale=0.25,
    ltx2_edit_anything_adaln_scale=2.0,
)
|
|
# Per-architecture file names and directories. Each entry lists the Hugging Face
# repo, the local config file, the auxiliary checkpoints (upscalers, VAEs,
# vocoder, text projection/connector), the built-in LoRA files and the
# profile / LoRA directory names.
_ARCH_SPECS = {
    "ltx2_19B": {
        "repo_id": "DeepBeepMeep/LTX-2",
        "config_file": "ltx2_19b_config.json",
        "spatial_upscaler": "ltx-2-spatial-upscaler-x2-1.0.safetensors",
        "temporal_upscaler": "ltx-2-temporal-upscaler-x2-1.0.safetensors",
        "distilled_lora": "ltx-2-19b-distilled-lora-384.safetensors",
        "union_control_lora": "ltx-2-19b-ic-lora-union-control-ref0.5.safetensors",
        "id_lora": "id-lora-celebvhq-ltx2.safetensors",
        "video_vae": "ltx-2-19b_vae.safetensors",
        "audio_vae": "ltx-2-19b_audio_vae.safetensors",
        "vocoder": "ltx-2-19b_vocoder.safetensors",
        "text_embedding_projection": "ltx-2-19b_text_embedding_projection.safetensors",
        # The 19B spec has one connector per pipeline kind (dev / distilled).
        "dev_embeddings_connector": "ltx-2-19b-dev_embeddings_connector.safetensors",
        "distilled_embeddings_connector": "ltx-2-19b-distilled_embeddings_connector.safetensors",
        "profiles_dir": "ltx2",
        "dev_profiles_dir": "ltx2_dev_accelerators",
        "preset_profiles_dir": "ltx2_presets",
        "distilled_preset_profiles_dir": "ltx2_distilled_presets",
        "lora_dir": "ltx2",
    },
    "ltx2_22B": {
        "repo_id": "DeepBeepMeep/LTX-2",
        "config_file": "ltx2_22b_config.json",
        "spatial_upscaler": "ltx-2.3-spatial-upscaler-x2-1.1.safetensors",
        "temporal_upscaler": "ltx-2.3-temporal-upscaler-x2-1.0.safetensors",
        "distilled_lora": "ltx-2.3-22b-distilled-lora-384.safetensors",
        "distilled_1_1_lora": "ltx-2.3-22b-distilled-lora-384-1.1.safetensors",
        "union_control_lora": "ltx-2.3-22b-ic-lora-union-control-ref0.5.safetensors",
        "id_lora": "id-lora-celebvhq-ltx2.3.safetensors",
        "outpaint_lora": "ltx-2.3-22b-ic-lora-outpaint.safetensors",
        "hdr_lora": "ltx-2.3-22b-ic-lora-hdr-0.9.safetensors",
        "hdr_scene_embeddings": "ltx-2.3-22b-ic-lora-hdr-scene-emb.safetensors",
        "video_vae": "ltx-2.3-22b_vae.safetensors",
        "audio_vae": "ltx-2.3-22b_audio_vae.safetensors",
        "vocoder": "ltx-2.3-22b_vocoder.safetensors",
        "text_embedding_projection": "ltx-2.3-22b_text_embedding_projection.safetensors",
        # The 22B spec has a single connector shared by both pipeline kinds.
        "embeddings_connector": "ltx-2.3-22b_embeddings_connector.safetensors",
        "profiles_dir": "ltx2",
        "dev_profiles_dir": "ltx2_dev_accelerators",
        "preset_profiles_dir": "ltx2_presets",
        "distilled_preset_profiles_dir": "ltx2_distilled_presets",
        "lora_dir": "ltx2",
    },
}

# Base model types that share the 22B architecture.
LTX2_22B_CLASS = {"ltx2_22B", "ltx2_22B_edit_anything"}

# Register every other 22B-class type as an alias of the very same spec dict.
# A comprehension is used so that no loop variable is left behind as a
# module-level global.
_ARCH_SPECS.update({alias: _ARCH_SPECS["ltx2_22B"] for alias in LTX2_22B_CLASS if alias != "ltx2_22B"})
|
|
def _get_arch_spec(base_model_type: str | None) -> dict:
    """Return the architecture spec for ``base_model_type``.

    Unknown, empty or ``None`` model types fall back to the ``ltx2_19B`` spec.
    """
    lookup_key = base_model_type if base_model_type else ""
    fallback_spec = _ARCH_SPECS["ltx2_19B"]
    return _ARCH_SPECS.get(lookup_key, fallback_spec)
|
|
|
|
def _get_system_lora_urls(spec: dict) -> dict:
    """Map ``ltx2_lora_<name>`` to the URL of each system LoRA present in ``spec``.

    A LoRA is skipped when ``spec`` has no (or an empty) filename for it. URLs
    are produced by ``build_hf_url`` from the spec's ``repo_id`` and filename.
    """
    urls = {}
    for short_name, spec_key in _SYSTEM_LORA_SPEC_KEYS.items():
        if not spec.get(spec_key):
            continue
        urls[f"ltx2_lora_{short_name}"] = build_hf_url(spec["repo_id"], spec[spec_key])
    return urls
|
|
|
|
def _default_perturbation_layers(base_model_type: str | None) -> list[int]:
    """Return the default perturbation layer list: ``[28]`` for 22B-class models, ``[29]`` otherwise."""
    if base_model_type in LTX2_22B_CLASS:
        return [28]
    return [29]
|
|
|
|
def _default_dev_settings(base_model_type: str | None) -> dict:
    """Return the default generation settings for the non-distilled pipeline.

    22B-class models get a full set of defaults (8 steps with the
    ``distilled_8_steps`` solver); every other model type gets a smaller set
    based on 40 steps and a guidance scale of 3.0.
    """
    layers = _default_perturbation_layers(base_model_type)

    if base_model_type not in LTX2_22B_CLASS:
        return {
            "num_inference_steps": 40,
            "guidance_scale": 3.0,
            "perturbation_layers": layers,
            "perturbation_start_perc": 0,
            "perturbation_end_perc": 100,
            "apg_switch": 0,
            "cfg_star_switch": 0,
            "guidance_phases": 2,
        }

    return {
        "num_inference_steps": 8,
        "video_length": 121,
        "resolution": "1280x720",
        "sample_solver": "distilled_8_steps",
        "guidance_scale": 1.0,
        "audio_guidance_scale": 1.0,
        "alt_guidance_scale": 1.0,
        "alt_scale": 0.0,
        "perturbation_switch": 0,
        "perturbation_layers": layers,
        "perturbation_start_perc": 0,
        "perturbation_end_perc": 100,
        "apg_switch": 0,
        "cfg_star_switch": 0,
        "self_refiner_setting": 0,
        "guidance_phases": 2,
    }
|
|
|
|
def _is_editanything_model(model_def) -> bool:
    """Tell whether ``model_def`` describes an EditAnything model.

    True when the ``ltx2_edit_anything`` entry is truthy or the declared
    ``architecture`` is ``ltx2_22B_edit_anything``.
    """
    explicit_flag = model_def.get("ltx2_edit_anything", False)
    if explicit_flag:
        return explicit_flag
    return model_def.get("architecture", "") == "ltx2_22B_edit_anything"
|
|
|
|
def _is_distilled_model(model_def) -> bool:
    """Tell whether ``model_def`` selects the ``distilled`` LTX-2 pipeline."""
    pipeline = model_def.get("ltx2_pipeline", "")
    return pipeline == "distilled"
|
|
|
|
def _get_embeddings_connector_filename(model_def, base_model_type):
    """Return the text-embeddings connector filename for the given model.

    A spec-level ``embeddings_connector`` wins when present. Otherwise the
    choice depends on the pipeline kind read from ``model_def`` (``None`` is
    accepted and treated as ``two_stage``): distilled or dev connector.
    """
    arch = _get_arch_spec(base_model_type)
    shared = arch.get("embeddings_connector")
    if shared:
        return shared
    is_distilled = (model_def or {}).get("ltx2_pipeline", "two_stage") == "distilled"
    connector_key = "distilled_embeddings_connector" if is_distilled else "dev_embeddings_connector"
    return arch[connector_key]
|
|
|
|
def _get_multi_file_names(model_def, base_model_type):
    """Return the auxiliary checkpoint filenames of a model, keyed by role.

    Roles, in order: ``video_vae``, ``audio_vae``, ``vocoder``,
    ``text_embedding_projection`` and ``text_embeddings_connector``.
    """
    arch = _get_arch_spec(base_model_type)
    spec_roles = ("video_vae", "audio_vae", "vocoder", "text_embedding_projection")
    names = {role: arch[role] for role in spec_roles}
    names["text_embeddings_connector"] = _get_embeddings_connector_filename(model_def, base_model_type)
    return names
|
|
|
|
def _resolve_multi_file_paths(model_def, base_model_type):
    """Resolve the auxiliary checkpoints and the model config of a model to paths.

    Each filename is passed through ``fl.locate_file``; the spatial upscaler is
    stored under ``spatial_upsampler`` and the JSON config, expected in the
    ``configs`` folder next to this module, under ``model_config``.

    Raises:
        FileNotFoundError: if the config file does not exist.
    """
    arch = _get_arch_spec(base_model_type)
    resolved = {}
    for role, filename in _get_multi_file_names(model_def, base_model_type).items():
        resolved[role] = fl.locate_file(filename)
    resolved["spatial_upsampler"] = fl.locate_file(arch["spatial_upscaler"])

    config_path = os.path.join(os.path.dirname(__file__), "configs", arch["config_file"])
    if os.path.isfile(config_path):
        resolved["model_config"] = config_path
        return resolved
    raise FileNotFoundError(f"Missing LTX config file: {config_path}")
|
|
|
|
def _migrate_loras():
    """Move the built-in LTX-2 LoRA files into the LTX-2 LoRA directory, once.

    The target directory is ``<lora root>/<lora_dir of the 19B spec>``, where
    the LoRA root is obtained from the already-imported ``wgp`` module. Every
    LoRA filename listed in the arch specs is looked up with ``fl.locate_file``
    and moved when it is found somewhere else and no file of that name already
    exists in the target directory.

    When ``wgp`` is not loaded (or does not expose ``get_lora_root``) nothing
    is done and the migration stays pending, so a later call can still run it.
    """
    global _LORAS_MIGRATED
    if _LORAS_MIGRATED:
        return

    # sys.modules.get() returns None when wgp has not been imported; guard the
    # lookup the same way _notify_control_video_phase2 does.
    wgp = sys.modules.get("wgp")
    if wgp is None or not hasattr(wgp, "get_lora_root"):
        return
    lora_root = wgp.get_lora_root()

    lora_dir = Path(lora_root) / _ARCH_SPECS["ltx2_19B"]["lora_dir"]
    lora_dir.mkdir(parents=True, exist_ok=True)

    # Filenames already handled; several specs can reference the same file
    # (aliases share one spec dict).
    handled = set()
    for spec in _ARCH_SPECS.values():
        for key in _LORA_SPEC_KEYS:
            filename = spec.get(key)
            if filename is None or filename in handled:
                continue
            source = fl.locate_file(filename, error_if_none=False)
            if source is None:
                # Not found locally: nothing to migrate for this file.
                continue
            target = lora_dir / filename
            if Path(source).resolve() == target.resolve() or target.exists():
                # Already in place (or a file of that name is): leave it alone.
                handled.add(filename)
                continue
            shutil.move(source, target)
            print(f"[WAN2GP][LTX2] Moved {key} LoRA '{source}' -> '{target}'")
            handled.add(filename)

    _LORAS_MIGRATED = True
|
|
|
|
def _notify_control_video_phase2(base_model_type, model_def, inputs, any_outpainting):
    """Report how LoRAs apply to a two-phase generation driven by a control video.

    Nothing happens (``""`` is returned) unless ``guidance_phases`` is 2, the
    video prompt type contains ``V`` and a ``video_guide`` is set. Otherwise
    the built-in LoRAs implied by the prompt type are combined with the user's
    activated LoRAs, their multipliers are parsed for two phases, and the
    message from ``control_video_phase2_message`` is printed and shown with
    ``gr.Info``.

    Returns:
        ``""`` on success or when not applicable, otherwise an error message
        describing the multiplier parsing failure.
    """
    prompt_flags = inputs.get("video_prompt_type", "") or ""
    if int(inputs.get("guidance_phases", 1)) != 2:
        return ""
    if "V" not in prompt_flags:
        return ""
    if inputs.get("video_guide") is None:
        return ""

    # The LoRA directory comes from wgp when that module is loaded and exposes it.
    wgp = sys.modules.get("wgp")
    if wgp is not None and hasattr(wgp, "get_lora_dir"):
        lora_dir = wgp.get_lora_dir(base_model_type)
    else:
        lora_dir = None

    user_loras = inputs.get("activated_loras", []) or []
    already_selected = {os.path.basename(path).lower() for path in user_loras}

    spec = _get_arch_spec(base_model_type)
    is_plain_22b = base_model_type == "ltx2_22B"
    wants_union_control = any(letter in prompt_flags for letter in "OPDE")
    candidates = [
        spec.get("hdr_lora") if is_plain_22b and "&" in prompt_flags else None,
        spec.get("union_control_lora") if wants_union_control else None,
        spec.get("outpaint_lora") if is_plain_22b and any_outpainting else None,
    ]

    # Built-in LoRAs the user has not already activated, each with multiplier 1.0.
    extra_loras = []
    for name in candidates:
        if not name or name.lower() in already_selected:
            continue
        extra_loras.append(os.path.join(lora_dir, name) if lora_dir else name)
    extra_mults = [1.0] * len(extra_loras)

    if lora_dir:
        activated_loras = [os.path.join(lora_dir, os.path.basename(path)) for path in user_loras]
    else:
        activated_loras = list(user_loras)

    steps = int(inputs.get("num_inference_steps", 1))
    switch_phase = inputs.get("model_switch_phase", 1)

    _, loras_slists, errors = parse_loras_multipliers(extra_mults, len(extra_loras), steps, nb_phases=2, model_switch_phase=switch_phase)
    if not errors:
        # Merge the user's multipliers into the lists built for the built-ins.
        _, loras_slists, errors = parse_loras_multipliers(inputs.get("loras_multipliers", ""), len(activated_loras), steps, nb_phases=2, merge_slist=loras_slists, model_switch_phase=switch_phase)
    if errors:
        return f"Error parsing Loras: {errors}"

    message = control_video_phase2_message(
        extra_loras + activated_loras,
        loras_slists,
        force_phase2_control=_is_editanything_model(model_def),
        force_name="EditAnything",
    )
    print(message)
    gr.Info(message)
    return ""
|
|
|
|
class family_handler:
    """Model-family handler for the LTX-2 models (19B, 22B and 22B EditAnything).

    The class is used as a namespace: every entry point is a static method and
    no instance state is kept.
    """

    @staticmethod
    def query_supported_types():
        """Return the base model types served by this handler.

        Calling this also triggers the one-time LoRA migration.
        """
        _migrate_loras()
        return ["ltx2_19B", "ltx2_22B", "ltx2_22B_edit_anything"]

    @staticmethod
    def query_family_maps():
        """Return the ``(models_eqv_map, models_comp_map)`` pair of this family.

        How the two maps are interpreted is up to the caller.
        """
        models_eqv_map = {
            "ltx2_19B": "ltx2_22B",
            "ltx2_22B_edit_anything": "ltx2_22B",
        }
        models_comp_map = {
            "ltx2_19B": ["ltx2_22B", "ltx2_22B_edit_anything"],
        }
        return models_eqv_map, models_comp_map

    @staticmethod
    def query_model_family():
        """Return the family identifier, ``"ltx2"``."""
        return "ltx2"

    @staticmethod
    def query_family_infos():
        """Return the family info mapping: family id -> ``(40, "LTX-2")``."""
        return {"ltx2": (40, "LTX-2")}

    @staticmethod
    def query_model_def(base_model_type, model_def):
        """Build the extra model-definition entries for an LTX-2 model.

        Args:
            base_model_type: one of the supported base model types.
            model_def: the model definition dict. Its ``preload_URLs`` list, when
                present, is rewritten in place so that http entries pointing at
                a known LoRA file carry the ``|%lora_dir`` suffix.

        Returns:
            A new dict of entries describing UI options, prompt-enhancer
            settings, file names and pipeline capabilities for this model.
        """
        preload_urls = model_def.get("preload_URLs")
        spec = _get_arch_spec(base_model_type)
        from .prompt_enhancer import LTX2_PROMPT_INFOS, LTX2_RELAYED_IMAGE_PROMPT, LTX2_RELAYED_PROMPT

        if isinstance(preload_urls, list):
            lora_filenames = {spec[key] for key in _LORA_SPEC_KEYS if key in spec}

            def add_lora_dir_suffix(entry):
                # Leave non-strings and already-tagged entries untouched.
                if not isinstance(entry, str) or "|%lora_dir" in entry:
                    return entry
                source_entry = entry.split("|", 1)[0]
                if source_entry.startswith("http") and os.path.basename(source_entry) in lora_filenames:
                    return f"{source_entry}|%lora_dir"
                return entry

            model_def["preload_URLs"] = [add_lora_dir_suffix(entry) for entry in preload_urls]

        editanything_ref = _is_editanything_model(model_def)
        pipeline_kind = "distilled" if _is_distilled_model(model_def) else "two_stage"
        distilled = pipeline_kind == "distilled"

        # Non-distilled EditAnything models expose a reduced audio prompt selection.
        audio_prompt_selection = ["", "A", "K", "2", "A1OF"]
        if editanything_ref and not distilled:
            audio_prompt_selection = ["", "A", "K"]
        audio_prompt_labels = {
            "": "Generate Video & Soundtrack based on Text Prompt",
            "A": "Generate Video based on Soundtrack and Text Prompt",
            "K": "Generate Video based on Control Video + its Audio Track and Text Prompt",
            "2": "Generate Audio based on Control Video and Text Prompt",
            "A1OF": "Generate Video based on Reference Voice (ID-LoRA) and Text Prompt",
        }

        extra_model_def = {
            "ltx2_22B_class": base_model_type in LTX2_22B_CLASS,
            "ltx2_edit_anything": editanything_ref,
            "infos": model_def.get("infos", LTX2_INFOS),
            "text_encoder_folder": _GEMMA_FOLDER,
            "text_encoder_URLs": [
                build_hf_url("DeepBeepMeep/LTX-2", _GEMMA_FOLDER, _GEMMA_FILENAME),
                build_hf_url("DeepBeepMeep/LTX-2", _GEMMA_FOLDER, _GEMMA_QUANTO_FILENAME),
            ],
            "dtype": "bf16",
            "fps": 24,
            "frames_minimum": 17,
            "frames_steps": 8,
            "sliding_window": True,
            "image_prompt_types_allowed": "TSEVL",
            "end_frames_always_enabled": True,
            "returns_audio": True,
            "any_audio_prompt": True,
            "audio_prompt_choices": True,
            "one_speaker_only": True,
            "audio_guide_label": "Audio Prompt (Soundtrack, leave blank to use a Null Audio)",
            "audio_scale_name": "Prompt Audio Strength",
            "audio_prompt_type_sources": {
                "selection": audio_prompt_selection,
                "labels": audio_prompt_labels,
                "custom_flags": {
                    "1": "Reference Voice (ID-LoRA)",
                    "2": "Generate Audio based on Control Video and Text Prompt",
                },
                "letters_filter": "A1OFK2",
                "show_label": False,
                "default": "K" if editanything_ref else "",
            },
            "prompt_enhancer_button_label": "Write",
            "prompt_infos": LTX2_PROMPT_INFOS,
            "prompt_enhancer_def": {
                "selection": ["T", "TI", "T1", "TI1"],
                "labels": {
                    "T": "An Enhanced Prompt using existing Text Prompt",
                    "TI": "An Enhanced Prompt using existing Text Prompt and Start Image",
                    "T1": "An Enhanced Relayed Prompt using existing Text Prompt",
                    "TI1": "An Enhanced Relayed Prompt using existing Text Prompt and Start Image",
                },
                "default": "",
            },
            "text_prompt_enhancer_instructions1": LTX2_RELAYED_PROMPT,
            "video_prompt_enhancer_instructions1": LTX2_RELAYED_IMAGE_PROMPT,
            "image_prompt_enhancer_instructions1": LTX2_RELAYED_IMAGE_PROMPT,
            "text_prompt_enhancer_max_tokens1": 1024,
            "video_prompt_enhancer_max_tokens1": 1024,
            "image_prompt_enhancer_max_tokens1": 1024,
            "auto_null_audio": True,
            "audio_guide_window_slicing": True,
            "video_length_not_limited_by_audio": True,
            "output_audio_is_input_audio": True,
            "multimedia_generation": True,
            "multiple_images_as_text_prompts": True,
            "custom_denoising_strength": distilled,
            "profiles_dir": [spec["profiles_dir"]] + ([] if distilled else [spec["dev_profiles_dir"]]),
            "ltx2_spatial_upscaler_file": spec["spatial_upscaler"],
            "ltx2_hdr_lora_file": spec.get("hdr_lora", ""),
            "ltx2_hdr_scene_embeddings_file": spec.get("hdr_scene_embeddings", ""),
            "self_refiner": True,
            "self_refiner_max_plans": 2,
            "vae_block_size": 64,
            "keep_frames_video_guide_not_supported": True,
            "NAG": True,
        }
        extra_model_def.update(_get_system_lora_urls(spec))
        if distilled:
            extra_model_def["ltx2_pipeline"] = "distilled"
        else:
            extra_model_def["finetune_custom_urls"] = ["ltx2_lora_distilled"]

        if editanything_ref:
            extra_model_def.update(_EDITANYTHING_MODEL_DEF)

        if base_model_type in ["ltx2_22B"]:
            extra_model_def["video_guide_outpainting"] = [0, 1]
            extra_model_def["video_guide_outpainting_label"] = "Enable Spatial Outpainting on Control Video using Ic Lora Outpaint"
            extra_model_def["guide_inpaint_color"] = 0

        extra_model_def["preset_profiles_dir"] = [spec.get("distilled_preset_profiles_dir") if distilled else spec.get("preset_profiles_dir")]
        extra_model_def["extra_control_frames"] = 1
        extra_model_def["dont_cat_preguide"] = True
        extra_model_def["input_video_strength"] = {
            "label": "Start Image / Source Strength (lower values may create more motion)",
            "name": "Start Image / Source Strength",
        }
        extra_model_def["denoising_strength"] = {
            "label": "Control Video Strength (higher = closer to the Control Video)",
            "name": "Control Video Strength",
        }
        extra_model_def["masking_strength"] = {
            "label": "Masked Control Duration (higher = longer masked reinjection)",
            "name": "Masked Control Duration",
        }

        # The first control choice depends on the model; the rest is shared.
        if base_model_type in ["ltx2_22B_edit_anything"]:
            control_choices = [("EditAnything Source Video", "VGI")]
        else:
            control_choices = [("No Video Process", "")]
        control_choices += [
            ("Transfer Human Motion", "PVG"),
            ("Transfer Human Motion With Pose Alignment", "OVG"),
            ("Transfer Depth", "DVG"),
            ("Transfer Canny Edges", "EVG"),
            ("LTX2 Raw Format / Control Video for Ic Lora", "VG"),
        ]
        if base_model_type == "ltx2_22B":
            control_choices += [("Convert SDR to HDR (IC-LoRA)", "V&G")]
        control_choices += [("Inject Frames", "KFI")]
        extra_model_def["guide_custom_choices"] = {
            "choices": control_choices,
            "letters_filter": "OPDEVG&KFI",
            "default": "VGI" if editanything_ref else "",
            "label": "Control Video / Frames Injection",
            "visible": not editanything_ref,
        }
        extra_model_def["custom_frames_injection"] = True
        extra_model_def["one_image_ref_only"] = True
        if editanything_ref:
            extra_model_def["one_image_ref_needed"] = True

        if editanything_ref:
            extra_model_def["mask_preprocessing"] = {
                "selection": [""], "visible": False,
            }
        else:
            extra_model_def["mask_preprocessing"] = {
                "selection": ["", "A", "NA", "XA", "XNA"],
            }
        extra_model_def["sliding_window_defaults"] = {
            "overlap_min": 1,
            "overlap_max": 97,
            "overlap_step": 8,
            "overlap_default": 9,
            "window_min": 5,
            "window_max": 501,
            "window_step": 4,
            "window_default": 241,
        }
        if distilled:
            extra_model_def.update(
                {
                    "lock_inference_steps": True,
                    "no_negative_prompt": False,
                }
            )
        else:
            extra_model_def.update(
                {
                    "audio_guidance": True,
                    "adaptive_projected_guidance": True,
                    "cfg_star": True,
                    "perturbation": True,
                    "alt_guidance": "Modality Guidance",
                    "alt_scale": "Guidance Rescale",
                    "perturbation_choices": [
                        ("Off", 0),
                        ("Skip Layer Guidance", 1),
                        ("Skip Self Attention", 2),
                    ],
                    "perturbation_layers_max": 48,
                }
            )
            if base_model_type in LTX2_22B_CLASS:
                extra_model_def["sample_solvers"] = [("Distilled 8 Steps", "distilled_8_steps"), ("Euler", "euler"), ("HQ (res2s)", "res2s")]
        extra_model_def["guidance_max_phases"] = 2
        extra_model_def["visible_phases"] = 0 if distilled else 1

        return extra_model_def

    @staticmethod
    def get_rgb_factors(base_model_type):
        """Return the RGB factors registered for the ``ltx2`` family and this model type."""
        from shared.RGB_factors import get_rgb_factors

        return get_rgb_factors("ltx2", base_model_type)

    @staticmethod
    def register_lora_cli_args(parser, lora_root):
        """Add the ``--lora-dir-ltx2`` option to ``parser``.

        ``lora_root`` is only used to show the default location in the help text.
        """
        parser.add_argument(
            "--lora-dir-ltx2",
            type=str,
            default=None,
            help=f"Path to a directory that contains LTX-2 LoRAs (default: {os.path.join(lora_root, 'ltx2')})",
        )

    @staticmethod
    def get_lora_dir(base_model_type, args, lora_root):
        """Return the LTX-2 LoRA directory.

        The ``lora_dir_ltx2`` attribute of ``args`` wins when set and non-empty;
        otherwise ``<lora_root>/ltx2`` is used. ``base_model_type`` is not used.
        """
        return getattr(args, "lora_dir_ltx2", None) or os.path.join(lora_root, "ltx2")

    @staticmethod
    def query_model_files(computeList, base_model_type, model_def=None):
        """Describe the files to download for a model.

        Returns a list of two download definitions: the model's auxiliary
        checkpoints from the spec's repo, and the Gemma tokenizer/config files
        from the Gemma folder. ``computeList`` is not used.
        """
        spec = _get_arch_spec(base_model_type)
        gemma_files = [
            "added_tokens.json",
            "chat_template.json",
            "config_light.json",
            "generation_config.json",
            "preprocessor_config.json",
            "processor_config.json",
            "special_tokens_map.json",
            "tokenizer.json",
            "tokenizer.model",
            "tokenizer_config.json",
        ]

        # Upscalers first, then the remaining checkpoints without duplicates.
        file_list = [spec["spatial_upscaler"], spec["temporal_upscaler"]]
        for name in _get_multi_file_names(model_def, base_model_type).values():
            if name not in file_list:
                file_list.append(name)

        download_def = [
            {
                "repoId": spec["repo_id"],
                "sourceFolderList": [""],
                "fileList": [file_list],
            },
            {
                "repoId": "DeepBeepMeep/LTX-2",
                "sourceFolderList": [_GEMMA_FOLDER],
                "fileList": [gemma_files],
            },
        ]
        return download_def

    @staticmethod
    def validate_generative_settings(base_model_type, model_def, inputs):
        """Validate and normalise the generation ``inputs`` for an LTX-2 model.

        ``inputs`` is updated in place (forced distilled settings, normalised
        sampler, number of phases, audio guide fallback).

        Returns:
            An error message when the settings are not supported, otherwise
            ``None``.
        """
        pipeline_kind = model_def.get("ltx2_pipeline", "two_stage")
        if pipeline_kind == "distilled":
            # The distilled pipeline runs with fixed steps and guidance values.
            inputs.update(
                {
                    "num_inference_steps": 8,
                    "guidance_scale": 1.0,
                    "audio_guidance_scale": 1.0,
                    "audio_cfg_scale": 1.0,
                    "alt_guidance_scale": 1.0,
                    "alt_scale": 0.0,
                }
            )
            # NOTE(review): every other place in this file uses the
            # "perturbation_switch" key; confirm "perturbation" is intended here.
            if inputs.get("perturbation", 0) == 2:
                inputs["perturbation"] = 0
        else:
            # A missing, empty or None sampler falls back to "euler" for every
            # 22B-class model (same set as the check below).
            default_solver = "euler" if base_model_type in LTX2_22B_CLASS else ""
            sample_solver = (inputs.get("sample_solver") or default_solver).lower()
            if base_model_type in LTX2_22B_CLASS:
                if sample_solver not in {"distilled_8_steps", "euler", "res2s"}:
                    return f"Unsupported LTX2 sampler '{sample_solver}'."
                inputs["sample_solver"] = sample_solver
                if sample_solver == "distilled_8_steps":
                    inputs["num_inference_steps"] = 8
                if sample_solver == "res2s":
                    if inputs.get("apg_switch", 0):
                        return "HQ sampler does not support APG yet."
                    if inputs.get("cfg_star_switch", 0):
                        return "HQ sampler does not support CFG Star yet."
                    if inputs.get("self_refiner_setting", 0):
                        return "HQ sampler does not support Self Refiner yet."
                    if inputs.get("perturbation_switch", 0) not in (0, 2):
                        return "HQ sampler supports only Off or Skip Self Attention guidance."
            elif sample_solver not in {"", "euler"}:
                return f"Sampler '{sample_solver}' is not supported for {base_model_type}."

        video_guide_outpainting = inputs.get("video_guide_outpainting", None)
        video_guide_outpainting_ratio = inputs.get("video_guide_outpainting_ratio", "")
        video_prompt_type = inputs.get("video_prompt_type", "") or ""
        audio_prompt_type = inputs.get("audio_prompt_type", "") or ""
        from shared.utils.utils import get_outpainting_dims
        any_outpainting = get_outpainting_dims(video_guide_outpainting, video_guide_outpainting_ratio) is not None

        # Audio generation from the control video ("2").
        if "2" in audio_prompt_type:
            if any(letter in audio_prompt_type for letter in "AK"):
                return "LTX2 audio generation from Control Video must use the dedicated audio option, without an Audio Source or Control Video Audio Track prompt."
            if "V" not in video_prompt_type or "G" not in video_prompt_type:
                return "LTX2 audio generation from Control Video requires 'LTX2 Raw Format / Control Video for Ic Lora'."
            if any(letter in video_prompt_type for letter in "OPDE&AFKI") or any_outpainting:
                return "LTX2 audio generation from Control Video supports only raw Control Video, without Pose/Depth/Canny/HDR/Outpaint/Mask/Inject Frames."
            if inputs.get("video_guide") is None:
                return "You must provide a Control Video to generate audio from it."

        # HDR IC-LoRA ("&").
        if "&" in video_prompt_type:
            if base_model_type != "ltx2_22B":
                return "LTX2 HDR IC-LoRA is supported only with LTX-2.3 22B."
            if any(letter in video_prompt_type for letter in "OPDE") or any_outpainting:
                return "LTX2 HDR IC-LoRA is not compatible with Pose/Depth/Canny/Outpaint control modes."
            if "F" in video_prompt_type:
                return "LTX2 HDR IC-LoRA is not yet compatible with Inject Frames."

        # Outpainting restrictions.
        if any_outpainting:
            if "V" in video_prompt_type:
                if any(letter in video_prompt_type for letter in "OPDE"):
                    return "LTX2 outpainting on Control Video supports only LTX2 Raw Format / Control Video for Ic Lora."
                if "1" in audio_prompt_type:
                    return "LTX2 outpainting on Control Video is not compatible with the ID-LoRA option."
            if "F" in video_prompt_type:
                return "LTX2 outpainting is not yet compatible with Inject Frames."
            if "A" in video_prompt_type:
                return "LTX2 outpainting doesnt support Video Mask."

        guide_phases = inputs.get("guidance_phases", 1)
        if guide_phases != 1 and "V" in video_prompt_type and any_outpainting:
            inputs["guidance_phases"] = 1
            gr.Info("Number of Phases has been set to 1 as Outpainting is enabled")
        if "2" not in audio_prompt_type:
            error = _notify_control_video_phase2(base_model_type, model_def, inputs, any_outpainting)
            if error:
                return error
        # Fall back on the audio source when an audio guide is expected but missing.
        if "A" in audio_prompt_type and inputs.get("audio_guide") is None:
            audio_source = inputs.get("audio_source")
            if audio_source is not None:
                inputs["audio_guide"] = audio_source

    @staticmethod
    def load_model(
        model_filename,
        model_type,
        base_model_type,
        model_def,
        quantizeTransformer=False,
        text_encoder_quantization=None,
        dtype=torch.bfloat16,
        VAE_dtype=torch.float32,
        mixed_precision_transformer=False,
        save_quantized=False,
        submodel_no_list=None,
        text_encoder_filename=None,
        **kwargs,
    ):
        """Instantiate the LTX2 model and describe its components.

        Args:
            model_filename: a checkpoint path, or a list/tuple of paths. For a
                list, ``submodel_no_list`` (default: all 1) tells which entries
                are transformer checkpoints (1) and which are extra transformer
                modules (0).
            save_quantized: when true, the loaded transformer is handed to
                ``wgp.save_quantized_model``.
            text_encoder_filename: text encoder checkpoint; its folder is used
                as the text encoder path when ``model_def`` has no
                ``text_encoder_folder`` entry.

        Returns:
            ``(ltx2_model, pipe)`` where ``pipe`` maps component names to the
            loaded sub-models; when ``LTX2_ENABLE_EMBEDDING_LORAS`` is set the
            mapping is wrapped together with the names of the LoRA-enabled
            components.
        """
        from .ltx2 import LTX2, LTX2_ENABLE_EMBEDDING_LORAS

        checkpoint_paths = _resolve_multi_file_paths(model_def, base_model_type)
        transformer_modules = []
        if isinstance(model_filename, (list, tuple)):
            submodel_no_list = submodel_no_list or [1] * len(model_filename)
            transformer_path = [path for path, submodel_no in zip(model_filename, submodel_no_list) if submodel_no == 1]
            transformer_modules = [path for path, submodel_no in zip(model_filename, submodel_no_list) if submodel_no == 0]
            if len(transformer_path) == 1:
                transformer_path = transformer_path[0]
        else:
            transformer_path = model_filename
        checkpoint_paths["transformer"] = transformer_path
        if transformer_modules:
            checkpoint_paths["transformer_modules"] = transformer_modules

        # Only derive the folder from the filename when it is actually needed:
        # os.path.dirname(None) raises TypeError.
        if "text_encoder_folder" in model_def:
            text_encoder_filepath = model_def["text_encoder_folder"]
        elif text_encoder_filename is not None:
            text_encoder_filepath = os.path.dirname(text_encoder_filename)
        else:
            text_encoder_filepath = None

        ltx2_model = LTX2(
            model_filename=model_filename,
            model_type=model_type,
            base_model_type=base_model_type,
            model_def=model_def,
            dtype=dtype,
            VAE_dtype=VAE_dtype,
            text_encoder_filename=text_encoder_filename,
            text_encoder_filepath=text_encoder_filepath,
            checkpoint_paths=checkpoint_paths,
        )

        if save_quantized:
            from wgp import save_quantized_model

            quantized_source = transformer_path[0] if isinstance(transformer_path, (list, tuple)) else transformer_path
            # Use the wrapped velocity model when the loaded model exposes one.
            quantized_transformer = getattr(ltx2_model.model, "velocity_model", ltx2_model.model)
            save_quantized_model(
                quantized_transformer,
                model_type,
                quantized_source,
                dtype,
                checkpoint_paths["model_config"],
            )

        pipe = {
            "transformer": ltx2_model.model,
            "text_encoder": ltx2_model.text_encoder,
            "text_embedding_projection": ltx2_model.text_embedding_projection,
            "text_embeddings_connector": ltx2_model.text_embeddings_connector,
            "vae": ltx2_model.video_decoder,
            "video_encoder": ltx2_model.video_encoder,
            "audio_encoder": ltx2_model.audio_encoder,
            "audio_decoder": ltx2_model.audio_decoder,
            "vocoder": ltx2_model.vocoder,
            "spatial_upsampler": ltx2_model.spatial_upsampler,
        }
        if ltx2_model.model2 is not None:
            pipe["transformer2"] = ltx2_model.model2

        if LTX2_ENABLE_EMBEDDING_LORAS:
            pipe = {"pipe": pipe, "loras": ["text_embedding_projection", "text_embeddings_connector"]}

        return ltx2_model, pipe

    @staticmethod
    def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
        """Upgrade settings saved by an older version, in place.

        Each step applies to ``ui_defaults`` only when ``settings_version`` is
        older than the version that introduced the corresponding setting.
        """
        default_perturbation_layers = _default_perturbation_layers(base_model_type)
        pipeline_kind = model_def.get("ltx2_pipeline", "two_stage")
        if pipeline_kind != "distilled" and ui_defaults.get("sample_solver", "") in {"", None}:
            ui_defaults["sample_solver"] = "euler"

        if settings_version < 2.43:
            ui_defaults.update(
                {
                    "denoising_strength": 1.0,
                    "masking_strength": 0,
                }
            )

        if settings_version < 2.45:
            ui_defaults.update(
                {
                    "alt_guidance_scale": 1.0,
                    "perturbation_layers": default_perturbation_layers,
                }
            )

        if settings_version < 2.49:
            ui_defaults.update(
                {
                    "self_refiner_plan": "2-8:3",
                }
            )

        if settings_version < 2.55 and pipeline_kind != "distilled":
            ui_defaults.update(
                {
                    "audio_guidance_scale": 1.0,
                    "alt_guidance_scale": 1.0,
                    "alt_scale": 0.0,
                }
            )

        if settings_version < 2.52:
            # Older settings may still hold the refiner plan as a list.
            plan = ui_defaults.get("self_refiner_plan")
            if isinstance(plan, list):
                from shared.utils.self_refiner import convert_refiner_list_to_string
                ui_defaults["self_refiner_plan"] = convert_refiner_list_to_string(plan)

        if settings_version < 2.58 and pipeline_kind == "distilled":
            ui_defaults["guidance_phases"] = 2

    @staticmethod
    def update_default_settings(base_model_type, model_def, ui_defaults):
        """Fill ``ui_defaults`` in place with the default settings of the model.

        Common defaults come first, then the non-distilled defaults and finally
        the EditAnything overrides.
        """
        default_perturbation_layers = _default_perturbation_layers(base_model_type)
        ui_defaults.update(
            {
                "sliding_window_size": 481,
                "sliding_window_overlap": 17,
                "denoising_strength": 1.0,
                "masking_strength": 0,
                "audio_prompt_type": "",
                "perturbation_layers": default_perturbation_layers,
                "guidance_phases": 2,
            }
        )
        ui_defaults.setdefault("audio_scale", 1.0)
        pipeline_kind = model_def.get("ltx2_pipeline", "two_stage")
        if pipeline_kind != "distilled":
            ui_defaults.update(_default_dev_settings(base_model_type))
            ui_defaults.setdefault("sample_solver", "euler")
        if _is_editanything_model(model_def):
            ui_defaults.update(
                {
                    "audio_prompt_type": "K",
                    "video_prompt_type": "VGI",
                    "remove_background_images_ref": 1,
                }
            )

    @staticmethod
    def get_custom_prompt_enhancer_instructions(model_type, prompt_enhancer_mode, is_image, enhancer_kwargs):
        """Delegate to ``prompt_enhancer.get_custom_prompt_enhancer_instructions``."""
        from .prompt_enhancer import get_custom_prompt_enhancer_instructions
        return get_custom_prompt_enhancer_instructions(model_type, prompt_enhancer_mode, is_image, enhancer_kwargs)
|
|