dagloop5 committed on
Commit
216c0cf
·
verified ·
1 Parent(s): 341076d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1013 -0
app.py ADDED
@@ -0,0 +1,1013 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import sys
4
+
5
# Disable torch.compile / dynamo. These variables are set here, ahead of the
# `import torch` further down, so they are in place before torch is imported.
os.environ["TORCH_COMPILE_DISABLE"] = "1"
os.environ["TORCHDYNAMO_DISABLE"] = "1"

# Clone the LTX-2 repo next to this file and install its packages from source.
LTX_REPO_URL = "https://github.com/Lightricks/LTX-2.git"
LTX_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "LTX-2")

LTX_COMMIT = "ae855f8538843825f9015a419cf4ba5edaf5eec2"  # known working commit with decode_video

if not os.path.exists(LTX_REPO_DIR):
    print(f"Cloning {LTX_REPO_URL}...")
    subprocess.run(["git", "clone", LTX_REPO_URL, LTX_REPO_DIR], check=True)

# Pin the working tree on every start, not only right after a fresh clone:
# if an earlier run cloned successfully but died before the checkout, the
# directory already exists and would otherwise stay on the default branch.
# Checking out the commit that is already checked out is a no-op.
subprocess.run(["git", "checkout", LTX_COMMIT], cwd=LTX_REPO_DIR, check=True)

print("Installing ltx-core and ltx-pipelines from cloned repo...")
subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps",
        "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-core"),
        "-e", os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines"),
    ],
    check=True,
)

# Put the package sources at the front of sys.path (ltx-core ends up first)
# in addition to the editable installs above.
sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src"))
sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src"))
30
+
31
+ import logging
32
+ import random
33
+ import tempfile
34
+ from pathlib import Path
35
+ import gc
36
+ import hashlib
37
+
38
+ import torch
39
+ torch._dynamo.config.suppress_errors = True
40
+ torch._dynamo.config.disable = True
41
+
42
+ import spaces
43
+ import gradio as gr
44
+ import numpy as np
45
+ from huggingface_hub import hf_hub_download, snapshot_download
46
+ from safetensors.torch import load_file, save_file
47
+ from safetensors import safe_open
48
+ import json
49
+ import requests
50
+
51
+ from ltx_core.components.diffusion_steps import EulerDiffusionStep
52
+ from ltx_core.components.noisers import GaussianNoiser
53
+ from ltx_core.components.protocols import DiffusionStepProtocol
54
+ from ltx_core.model.audio_vae import decode_audio as vae_decode_audio
55
+ from ltx_core.model.audio_vae import encode_audio as vae_encode_audio
56
+ from ltx_core.model.upsampler import upsample_video
57
+ from ltx_core.model.video_vae import TilingConfig, get_video_chunks_number, decode_video as vae_decode_video
58
+ from ltx_core.quantization import QuantizationPolicy
59
+ from ltx_core.types import Audio, LatentState, AudioLatentShape, VideoPixelShape
60
+ from ltx_pipelines.distilled import DistilledPipeline
61
+ from ltx_pipelines.utils import ModelLedger, euler_denoising_loop
62
+ from ltx_pipelines.utils.args import ImageConditioningInput
63
+ from ltx_pipelines.utils.constants import DISTILLED_SIGMA_VALUES, STAGE_2_DISTILLED_SIGMA_VALUES
64
+ from ltx_pipelines.utils.helpers import (
65
+ cleanup_memory,
66
+ combined_image_conditionings,
67
+ denoise_video_only,
68
+ denoise_audio_video,
69
+ get_device,
70
+ encode_prompts,
71
+ simple_denoising_func,
72
+ )
73
+ from ltx_pipelines.utils.media_io import decode_audio_from_file, encode_video
74
+ from ltx_core.loader.primitives import (
75
+ LoraPathStrengthAndSDOps,
76
+ LoraStateDictWithStrength,
77
+ StateDict,
78
+ )
79
+ from ltx_core.loader.sd_ops import LTXV_LORA_COMFY_RENAMING_MAP
80
+ from ltx_core.loader.fuse_loras import apply_loras
81
+ from ltx_core.loader.sft_loader import SafetensorsStateDictLoader
82
+
83
+ from ltx_pipelines.utils.types import PipelineComponents
84
+
85
# Root logger at INFO so library progress messages show up in the Space logs.
logging.getLogger().setLevel(logging.INFO)

# Largest seed value used by the app (max of a signed 32-bit integer).
MAX_SEED = np.iinfo(np.int32).max

# Default text prompt.
DEFAULT_PROMPT = " ".join(
    (
        "An astronaut hatches from a fragile egg on the surface of the Moon,",
        "the shell cracking and peeling apart in gentle low-gravity motion.",
        "Fine lunar dust lifts and drifts outward with each movement, floating",
        "in slow arcs before settling back onto the ground.",
    )
)

# Frames per second used when converting a duration into a frame count.
DEFAULT_FRAME_RATE = 24.0

# Resolution presets: tier -> aspect ratio -> (width, height)
RESOLUTIONS = {
    "high": {
        "16:9": (1536, 1024),
        "9:16": (1024, 1536),
        "1:1": (1024, 1024),
    },
    "low": {
        "16:9": (768, 512),
        "9:16": (512, 768),
        "1:1": (768, 768),
    },
}
101
+
102
+
103
class LTX23DistilledA2VPipeline:
    """Distilled LTX pipeline that returns decoded video and decoded audio.

    Holds a ``ModelLedger`` (source of the individual models) and a
    ``PipelineComponents`` instance. Calling the pipeline runs one denoising
    pass over ``DISTILLED_SIGMA_VALUES`` and decodes both resulting latents.

    NOTE(review): ``spatial_upsampler_path`` is handed to the ledger, but
    ``__call__`` does not invoke the upsampler and takes no audio input.
    """

    def __init__(
        self,
        distilled_checkpoint_path: str,
        gemma_root: str,
        spatial_upsampler_path: str,
        loras: tuple,
        quantization: QuantizationPolicy | None = None,
    ):
        """Create the model ledger and pipeline components.

        Args:
            distilled_checkpoint_path: Passed to the ledger as ``checkpoint_path``.
            gemma_root: Passed to the ledger as ``gemma_root_path``.
            spatial_upsampler_path: Passed to the ledger unchanged.
            loras: LoRA specs forwarded to the ledger.
            quantization: Optional quantization policy forwarded to the ledger.
        """
        # CUDA when available, otherwise CPU; weights are handled as bfloat16.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.dtype = torch.bfloat16

        self.model_ledger = ModelLedger(
            dtype=self.dtype,
            device=self.device,
            checkpoint_path=distilled_checkpoint_path,
            spatial_upsampler_path=spatial_upsampler_path,
            gemma_root_path=gemma_root,
            loras=loras,
            quantization=quantization,
        )

        self.pipeline_components = PipelineComponents(
            dtype=self.dtype,
            device=self.device,
        )

    def __call__(
        self,
        prompt: str,
        seed: int,
        height: int,
        width: int,
        num_frames: int,
        frame_rate: float,
        images: list[ImageConditioningInput],
        tiling_config: TilingConfig | None = None,
        enhance_prompt: bool = False,
    ):
        """Generate video and audio for ``prompt``.

        Args:
            prompt: Text prompt to encode.
            seed: Seed for the noise generator.
            height: Output height passed to ``VideoPixelShape``.
            width: Output width passed to ``VideoPixelShape``.
            num_frames: Number of frames passed to ``VideoPixelShape``.
            frame_rate: Frames per second passed to ``VideoPixelShape``.
            images: Image conditionings; the first element of the first entry
                is also handed to prompt enhancement.
            tiling_config: Optional tiling configuration for video decoding.
            enhance_prompt: Whether ``encode_prompts`` should enhance the prompt.

        Returns:
            ``(decoded_video, decoded_audio)`` as returned by the VAE decode
            helpers.
        """
        generator = torch.Generator(device=self.device).manual_seed(seed)
        noiser = GaussianNoiser(generator=generator)
        stepper = EulerDiffusionStep()
        dtype = torch.bfloat16

        (ctx_p,) = encode_prompts(
            [prompt],
            self.model_ledger,
            enhance_first_prompt=enhance_prompt,
            enhance_prompt_image=images[0][0] if images else None,
        )
        video_context, audio_context = ctx_p.video_encoding, ctx_p.audio_encoding

        # Single denoising stage over the distilled sigma schedule.
        video_encoder = self.model_ledger.video_encoder()
        transformer = self.model_ledger.transformer()
        stage_1_sigmas = torch.Tensor(DISTILLED_SIGMA_VALUES).to(self.device)

        def denoising_loop(
            sigmas: torch.Tensor, video_state: LatentState, audio_state: LatentState, stepper: DiffusionStepProtocol
        ) -> tuple[LatentState, LatentState]:
            """Run the Euler loop with the prompt contexts and transformer bound."""
            return euler_denoising_loop(
                sigmas=sigmas,
                video_state=video_state,
                audio_state=audio_state,
                stepper=stepper,
                denoise_fn=simple_denoising_func(
                    video_context=video_context,
                    audio_context=audio_context,
                    transformer=transformer,  # noqa: F821
                ),
            )

        stage_1_output_shape = VideoPixelShape(
            batch=1,
            frames=num_frames,
            width=width,
            height=height,
            fps=frame_rate,
        )
        stage_1_conditionings = combined_image_conditionings(
            images=images,
            height=stage_1_output_shape.height,
            width=stage_1_output_shape.width,
            video_encoder=video_encoder,
            dtype=dtype,
            device=self.device,
        )

        video_state, audio_state = denoise_audio_video(
            output_shape=stage_1_output_shape,
            conditionings=stage_1_conditionings,
            noiser=noiser,
            sigmas=stage_1_sigmas,
            stepper=stepper,
            denoising_loop_fn=denoising_loop,
            components=self.pipeline_components,
            dtype=dtype,
            device=self.device,
        )

        # __init__ falls back to CPU when CUDA is missing, so only synchronize
        # when this pipeline is actually running on a CUDA device.
        if self.device.type == "cuda":
            torch.cuda.synchronize()
        # Drop the local references before decoding, then run the cleanup helper.
        del transformer
        del video_encoder
        cleanup_memory()

        decoded_video = vae_decode_video(
            video_state.latent, self.model_ledger.video_decoder(), tiling_config, generator
        )
        decoded_audio = vae_decode_audio(
            audio_state.latent, self.model_ledger.audio_decoder(), self.model_ledger.vocoder()
        )
        return decoded_video, decoded_audio
218
+
219
+
220
# Hugging Face repos for the spatial upscaler and the Gemma text encoder.
LTX_MODEL_REPO = "Lightricks/LTX-2.3"
GEMMA_REPO = "Lightricks/gemma-3-12b-it-qat-q4_0-unquantized"

# Banner announcing the checkpoint downloads.
print("=" * 80, "Downloading LTX-2.3 distilled model + Gemma...", "=" * 80, sep="\n")

# On-disk cache for fused LoRA state dicts, plus the key of the mix that is
# currently loaded into the transformer.
LORA_CACHE_DIR = Path("lora_cache")
LORA_CACHE_DIR.mkdir(exist_ok=True)
current_lora_key: str | None = None

# Result of the most recent prepare step, waiting to be applied.
PENDING_LORA_KEY: str | None = None
PENDING_LORA_STATE: dict[str, torch.Tensor] | None = None
PENDING_LORA_STATUS: str = "No LoRA state prepared yet."

# CPU snapshot of the base transformer weights and the in-memory LoRA state
# dicts (both are filled in after the models are preloaded).
_base_transformer_sd: StateDict | None = None
_lora_state_dicts: dict[str, StateDict] = {}
_lora_loader = SafetensorsStateDictLoader()

# The main checkpoint is downloaded into ./weights.
weights_dir = Path("weights")
weights_dir.mkdir(exist_ok=True)
checkpoint_path = hf_hub_download(
    repo_id="TenStrip/LTX2.3-10Eros",
    filename="10Eros_v1.2_bf16.safetensors",
    local_dir=str(weights_dir),
    local_dir_use_symlinks=False,
)
spatial_upsampler_path = hf_hub_download(
    repo_id=LTX_MODEL_REPO,
    filename="ltx-2.3-spatial-upscaler-x2-1.1.safetensors",
)
gemma_root = snapshot_download(repo_id=GEMMA_REPO)
253
+
254
+
255
# Download every LoRA adapter the app offers. Most come from LORA_REPO; a few
# are fetched from other repos, named explicitly on the call.
LORA_REPO = "dagloop5/LoRA"

print("=" * 80, "Downloading LoRA adapters from dagloop5/LoRA...", "=" * 80, sep="\n")

singularity_lora_path = hf_hub_download(
    repo_id="TenStrip/LTX2.3_JoyAI_Lora_Extracted",
    filename="JoyAI-Echo_r256.safetensors",
)
teneros_lora_path = hf_hub_download(
    repo_id="TenStrip/LTX2.3_Distilled_Lora_1.1_Experiments",
    filename="ltx-2.3-22b-distilled-lora-1.1_fro90_ceil52_condsafe.safetensors",
)
pose_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="LTX2_3_NSFW_furry_concat_v2.safetensors")
general_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="LTX2.3_reasoning_I2V_V3.safetensors")
motion_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="Sulphur_LTX 2.3_better _NSFW_motion.safetensors")
dreamlay_lora_path = hf_hub_download(
    repo_id="lynaNSFW/DR34ML4Y_AIO_NSFW_LTX23",
    filename="DR34ML4Y_LTXXX_V2.safetensors",
)  # m15510n4ry, bl0wj0b, d0ubl3_bj, d0gg1e, c0wg1rl
mself_lora_path = hf_hub_download(
    repo_id=LORA_REPO,
    filename="Furry Hyper Masturbation - LTX-2 I2V v1.safetensors",
)  # Hyperfap
dramatic_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="LTX-2.3 - Orgasm.safetensors")  # buddr
fluid_lora_path = hf_hub_download(
    repo_id=LORA_REPO,
    filename="Cr3ampi3_animation_sulphur-2_i2v_v1.0.safetensors",
)  # cr3ampi3 animation
liquid_lora_path = hf_hub_download(
    repo_id=LORA_REPO,
    filename="liquid_wet_dr1pp_ltx2_v1.0_scaled.safetensors",
)  # wet dr1pp
demopose_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="clapping-cheeks-audio-v001-alpha.safetensors")
voice_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="hentai_voice_ltx23_v2.comfy.safetensors")
realism_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="FurryenhancerLTX2.3V4.094fused.safetensors")
transition_lora_path = hf_hub_download(
    repo_id=LORA_REPO,
    filename="LTX-2_takerpov_lora_v1.2.safetensors",
)  # takerpov1, taker pov
physics_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="LTX2.3_Physics_V2_000002000.safetensors")
reasoning_lora_path = hf_hub_download(
    repo_id="LiconStudio/Ltx2.3-VBVR-lora-I2V",
    filename="Ltx2.3-Licon-VBVR-I2V-390K-R32.safetensors",
)
twostep_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="LTX2.3_Multi_step_video_reasoning_V0.1.safetensors")
mcfurry_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="LTX-2_3_mc_Furry_realistic_lora_v1.safetensors")
dm_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="Doggy_mission_sulphur-2_v0.5.safetensors")
praxis_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="Penile_Praxis_V4.safetensors")
threed_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="ltx2-3d-animations-12500-steps-k3nk.safetensors")
concept_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="ltx23_nsfw_helper_multi_concept_lora_v2.safetensors")
bulge_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="stomach_bulge_10eros_sulphur_v1.safetensors")

# Log where each adapter landed, in download order.
for _label, _path in (
    ("Singularity", singularity_lora_path),
    ("10Eros", teneros_lora_path),
    ("Pose", pose_lora_path),
    ("General", general_lora_path),
    ("Motion", motion_lora_path),
    ("Dreamlay", dreamlay_lora_path),
    ("Mself", mself_lora_path),
    ("Dramatic", dramatic_lora_path),
    ("Fluid", fluid_lora_path),
    ("Liquid", liquid_lora_path),
    ("Demopose", demopose_lora_path),
    ("Voice", voice_lora_path),
    ("Realism", realism_lora_path),
    ("Transition", transition_lora_path),
    ("Physics", physics_lora_path),
    ("Reasoning", reasoning_lora_path),
    ("Twostep", twostep_lora_path),
    ("Mcfurry", mcfurry_lora_path),
    ("DM", dm_lora_path),
    ("Praxis", praxis_lora_path),
    ("ThreeD", threed_lora_path),
    ("Concept", concept_lora_path),
    ("Bulge", bulge_lora_path),
):
    print(f"{_label} LoRA: {_path}")

print(f"Checkpoint: {checkpoint_path}")
print(f"Spatial upsampler: {spatial_upsampler_path}")
print(f"[Gemma] Root ready: {gemma_root}")
313
+
314
# Build the pipeline with no LoRAs and no quantization. LoRA weights are fused
# into the transformer later by the prepare/apply helpers defined below.
pipeline = LTX23DistilledA2VPipeline(
    distilled_checkpoint_path=checkpoint_path,
    gemma_root=gemma_root,
    spatial_upsampler_path=spatial_upsampler_path,
    loras=[],
    quantization=None,
)
324
+
325
def _make_lora_key(singularity_strength: float, teneros_strength: float, pose_strength: float, general_strength: float, motion_strength: float, dreamlay_strength: float, mself_strength: float, dramatic_strength: float, fluid_strength: float, liquid_strength: float, demopose_strength: float, voice_strength: float, realism_strength: float, transition_strength: float, physics_strength: float, reasoning_strength: float, twostep_strength: float, mcfurry_strength: float, dm_strength: float, praxis_strength: float, threed_strength: float, concept_strength: float, bulge_strength: float) -> tuple[str, str]:
    """Build the cache key for one combination of LoRA strengths.

    Each strength is rounded to two decimals and paired with the path of its
    LoRA file; the pairs are joined as ``path:strength|path:strength|...``.

    Returns:
        ``(key, key_str)`` where ``key_str`` is the joined string and ``key``
        is its SHA-256 hex digest.
    """
    # Order matters: it defines the layout of the hashed string.
    path_and_strength = (
        (singularity_lora_path, singularity_strength),
        (teneros_lora_path, teneros_strength),
        (pose_lora_path, pose_strength),
        (general_lora_path, general_strength),
        (motion_lora_path, motion_strength),
        (dreamlay_lora_path, dreamlay_strength),
        (mself_lora_path, mself_strength),
        (dramatic_lora_path, dramatic_strength),
        (fluid_lora_path, fluid_strength),
        (liquid_lora_path, liquid_strength),
        (demopose_lora_path, demopose_strength),
        (voice_lora_path, voice_strength),
        (realism_lora_path, realism_strength),
        (transition_lora_path, transition_strength),
        (physics_lora_path, physics_strength),
        (reasoning_lora_path, reasoning_strength),
        (twostep_lora_path, twostep_strength),
        (mcfurry_lora_path, mcfurry_strength),
        (dm_lora_path, dm_strength),
        (praxis_lora_path, praxis_strength),
        (threed_lora_path, threed_strength),
        (concept_lora_path, concept_strength),
        (bulge_lora_path, bulge_strength),
    )
    key_str = "|".join(
        f"{path}:{round(float(strength), 2)}" for path, strength in path_and_strength
    )
    digest = hashlib.sha256(key_str.encode("utf-8")).hexdigest()
    return digest, key_str
352
+
353
+
354
def prepare_lora_cache(
    singularity_strength: float,
    teneros_strength: float,
    pose_strength: float,
    general_strength: float,
    motion_strength: float,
    dreamlay_strength: float,
    mself_strength: float,
    dramatic_strength: float,
    fluid_strength: float,
    liquid_strength: float,
    demopose_strength: float,
    voice_strength: float,
    realism_strength: float,
    transition_strength: float,
    physics_strength: float,
    reasoning_strength: float,
    twostep_strength: float,
    mcfurry_strength: float,
    dm_strength: float,
    praxis_strength: float,
    threed_strength: float,
    concept_strength: float,
    bulge_strength: float,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Prepare the fused transformer state for the requested LoRA strengths.

    Steps:
    - look for ``<key>.safetensors`` in LORA_CACHE_DIR and load it if present
    - otherwise, if any strength is non-zero, fuse the LoRAs into the base
      state dict and save the result to the cache
    - store the outcome in PENDING_LORA_KEY / PENDING_LORA_STATE /
      PENDING_LORA_STATUS

    Returns the status message (also stored in PENDING_LORA_STATUS).
    """

    def _publish(new_key, new_state, status):
        # Record the outcome in the module-level "pending" slots.
        global PENDING_LORA_KEY, PENDING_LORA_STATE, PENDING_LORA_STATUS
        PENDING_LORA_KEY = new_key
        PENDING_LORA_STATE = new_state
        PENDING_LORA_STATUS = status
        return status

    # Insertion order matches the positional order expected by _make_lora_key.
    requested = {
        "singularity": singularity_strength,
        "teneros": teneros_strength,
        "pose": pose_strength,
        "general": general_strength,
        "motion": motion_strength,
        "dreamlay": dreamlay_strength,
        "mself": mself_strength,
        "dramatic": dramatic_strength,
        "fluid": fluid_strength,
        "liquid": liquid_strength,
        "demopose": demopose_strength,
        "voice": voice_strength,
        "realism": realism_strength,
        "transition": transition_strength,
        "physics": physics_strength,
        "reasoning": reasoning_strength,
        "twostep": twostep_strength,
        "mcfurry": mcfurry_strength,
        "dm": dm_strength,
        "praxis": praxis_strength,
        "threed": threed_strength,
        "concept": concept_strength,
        "bulge": bulge_strength,
    }

    key, _ = _make_lora_key(*requested.values())
    cache_path = LORA_CACHE_DIR / f"{key}.safetensors"

    progress(0.05, desc="Preparing LoRA state")

    # Reuse a previously fused state when one exists on disk.
    if cache_path.exists():
        try:
            progress(0.20, desc="Loading cached fused state")
            cached_state = load_file(str(cache_path))
        except Exception as e:
            # An unreadable cache file is not fatal: fall through and rebuild.
            print(f"[LoRA] Cache load failed: {type(e).__name__}: {e}")
        else:
            return _publish(key, cached_state, f"Loaded cached LoRA state: {cache_path.name}")

    strengths = {label: round(float(value), 2) for label, value in requested.items()}

    if all(value == 0.0 for value in strengths.values()):
        return _publish(None, None, "No non-zero LoRA strengths selected; nothing to prepare.")

    try:
        progress(0.25, desc="Fusing LoRAs into base state dict")
        fused_state = _fuse_lora_state_dict(strengths)

        progress(0.70, desc="Saving fused state to cache")
        save_file(fused_state, str(cache_path))

        return _publish(key, fused_state, f"Built and cached LoRA state: {cache_path.name}")

    except Exception as e:
        import traceback
        print(f"[LoRA] Prepare failed: {type(e).__name__}: {e}")
        print(traceback.format_exc())
        return _publish(None, None, f"LoRA prepare failed: {type(e).__name__}: {e}")

    finally:
        gc.collect()
466
+
467
+
468
def apply_prepared_lora_state_to_pipeline():
    """
    Load the pending (already prepared) state dict into the live transformer.

    Returns False when nothing is pending, True when the pending state is
    active afterwards (either freshly loaded or already loaded).

    NOTE(review): when nothing is pending this returns without touching the
    transformer, so weights from a previously applied LoRA mix stay loaded.
    Confirm that is intended.
    """
    global current_lora_key, PENDING_LORA_KEY, PENDING_LORA_STATE

    pending_key = PENDING_LORA_KEY
    pending_state = PENDING_LORA_STATE

    if pending_state is None or pending_key is None:
        print("[LoRA] No prepared LoRA state available; skipping.")
        return False

    if pending_key == current_lora_key:
        print("[LoRA] Prepared LoRA state already active; skipping.")
        return True

    with torch.no_grad():
        # strict=False: key mismatches are reported below instead of raising.
        missing, unexpected = _transformer.load_state_dict(pending_state, strict=False)
    if missing or unexpected:
        print(f"[LoRA] load_state_dict mismatch: missing={len(missing)}, unexpected={len(unexpected)}")

    current_lora_key = pending_key
    print("[LoRA] Prepared LoRA state applied to the pipeline.")
    return True
492
+
493
def _capture_base_transformer_state() -> None:
    """
    Snapshot the preloaded transformer's state_dict to CPU as bfloat16.

    The snapshot is stored in the module-level ``_base_transformer_sd`` and is
    what ``_fuse_lora_state_dict`` fuses LoRAs against.
    """
    global _base_transformer_sd
    assert _transformer is not None, "transformer must be preloaded first"

    cpu = torch.device("cpu")
    snapshot = {}
    for name, tensor in _transformer.state_dict().items():
        # Detached, contiguous CPU copy of each tensor.
        snapshot[name] = tensor.detach().to(device=cpu, dtype=torch.bfloat16).contiguous()

    total_bytes = sum(t.nbytes for t in snapshot.values())
    _base_transformer_sd = StateDict(
        sd=snapshot,
        device=cpu,
        size=total_bytes,
        dtype={torch.bfloat16},
    )
    print(f"[LoRA] Base transformer SD captured: {_base_transformer_sd.size / 1024**3:.2f} GB on CPU")
512
+
513
+
514
def _preload_lora_state_dicts() -> None:
    """
    Load every downloaded LoRA file onto the CPU once.

    Each file is loaded with ``LTXV_LORA_COMFY_RENAMING_MAP`` as ``sd_ops``
    and stored in ``_lora_state_dicts`` under its short label.
    """
    separator = "=" * 80
    print(separator)
    print("Pre-loading LoRA state dicts into CPU memory...")
    print(separator)

    # label -> downloaded file; the labels are the keys used by the strengths dict.
    lora_files = {
        "singularity": singularity_lora_path,
        "teneros": teneros_lora_path,
        "pose": pose_lora_path,
        "general": general_lora_path,
        "motion": motion_lora_path,
        "dreamlay": dreamlay_lora_path,
        "mself": mself_lora_path,
        "dramatic": dramatic_lora_path,
        "fluid": fluid_lora_path,
        "liquid": liquid_lora_path,
        "demopose": demopose_lora_path,
        "voice": voice_lora_path,
        "realism": realism_lora_path,
        "transition": transition_lora_path,
        "physics": physics_lora_path,
        "reasoning": reasoning_lora_path,
        "twostep": twostep_lora_path,
        "mcfurry": mcfurry_lora_path,
        "dm": dm_lora_path,
        "praxis": praxis_lora_path,
        "threed": threed_lora_path,
        "concept": concept_lora_path,
        "bulge": bulge_lora_path,
    }

    cpu = torch.device("cpu")
    for label, path in lora_files.items():
        _lora_state_dicts[label] = _lora_loader.load(
            [path],
            sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
            device=cpu,
        )
        print(f"[LoRA] Pre-loaded {label}: {path}")
555
+
556
+
557
def _fuse_lora_state_dict(strengths: dict[str, float]) -> dict[str, torch.Tensor]:
    """
    Fuse every LoRA with a non-zero strength into the base transformer weights.

    Uses ``apply_loras`` on the CPU snapshot held in ``_base_transformer_sd``.
    When no strength is non-zero, a cloned copy of the base weights is returned.

    Returns a plain ``name -> tensor`` dict of contiguous tensors.
    """
    assert _base_transformer_sd is not None, "Base transformer state dict not ready"

    # Only LoRAs whose strength is not exactly zero take part in the fuse.
    active = [
        LoraStateDictWithStrength(sd=_lora_state_dicts[label], strength=float(strength))
        for label, strength in strengths.items()
        if float(strength) != 0.0
    ]

    if not active:
        base_copy = {}
        for name, tensor in _base_transformer_sd.sd.items():
            base_copy[name] = tensor.detach().clone().contiguous()
        return base_copy

    fused = apply_loras(
        model_sd=_base_transformer_sd,
        lora_sd_and_strengths=active,
        dtype=torch.bfloat16,
        destination_sd=None,
    )

    cpu = torch.device("cpu")
    result = {}
    for name, tensor in fused.sd.items():
        result[name] = tensor.detach().to(device=cpu).contiguous()
    return result
593
+
594
# Build every model once at import time and make the ledger hand back those
# same instances on each later call.
# Preload all models for ZeroGPU tensor packing.
print("Preloading all models (including Gemma and audio components)...")
ledger = pipeline.model_ledger

# Keep references to the ledger's original factory methods before they are
# overwritten below, so individual components can still be rebuilt later.
_orig_transformer_factory = ledger.transformer
_orig_video_encoder_factory = ledger.video_encoder
_orig_video_decoder_factory = ledger.video_decoder
_orig_audio_encoder_factory = ledger.audio_encoder
_orig_audio_decoder_factory = ledger.audio_decoder
_orig_vocoder_factory = ledger.vocoder
_orig_spatial_upsampler_factory = ledger.spatial_upsampler
_orig_text_encoder_factory = ledger.text_encoder
_orig_gemma_embeddings_factory = ledger.gemma_embeddings_processor

# Call each original factory once; these module-level instances are the ones
# served from now on.
_transformer = _orig_transformer_factory()
_video_encoder = _orig_video_encoder_factory()
_video_decoder = _orig_video_decoder_factory()
_audio_encoder = _orig_audio_encoder_factory()
_audio_decoder = _orig_audio_decoder_factory()
_vocoder = _orig_vocoder_factory()
_spatial_upsampler = _orig_spatial_upsampler_factory()
_text_encoder = _orig_text_encoder_factory()
_embeddings_processor = _orig_gemma_embeddings_factory()

# Replace the ledger methods with lambdas that return the cached instances.
# Each lambda looks its module-level name up at call time, so rebinding e.g.
# `_transformer` later changes what the ledger returns.
ledger.transformer = lambda: _transformer
ledger.video_encoder = lambda: _video_encoder
ledger.video_decoder = lambda: _video_decoder
ledger.audio_encoder = lambda: _audio_encoder
ledger.audio_decoder = lambda: _audio_decoder
ledger.vocoder = lambda: _vocoder
ledger.spatial_upsampler = lambda: _spatial_upsampler
ledger.text_encoder = lambda: _text_encoder
ledger.gemma_embeddings_processor = lambda: _embeddings_processor

print("All models preloaded (including Gemma text encoder and audio encoder)!")

# Snapshot the LoRA-free transformer weights first, then load the LoRA files;
# both are needed before any LoRA mix can be fused.
_capture_base_transformer_state()
_preload_lora_state_dicts()
cleanup_memory()

print("=" * 80)
print("Pipeline ready!")
print("=" * 80)
644
+
645
+
646
def log_memory(tag: str):
    """Print current, peak, free and total CUDA memory in GB, labelled ``tag``.

    Does nothing when CUDA is not available.
    """
    if not torch.cuda.is_available():
        return

    gib = 1024**3
    allocated = torch.cuda.memory_allocated() / gib
    peak = torch.cuda.max_memory_allocated() / gib
    free, total = torch.cuda.mem_get_info()
    print(f"[VRAM {tag}] allocated={allocated:.2f}GB peak={peak:.2f}GB free={free / 1024**3:.2f}GB total={total / 1024**3:.2f}GB")
652
+
653
+
654
def detect_aspect_ratio(image) -> str:
    """Return the preset aspect ratio ("16:9", "9:16" or "1:1") closest to ``image``.

    Args:
        image: ``None``, an array-like exposing ``shape`` as
            ``(height, width, ...)``, or an object exposing ``size`` as
            ``(width, height)``.

    Returns:
        The closest preset key. Falls back to "16:9" when ``image`` is
        ``None``, when no usable dimensions can be read, or when either
        dimension is not positive.
    """
    default = "16:9"
    if image is None:
        return default

    # Look at `shape` before `size`: NumPy arrays expose both, and their
    # `size` is an element count (an int), not a (width, height) pair.
    shape = getattr(image, "shape", None)
    try:
        if shape is not None and len(shape) >= 2:
            h, w = shape[:2]
        else:
            w, h = image.size
        if w <= 0 or h <= 0:
            # Degenerate image: avoid dividing by zero below.
            return default
    except (AttributeError, TypeError, ValueError):
        return default

    ratio = w / h
    candidates = {"16:9": 16 / 9, "9:16": 9 / 16, "1:1": 1.0}
    return min(candidates, key=lambda k: abs(ratio - candidates[k]))
666
+
667
+
668
def on_image_upload(first_image, last_image, high_res):
    """Return ``(width, height)`` UI updates matching the uploaded image.

    The first image is the reference when present, otherwise the last image.
    Its closest preset aspect ratio and the chosen tier ("high" or "low")
    select the entry from RESOLUTIONS.
    """
    if first_image is not None:
        reference = first_image
    else:
        reference = last_image

    presets = RESOLUTIONS["high" if high_res else "low"]
    width, height = presets[detect_aspect_ratio(reference)]
    return gr.update(value=width), gr.update(value=height)
674
+
675
+
676
def on_highres_toggle(first_image, last_image, high_res):
    """Return ``(width, height)`` UI updates after the high-res switch changes.

    The computation is exactly the one done on image upload, so this simply
    delegates to ``on_image_upload``.
    """
    return on_image_upload(first_image, last_image, high_res)
682
+
683
+
684
def get_gpu_duration(
    first_image,
    last_image,
    prompt: str,
    duration: float,
    gpu_duration: float,
    enhance_prompt: bool = True,
    seed: int = 42,
    randomize_seed: bool = True,
    height: int = 1024,
    width: int = 1536,
    singularity_strength: float = 0.0,
    teneros_strength: float = 0.0,
    pose_strength: float = 0.0,
    general_strength: float = 0.0,
    motion_strength: float = 0.0,
    dreamlay_strength: float = 0.0,
    mself_strength: float = 0.0,
    dramatic_strength: float = 0.0,
    fluid_strength: float = 0.0,
    liquid_strength: float = 0.0,
    demopose_strength: float = 0.0,
    voice_strength: float = 0.0,
    realism_strength: float = 0.0,
    transition_strength: float = 0.0,
    physics_strength: float = 0.0,
    reasoning_strength: float = 0.0,
    twostep_strength: float = 0.0,
    mcfurry_strength: float = 0.0,
    dm_strength: float = 0.0,
    praxis_strength: float = 0.0,
    threed_strength: float = 0.0,
    concept_strength: float = 0.0,
    bulge_strength: float = 0.0,
    progress=None,
):
    """Return the requested GPU time budget as an integer.

    Used as ``duration=`` for the ``spaces.GPU`` decorator on
    ``generate_video``. The parameter list mirrors that function's, presumably
    because the decorator calls this with the same arguments (verify against
    the ``spaces`` documentation). Only ``gpu_duration`` is read.
    """
    budget = int(gpu_duration)
    return budget
721
+
722
def _stage_conditioning_image(image, output_dir, stem):
    """Return a filesystem path for a conditioning image.

    Objects exposing ``save`` are written to ``output_dir / f"{stem}.jpg"``;
    anything else is treated as an existing path and used as-is.
    """
    if hasattr(image, "save"):
        staged_path = output_dir / f"{stem}.jpg"
        image.save(staged_path)
        return staged_path
    return Path(image)


@spaces.GPU(duration=get_gpu_duration)
@torch.inference_mode()
def generate_video(
    first_image,
    last_image,
    prompt: str,
    duration: float,
    gpu_duration: float,
    enhance_prompt: bool = True,
    seed: int = 42,
    randomize_seed: bool = True,
    height: int = 1024,
    width: int = 1536,
    singularity_strength: float = 0.0,
    teneros_strength: float = 0.0,
    pose_strength: float = 0.0,
    general_strength: float = 0.0,
    motion_strength: float = 0.0,
    dreamlay_strength: float = 0.0,
    mself_strength: float = 0.0,
    dramatic_strength: float = 0.0,
    fluid_strength: float = 0.0,
    liquid_strength: float = 0.0,
    demopose_strength: float = 0.0,
    voice_strength: float = 0.0,
    realism_strength: float = 0.0,
    transition_strength: float = 0.0,
    physics_strength: float = 0.0,
    reasoning_strength: float = 0.0,
    twostep_strength: float = 0.0,
    mcfurry_strength: float = 0.0,
    dm_strength: float = 0.0,
    praxis_strength: float = 0.0,
    threed_strength: float = 0.0,
    concept_strength: float = 0.0,
    bulge_strength: float = 0.0,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate a video (with audio) and encode it to an ``.mp4`` file.

    Args:
        first_image: Optional conditioning image for frame 0.
        last_image: Optional conditioning image for the final frame.
        prompt: Text prompt forwarded to the pipeline.
        duration: Requested clip length in seconds.
        gpu_duration: Not read here; consumed by ``get_gpu_duration`` through
            the ``spaces.GPU`` decorator.
        enhance_prompt: Forwarded to the pipeline.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: When true, a seed is drawn from ``[0, MAX_SEED]``.
        height: Output height, converted with ``int()``.
        width: Output width, converted with ``int()``.
        *_strength: Not read in this function; LoRA state is applied through
            ``apply_prepared_lora_state_to_pipeline()``.
        progress: Not read in this function.

    Returns:
        ``(output_path, seed_used)`` on success, or ``(None, seed)`` when any
        exception is raised (the error and traceback are printed).
    """
    # Bind before the try block so the except handler can always return a
    # seed, even when the failure happens before the seed is resolved.
    current_seed = seed
    try:
        torch.cuda.reset_peak_memory_stats()
        log_memory("start")

        current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

        frame_rate = DEFAULT_FRAME_RATE
        num_frames = int(duration * frame_rate) + 1
        # Round (num_frames - 1) up to a multiple of 8, then add 1 back.
        num_frames = ((num_frames - 1 + 7) // 8) * 8 + 1

        print(f"Generating: {height}x{width}, {num_frames} frames ({duration}s), seed={current_seed}")

        images = []
        output_dir = Path("outputs")
        output_dir.mkdir(exist_ok=True)

        if first_image is not None:
            temp_first_path = _stage_conditioning_image(
                first_image, output_dir, f"temp_first_{current_seed}"
            )
            images.append(ImageConditioningInput(path=str(temp_first_path), frame_idx=0, strength=1.0))

        if last_image is not None:
            temp_last_path = _stage_conditioning_image(
                last_image, output_dir, f"temp_last_{current_seed}"
            )
            images.append(ImageConditioningInput(path=str(temp_last_path), frame_idx=num_frames - 1, strength=1.0))

        tiling_config = TilingConfig.default()
        video_chunks_number = get_video_chunks_number(num_frames, tiling_config)

        log_memory("before pipeline call")

        apply_prepared_lora_state_to_pipeline()

        video, audio = pipeline(
            prompt=prompt,
            seed=current_seed,
            height=int(height),
            width=int(width),
            num_frames=num_frames,
            frame_rate=frame_rate,
            images=images,
            tiling_config=tiling_config,
            enhance_prompt=enhance_prompt,
        )

        log_memory("after pipeline call")

        # tempfile.mktemp is deprecated and race-prone (it only reserves a
        # name); create the file atomically and keep it for the encoder.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name
        encode_video(
            video=video,
            fps=frame_rate,
            audio=audio,
            output_path=output_path,
            video_chunks_number=video_chunks_number,
        )

        log_memory("after encode_video")
        return str(output_path), current_seed

    except Exception as e:
        import traceback
        log_memory("on error")
        print(f"Error: {str(e)}\n{traceback.format_exc()}")
        return None, current_seed
830
+
831
+
832
# Gradio UI definition: builds the `demo` Blocks app and wires its events.
# Inputs: optional first/last frame images (PIL), a prompt, a duration slider,
# advanced settings (seed, width/height, prompt enhancement, resolution tier),
# 23 LoRA strength sliders (0.0-2.0, step 0.01, default 0.0) and a ZeroGPU
# duration slider. Output: the generated video.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the Row/Column/Accordion containers cannot be confirmed from here.
# NOTE(review): several slider labels differ from their variable names (e.g.
# `demopose_strength` is labelled "Audio Helper strength") - confirm each
# label matches the adapter it controls.
+ with gr.Blocks(title="LTX-2.3 Distilled") as demo:
833
+ gr.Markdown("# LTX-2.3 F2LF with Fast Audio-Video Generation with Frame Conditioning")
834
+
835
+
836
+ with gr.Row():
837
+ with gr.Column():
838
+ with gr.Row():
839
+ first_image = gr.Image(label="First Frame (Optional)", type="pil")
840
+ last_image = gr.Image(label="Last Frame (Optional)", type="pil")
841
+ prompt = gr.Textbox(
842
+ label="Prompt",
843
+ info="for best results - make it as elaborate as possible",
844
+ value="Make this image come alive with cinematic motion, smooth animation",
845
+ lines=3,
846
+ placeholder="Describe the motion and animation you want...",
847
+ )
848
+ duration = gr.Slider(label="Duration (seconds)", minimum=1.0, maximum=30.0, value=10.0, step=0.1)
849
+
850
+
851
+ generate_btn = gr.Button("Generate Video", variant="primary", size="lg")
852
+
853
# Advanced settings: seed control, output size, prompt enhancement, tier.
+ with gr.Accordion("Advanced Settings", open=False):
854
+ seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, value=10, step=1)
855
+ randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
856
+ with gr.Row():
857
+ width = gr.Number(label="Width", value=1536, precision=0)
858
+ height = gr.Number(label="Height", value=1024, precision=0)
859
+ with gr.Row():
860
+ enhance_prompt = gr.Checkbox(label="Enhance Prompt", value=False)
861
+ high_res = gr.Checkbox(label="High Resolution", value=True)
862
# LoRA strength sliders; all share the same range, step and default.
+ with gr.Column():
863
+ gr.Markdown("### LoRA adapter strengths (set to 0 to disable; slow and WIP)")
864
+ singularity_strength = gr.Slider(
865
+ label="JoyAI Lora strength",
866
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
867
+ )
868
+ teneros_strength = gr.Slider(
869
+ label="10Eros Distilled Lora strength",
870
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
871
+ )
872
+ pose_strength = gr.Slider(
873
+ label="Anthro Enhancer strength",
874
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
875
+ )
876
+ general_strength = gr.Slider(
877
+ label="Reasoning Enhancer strength",
878
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
879
+ )
880
+ motion_strength = gr.Slider(
881
+ label="Anthro Posing Helper strength",
882
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
883
+ )
884
+ dreamlay_strength = gr.Slider(
885
+ label="Dreamlay strength",
886
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
887
+ )
888
+ mself_strength = gr.Slider(
889
+ label="Mself strength",
890
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
891
+ )
892
+ dramatic_strength = gr.Slider(
893
+ label="Dramatic strength",
894
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
895
+ )
896
+ fluid_strength = gr.Slider(
897
+ label="Fluid Helper strength",
898
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
899
+ )
900
+ liquid_strength = gr.Slider(
901
+ label="Liquid Helper strength",
902
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
903
+ )
904
+ demopose_strength = gr.Slider(
905
+ label="Audio Helper strength",
906
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
907
+ )
908
+ voice_strength = gr.Slider(
909
+ label="Voice Helper strength",
910
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
911
+ )
912
+ realism_strength = gr.Slider(
913
+ label="Anthro Realism strength",
914
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
915
+ )
916
+ transition_strength = gr.Slider(
917
+ label="POV strength",
918
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
919
+ )
920
+ physics_strength = gr.Slider(
921
+ label="Physics strength",
922
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
923
+ )
924
+ reasoning_strength = gr.Slider(
925
+ label="Official Reasoning strength",
926
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
927
+ )
928
+ twostep_strength = gr.Slider(
929
+ label="Two Step Reasoning strength",
930
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
931
+ )
932
+ mcfurry_strength = gr.Slider(
933
+ label="t2v anthro realism strength",
934
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
935
+ )
936
+ dm_strength = gr.Slider(
937
+ label="DM3D strength",
938
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
939
+ )
940
+ praxis_strength = gr.Slider(
941
+ label="Praxis strength",
942
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
943
+ )
944
+ threed_strength = gr.Slider(
945
+ label="3D animation strength",
946
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
947
+ )
948
+ concept_strength = gr.Slider(
949
+ label="Conceptual strength",
950
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
951
+ )
952
+ bulge_strength = gr.Slider(
953
+ label="Bulge strength",
954
+ minimum=0.0, maximum=2.0, value=0.0, step=0.01
955
+ )
956
+ prepare_lora_btn = gr.Button("Prepare / Load LoRA Cache", variant="secondary")
957
+ lora_status = gr.Textbox(
958
+ label="LoRA Cache Status",
959
+ value="No LoRA state prepared yet.",
960
+ interactive=False,
961
+ )
962
+
963
# Output video plus the GPU time budget forwarded to generate_video.
+ with gr.Column():
964
+ output_video = gr.Video(label="Generated Video", autoplay=False)
965
+ gpu_duration = gr.Slider(
966
+ label="ZeroGPU duration (seconds; 10 second Img2Vid with 1024x1024 and LoRAs = ~70)",
967
+ minimum=30.0,
968
+ maximum=240.0,
969
+ value=75.0,
970
+ step=1.0,
971
+ )
972
+
973
# Changing either frame image recomputes the width/height fields.
+ first_image.change(
974
+ fn=on_image_upload,
975
+ inputs=[first_image, last_image, high_res],
976
+ outputs=[width, height],
977
+ )
978
+
979
+ last_image.change(
980
+ fn=on_image_upload,
981
+ inputs=[first_image, last_image, high_res],
982
+ outputs=[width, height],
983
+ )
984
+
985
# Toggling the resolution tier recomputes width/height from the same inputs.
+ high_res.change(
986
+ fn=on_highres_toggle,
987
+ inputs=[first_image, last_image, high_res],
988
+ outputs=[width, height],
989
+ )
990
+
991
# All 23 LoRA strengths go to prepare_lora_cache; its result fills lora_status.
+ prepare_lora_btn.click(
992
+ fn=prepare_lora_cache,
993
+ inputs=[singularity_strength, teneros_strength, pose_strength, general_strength, motion_strength, dreamlay_strength, mself_strength, dramatic_strength, fluid_strength, liquid_strength, demopose_strength, voice_strength, realism_strength, transition_strength, physics_strength, reasoning_strength, twostep_strength, mcfurry_strength, dm_strength, praxis_strength, threed_strength, concept_strength, bulge_strength],
994
+ outputs=[lora_status],
995
+ )
996
+
997
# Inputs are listed in the order of generate_video's positional parameters;
# the seed that was actually used is written back to the seed slider.
+ generate_btn.click(
998
+ fn=generate_video,
999
+ inputs=[
1000
+ first_image, last_image, prompt, duration, gpu_duration, enhance_prompt,
1001
+ seed, randomize_seed, height, width,
1002
+ singularity_strength, teneros_strength, pose_strength, general_strength, motion_strength, dreamlay_strength, mself_strength, dramatic_strength, fluid_strength, liquid_strength, demopose_strength, voice_strength, realism_strength, transition_strength, physics_strength, reasoning_strength, twostep_strength, mcfurry_strength, dm_strength, praxis_strength, threed_strength, concept_strength, bulge_strength,
1003
+ ],
1004
+ outputs=[output_video, seed],
1005
+ )
1006
+
1007
+
1008
# Stylesheet handed to the app at launch; limits the width of `.fillable`.
css = """
.fillable{max-width: 1200px !important}
"""

if __name__ == "__main__":
    # Theme and stylesheet are both supplied at launch time.
    launch_options = {"theme": gr.themes.Citrus(), "css": css}
    demo.launch(**launch_options)