prithivMLmods committed on
Commit c556be6 · verified · 1 Parent(s): 4da3bbf

update app

Files changed (1)
  1. app.py +619 -726
app.py CHANGED
@@ -1,726 +1,619 @@
1
- import sys
2
- from pathlib import Path
3
- import uuid
4
- import tempfile
5
-
6
- # Add packages to Python path
7
- current_dir = Path(__file__).parent
8
- sys.path.insert(0, str(current_dir / "packages" / "ltx-pipelines" / "src"))
9
- sys.path.insert(0, str(current_dir / "packages" / "ltx-core" / "src"))
10
-
11
- import spaces
12
- import flash_attn_interface
13
- import time
14
- import gradio as gr
15
- import numpy as np
16
- import random
17
- import torch
18
- from typing import Optional
19
- from pathlib import Path
20
- from huggingface_hub import hf_hub_download, snapshot_download
21
- from ltx_pipelines.distilled import DistilledPipeline
22
- from ltx_core.model.video_vae import TilingConfig
23
- from ltx_core.loader.primitives import LoraPathStrengthAndSDOps
24
- from ltx_core.loader.sd_ops import LTXV_LORA_COMFY_RENAMING_MAP
25
- from ltx_pipelines.utils.constants import (
26
- DEFAULT_SEED,
27
- DEFAULT_1_STAGE_HEIGHT,
28
- DEFAULT_1_STAGE_WIDTH ,
29
- DEFAULT_NUM_FRAMES,
30
- DEFAULT_FRAME_RATE,
31
- DEFAULT_LORA_STRENGTH,
32
- )
33
-
34
-
35
- MAX_SEED = np.iinfo(np.int32).max
36
- # Import from public LTX-2 package
37
- # Install with: pip install git+https://github.com/Lightricks/LTX-2.git
38
- from ltx_pipelines.utils import ModelLedger
39
- from ltx_pipelines.utils.helpers import generate_enhanced_prompt
40
-
41
- # HuggingFace Hub defaults
42
- DEFAULT_REPO_ID = "Lightricks/LTX-2"
43
- DEFAULT_GEMMA_REPO_ID = "unsloth/gemma-3-12b-it-qat-bnb-4bit"
44
- DEFAULT_CHECKPOINT_FILENAME = "ltx-2-19b-dev.safetensors"
45
-
46
-
47
- def get_hub_or_local_checkpoint(repo_id: str, filename: str):
48
- """Download from HuggingFace Hub."""
49
- print(f"Downloading {filename} from {repo_id}...")
50
- ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename)
51
- print(f"Downloaded to {ckpt_path}")
52
- return ckpt_path
53
-
54
- def download_gemma_model(repo_id: str):
55
- """Download the full Gemma model directory."""
56
- print(f"Downloading Gemma model from {repo_id}...")
57
- local_dir = snapshot_download(repo_id=repo_id)
58
- print(f"Gemma model downloaded to {local_dir}")
59
- return local_dir
60
-
61
- # Initialize model ledger and text encoder at startup (load once, keep in memory)
62
- print("=" * 80)
63
- print("Loading Gemma Text Encoder...")
64
- print("=" * 80)
65
-
66
- checkpoint_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_CHECKPOINT_FILENAME)
67
- gemma_local_path = download_gemma_model(DEFAULT_GEMMA_REPO_ID)
68
- device = "cuda"
69
-
70
- print(f"Initializing text encoder with:")
71
- print(f" checkpoint_path={checkpoint_path}")
72
- print(f" gemma_root={gemma_local_path}")
73
- print(f" device={device}")
74
-
75
-
76
- model_ledger = ModelLedger(
77
- dtype=torch.bfloat16,
78
- device=device,
79
- checkpoint_path=checkpoint_path,
80
- gemma_root_path=DEFAULT_GEMMA_REPO_ID,
81
- local_files_only=False
82
- )
83
-
84
-
85
- # Load text encoder once and keep it in memory
86
- text_encoder = model_ledger.text_encoder()
87
-
88
- print("=" * 80)
89
- print("Text encoder loaded and ready!")
90
- print("=" * 80)
91
-
92
- def encode_text_simple(text_encoder, prompt: str):
93
- """Simple text encoding without using pipeline_utils."""
94
- v_context, a_context, _ = text_encoder(prompt)
95
- return v_context, a_context
96
-
97
- @spaces.GPU()
98
- def encode_prompt(
99
- prompt: str,
100
- enhance_prompt: bool = True,
101
- input_image=None, # this is now filepath (string) or None
102
- seed: int = 42,
103
- negative_prompt: str = ""
104
- ):
105
- start_time = time.time()
106
- try:
107
- final_prompt = prompt
108
- if enhance_prompt:
109
- final_prompt = generate_enhanced_prompt(
110
- text_encoder=text_encoder,
111
- prompt=prompt,
112
- image_path=input_image if input_image is not None else None,
113
- seed=seed,
114
- )
115
-
116
- with torch.inference_mode():
117
- video_context, audio_context = encode_text_simple(text_encoder, final_prompt)
118
-
119
- video_context_negative = None
120
- audio_context_negative = None
121
- if negative_prompt:
122
- video_context_negative, audio_context_negative = encode_text_simple(text_encoder, negative_prompt)
123
-
124
- # IMPORTANT: return tensors directly (no torch.save)
125
- embedding_data = {
126
- "video_context": video_context.detach().cpu(),
127
- "audio_context": audio_context.detach().cpu(),
128
- "prompt": final_prompt,
129
- "original_prompt": prompt,
130
- }
131
- if video_context_negative is not None:
132
- embedding_data["video_context_negative"] = video_context_negative
133
- embedding_data["audio_context_negative"] = audio_context_negative
134
- embedding_data["negative_prompt"] = negative_prompt
135
-
136
- elapsed_time = time.time() - start_time
137
- if torch.cuda.is_available():
138
- allocated = torch.cuda.memory_allocated() / 1024**3
139
- peak = torch.cuda.max_memory_allocated() / 1024**3
140
- status = f"✓ Encoded in {elapsed_time:.2f}s | VRAM: {allocated:.2f}GB allocated, {peak:.2f}GB peak"
141
- else:
142
- status = f"✓ Encoded in {elapsed_time:.2f}s (CPU mode)"
143
-
144
- return embedding_data, final_prompt, status
145
-
146
- except Exception as e:
147
- import traceback
148
- error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
149
- print(error_msg)
150
- return None, prompt, error_msg
151
-
152
-
153
- # Default prompt from docstring example
154
- DEFAULT_PROMPT = "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a breath-taking, movie-like shot."
155
-
156
- # HuggingFace Hub defaults
157
- DEFAULT_REPO_ID = "Lightricks/LTX-2"
158
- DEFAULT_CHECKPOINT_FILENAME = "ltx-2-19b-dev.safetensors"
159
- DEFAULT_DISTILLED_LORA_FILENAME = "ltx-2-19b-distilled-lora-384.safetensors"
160
- DEFAULT_SPATIAL_UPSAMPLER_FILENAME = "ltx-2-spatial-upscaler-x2-1.0.safetensors"
161
-
162
- def get_hub_or_local_checkpoint(repo_id: Optional[str] = None, filename: Optional[str] = None):
163
- """Download from HuggingFace Hub or use local checkpoint."""
164
- if repo_id is None and filename is None:
165
- raise ValueError("Please supply at least one of `repo_id` or `filename`")
166
-
167
- if repo_id is not None:
168
- if filename is None:
169
- raise ValueError("If repo_id is specified, filename must also be specified.")
170
- print(f"Downloading {filename} from {repo_id}...")
171
- ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename)
172
- print(f"Downloaded to {ckpt_path}")
173
- else:
174
- ckpt_path = filename
175
-
176
- return ckpt_path
177
-
178
-
179
- # Initialize pipeline at startup
180
- print("=" * 80)
181
- print("Loading LTX-2 Distilled pipeline...")
182
- print("=" * 80)
183
-
184
- checkpoint_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_CHECKPOINT_FILENAME)
185
- distilled_lora_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_DISTILLED_LORA_FILENAME)
186
- spatial_upsampler_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_SPATIAL_UPSAMPLER_FILENAME)
187
-
188
- print(f"Initializing pipeline with:")
189
- print(f" checkpoint_path={checkpoint_path}")
190
- print(f" distilled_lora_path={distilled_lora_path}")
191
- print(f" spatial_upsampler_path={spatial_upsampler_path}")
192
-
193
-
194
- # Load distilled LoRA as a regular LoRA
195
- loras = [
196
- LoraPathStrengthAndSDOps(
197
- path=distilled_lora_path,
198
- strength=DEFAULT_LORA_STRENGTH,
199
- sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
200
- )
201
- ]
202
-
203
- # Initialize pipeline WITHOUT text encoder (gemma_root=None)
204
- # Text encoding will be done by external space
205
- pipeline = DistilledPipeline(
206
- device=torch.device("cuda"),
207
- checkpoint_path=checkpoint_path,
208
- spatial_upsampler_path=spatial_upsampler_path,
209
- gemma_root=None, # No text encoder in this space
210
- loras=loras,
211
- fp8transformer=False,
212
- local_files_only=False,
213
- )
214
-
215
- pipeline._video_encoder = pipeline.model_ledger.video_encoder()
216
- pipeline._transformer = pipeline.model_ledger.transformer()
217
- # pipeline.device = torch.device("cuda")
218
- # pipeline.model_ledger.device = torch.device("cuda")
219
-
220
-
221
- print("=" * 80)
222
- print("Pipeline fully loaded and ready!")
223
- print("=" * 80)
224
-
225
- def get_duration(
226
- input_image,
227
- prompt,
228
- duration,
229
- enhance_prompt,
230
- seed,
231
- randomize_seed,
232
- height,
233
- width,
234
- progress
235
- ):
236
- if duration <= 5:
237
- return 80
238
- else:
239
- return 120
240
-
241
- class RadioAnimated(gr.HTML):
242
- """
243
- Animated segmented radio (like iOS pill selector).
244
- Outputs: selected option string, e.g. "768x512"
245
- """
246
- def __init__(self, choices, value=None, **kwargs):
247
- if not choices or len(choices) < 2:
248
- raise ValueError("RadioAnimated requires at least 2 choices.")
249
- if value is None:
250
- value = choices[0]
251
-
252
- uid = uuid.uuid4().hex[:8] # unique per instance
253
- group_name = f"ra-{uid}"
254
-
255
- inputs_html = "\n".join(
256
- f"""
257
- <input class="ra-input" type="radio" name="{group_name}" id="{group_name}-{i}" value="{c}">
258
- <label class="ra-label" for="{group_name}-{i}">{c}</label>
259
- """
260
- for i, c in enumerate(choices)
261
- )
262
-
263
- # NOTE: use classes instead of duplicate IDs
264
- html_template = f"""
265
- <div class="ra-wrap" data-ra="{uid}">
266
- <div class="ra-inner">
267
- <div class="ra-highlight"></div>
268
- {inputs_html}
269
- </div>
270
- </div>
271
- """
272
-
273
- js_on_load = r"""
274
- (() => {
275
- const wrap = element.querySelector('.ra-wrap');
276
- const inner = element.querySelector('.ra-inner');
277
- const highlight = element.querySelector('.ra-highlight');
278
- const inputs = Array.from(element.querySelectorAll('.ra-input'));
279
-
280
- if (!inputs.length) return;
281
-
282
- const choices = inputs.map(i => i.value);
283
-
284
- function setHighlightByIndex(idx) {
285
- const n = choices.length;
286
- const pct = 100 / n;
287
- highlight.style.width = `calc(${pct}% - 6px)`;
288
- highlight.style.transform = `translateX(${idx * 100}%)`;
289
- }
290
-
291
- function setCheckedByValue(val, shouldTrigger=false) {
292
- const idx = Math.max(0, choices.indexOf(val));
293
- inputs.forEach((inp, i) => { inp.checked = (i === idx); });
294
- setHighlightByIndex(idx);
295
-
296
- props.value = choices[idx];
297
- if (shouldTrigger) trigger('change', props.value);
298
- }
299
-
300
- // Init from props.value
301
- setCheckedByValue(props.value ?? choices[0], false);
302
-
303
- // Input handlers
304
- inputs.forEach((inp) => {
305
- inp.addEventListener('change', () => {
306
- setCheckedByValue(inp.value, true);
307
- });
308
- });
309
- })();
310
- """
311
-
312
- super().__init__(
313
- value=value,
314
- html_template=html_template,
315
- js_on_load=js_on_load,
316
- **kwargs
317
- )
318
-
319
- def generate_video_example(input_image, prompt, duration, progress=gr.Progress(track_tqdm=True)):
320
- output_video, seed = generate_video(input_image, prompt, 5, True, 42, True, DEFAULT_1_STAGE_HEIGHT, DEFAULT_1_STAGE_WIDTH, progress)
321
-
322
- return output_video
323
-
324
- @spaces.GPU(duration=get_duration)
325
- def generate_video(
326
- input_image,
327
- prompt: str,
328
- duration: float,
329
- enhance_prompt: bool = True,
330
- seed: int = 42,
331
- randomize_seed: bool = True,
332
- height: int = DEFAULT_1_STAGE_HEIGHT,
333
- width: int = DEFAULT_1_STAGE_WIDTH,
334
- progress=gr.Progress(track_tqdm=True),
335
- ):
336
- """
337
- Generate a short cinematic video from a text prompt and optional input image using the LTX-2 distilled pipeline.
338
- Args:
339
- input_image: Optional input image for image-to-video. If provided, it is injected at frame 0 to guide motion.
340
- prompt: Text description of the scene, motion, and cinematic style to generate.
341
- duration: Desired video length in seconds. Converted to frames using a fixed 24 FPS rate.
342
- enhance_prompt: Whether to enhance the prompt using the prompt enhancer before encoding.
343
- seed: Base random seed for reproducibility (ignored if randomize_seed is True).
344
- randomize_seed: If True, a random seed is generated for each run.
345
- height: Output video height in pixels.
346
- width: Output video width in pixels.
347
- progress: Gradio progress tracker.
348
- Returns:
349
- A tuple of:
350
- - output_path: Path to the generated MP4 video file.
351
- - seed: The seed used for generation.
352
- Notes:
353
- - Uses a fixed frame rate of 24 FPS.
354
- - Prompt embeddings are generated externally to avoid reloading the text encoder.
355
- - GPU cache is cleared after generation to reduce VRAM pressure.
356
- - If an input image is provided, it is temporarily saved to disk for processing.
357
- """
358
- try:
359
- # Randomize seed if checkbox is enabled
360
- current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
361
-
362
- # Calculate num_frames from duration (using fixed 24 fps)
363
- frame_rate = 24.0
364
- num_frames = int(duration * frame_rate) + 1 # +1 to ensure we meet the duration
365
-
366
- with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
367
- output_path = tmpfile.name
368
-
369
- # Handle image input
370
- images = []
371
- temp_image_path = None # Initialize to None
372
-
373
- images = []
374
- if input_image is not None:
375
- images = [(input_image, 0, 1.0)] # input_image is already a path
376
-
377
- # Prepare image for upload if it exists
378
- image_input = None
379
-
380
-
381
- embeddings, final_prompt, status = encode_prompt(
382
- prompt=prompt,
383
- enhance_prompt=enhance_prompt,
384
- input_image=input_image,
385
- seed=current_seed,
386
- negative_prompt="",
387
- )
388
-
389
- video_context = embeddings["video_context"].to("cuda", non_blocking=True)
390
- audio_context = embeddings["audio_context"].to("cuda", non_blocking=True)
391
- print("✓ Embeddings loaded successfully")
392
-
393
- # free prompt enhancer / encoder temps ASAP
394
- del embeddings, final_prompt, status
395
- torch.cuda.empty_cache()
396
-
397
- # Run inference - progress automatically tracks tqdm from pipeline
398
- pipeline(
399
- prompt=prompt,
400
- output_path=str(output_path),
401
- seed=current_seed,
402
- height=height,
403
- width=width,
404
- num_frames=num_frames,
405
- frame_rate=frame_rate,
406
- images=images,
407
- tiling_config=TilingConfig.default(),
408
- video_context=video_context,
409
- audio_context=audio_context,
410
- )
411
- del video_context, audio_context
412
- torch.cuda.empty_cache()
413
- print("successful generation")
414
-
415
- return str(output_path), current_seed
416
-
417
- except Exception as e:
418
- import traceback
419
- error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
420
- print(error_msg)
421
- return None, current_seed
422
-
423
-
424
- def apply_resolution(resolution: str):
425
- w, h = resolution.split("x")
426
- return int(w), int(h)
427
-
428
- def apply_duration(duration: str):
429
- duration_s = int(duration[:-1])
430
- return duration_s
431
-
432
- css = """
433
- #col-container {
434
- margin: 0 auto;
435
- max-width: 1600px;
436
- }
437
- #modal-container {
438
- width: 100vw; /* Take full viewport width */
439
- height: 100vh; /* Take full viewport height (optional) */
440
- display: flex;
441
- justify-content: center; /* Center content horizontally */
442
- align-items: center; /* Center content vertically if desired */
443
- }
444
- #modal-content {
445
- width: 100%;
446
- max-width: 700px; /* Limit content width */
447
- margin: 0 auto;
448
- border-radius: 8px;
449
- padding: 1.5rem;
450
- }
451
- #step-column {
452
- padding: 10px;
453
- border-radius: 8px;
454
- box-shadow: var(--card-shadow);
455
- margin: 10px;
456
- }
457
- #col-showcase {
458
- margin: 0 auto;
459
- max-width: 1100px;
460
- }
461
- .button-gradient {
462
- background: linear-gradient(45deg, rgb(255, 65, 108), rgb(255, 75, 43), rgb(255, 155, 0), rgb(255, 65, 108)) 0% 0% / 400% 400%;
463
- border: none;
464
- padding: 14px 28px;
465
- font-size: 16px;
466
- font-weight: bold;
467
- color: white;
468
- border-radius: 10px;
469
- cursor: pointer;
470
- transition: 0.3s ease-in-out;
471
- animation: 2s linear 0s infinite normal none running gradientAnimation;
472
- box-shadow: rgba(255, 65, 108, 0.6) 0px 4px 10px;
473
- }
474
- .toggle-container {
475
- display: inline-flex;
476
- background-color: #ffd6ff; /* light pink background */
477
- border-radius: 9999px;
478
- padding: 4px;
479
- position: relative;
480
- width: fit-content;
481
- font-family: sans-serif;
482
- }
483
- .toggle-container input[type="radio"] {
484
- display: none;
485
- }
486
- .toggle-container label {
487
- position: relative;
488
- z-index: 2;
489
- flex: 1;
490
- text-align: center;
491
- font-weight: 700;
492
- color: #4b2ab5; /* dark purple text for unselected */
493
- padding: 6px 22px;
494
- border-radius: 9999px;
495
- cursor: pointer;
496
- transition: color 0.25s ease;
497
- }
498
- /* Moving highlight */
499
- .toggle-highlight {
500
- position: absolute;
501
- top: 4px;
502
- left: 4px;
503
- width: calc(50% - 4px);
504
- height: calc(100% - 8px);
505
- background-color: #4b2ab5; /* dark purple background */
506
- border-radius: 9999px;
507
- transition: transform 0.25s ease;
508
- z-index: 1;
509
- }
510
- /* When "True" is checked */
511
- #true:checked ~ label[for="true"] {
512
- color: #ffd6ff; /* light pink text */
513
- }
514
- /* When "False" is checked */
515
- #false:checked ~ label[for="false"] {
516
- color: #ffd6ff; /* light pink text */
517
- }
518
- /* Move highlight to right side when False is checked */
519
- #false:checked ~ .toggle-highlight {
520
- transform: translateX(100%);
521
- }
522
- """
523
-
524
- css += """
525
- /* ---- radioanimated ---- */
526
- .ra-wrap{
527
- width: fit-content;
528
- }
529
- .ra-inner{
530
- position: relative;
531
- display: inline-flex;
532
- align-items: center;
533
- gap: 0;
534
- padding: 6px;
535
- background: #0b0b0b;
536
- border-radius: 9999px;
537
- overflow: hidden;
538
- user-select: none;
539
- }
540
- .ra-input{
541
- display: none;
542
- }
543
- .ra-label{
544
- position: relative;
545
- z-index: 2;
546
- padding: 10px 18px;
547
- font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
548
- font-size: 14px;
549
- font-weight: 600;
550
- color: rgba(255,255,255,0.7);
551
- cursor: pointer;
552
- transition: color 180ms ease;
553
- white-space: nowrap;
554
- }
555
- .ra-highlight{
556
- position: absolute;
557
- z-index: 1;
558
- top: 6px;
559
- left: 6px;
560
- height: calc(100% - 12px);
561
- border-radius: 9999px;
562
- background: #8bff97; /* green knob */
563
- transition: transform 200ms ease, width 200ms ease;
564
- }
565
- /* selected label becomes darker like your screenshot */
566
- .ra-input:checked + .ra-label{
567
- color: rgba(0,0,0,0.75);
568
- }
569
- """
570
-
571
-
572
- with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
573
- gr.HTML(
574
- """
575
- <div style="text-align: center;">
576
- <p style="font-size:16px; display: inline; margin: 0;">
577
- <strong>LTX-2 Distilled</strong> DiT-based audio-video foundation model
578
- </p>
579
- <a href="https://huggingface.co/Lightricks/LTX-2" style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
580
- [model]
581
- </a>
582
- </div>
583
- <div style="text-align: center;">
584
- <p style="font-size:16px; display: inline; margin: 0;">
585
- Using FA3 and Gemma 3 12B 4bit Quantisation for Faster Inference
586
- </p>
587
- </div>
588
- <div style="text-align: center;">
589
- <strong>HF Space by:</strong>
590
- <a href="https://huggingface.co/alexnasa" style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
591
- <img src="https://img.shields.io/badge/🤗-Follow Me-green.svg">
592
- </a>
593
- </div>
594
- """
595
- )
596
- with gr.Column(elem_id="col-container"):
597
- with gr.Row():
598
- with gr.Column(elem_id="step-column"):
599
-
600
- input_image = gr.Image(
601
- label="Input Image (Optional)",
602
- type="filepath", # <-- was "pil"
603
- height=512
604
- )
605
-
606
- prompt = gr.Textbox(
607
- label="Prompt",
608
- value="Make this image come alive with cinematic motion, smooth animation",
609
- lines=3,
610
- max_lines=3,
611
- placeholder="Describe the motion and animation you want..."
612
- )
613
-
614
- enhance_prompt = gr.Checkbox(
615
- label="Enhance Prompt",
616
- value=True,
617
- visible=False
618
- )
619
-
620
- with gr.Accordion("Advanced Settings", open=False, visible=False):
621
- seed = gr.Slider(
622
- label="Seed",
623
- minimum=0,
624
- maximum=MAX_SEED,
625
- value=DEFAULT_SEED,
626
- step=1
627
- )
628
-
629
- randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
630
-
631
-
632
- with gr.Column(elem_id="step-column"):
633
- output_video = gr.Video(label="Generated Video", autoplay=True, height=512)
634
-
635
- with gr.Row():
636
-
637
- with gr.Column():
638
- radioanimated_duration = RadioAnimated(
639
- choices=["3s", "5s", "10s"],
640
- value="3s",
641
- elem_id="radioanimated_duration"
642
- )
643
-
644
- duration = gr.Slider(
645
- label="Duration (seconds)",
646
- minimum=1.0,
647
- maximum=10.0,
648
- value=3.0,
649
- step=0.1,
650
- visible=False
651
- )
652
-
653
- with gr.Column():
654
- radioanimated_resolution = RadioAnimated(
655
- choices=["768x512", "512x512", "512x768"],
656
- value=f"{DEFAULT_1_STAGE_WIDTH}x{DEFAULT_1_STAGE_HEIGHT}",
657
- elem_id="radioanimated_resolution"
658
- )
659
-
660
- width = gr.Number(label="Width", value=DEFAULT_1_STAGE_WIDTH, precision=0, visible=False)
661
- height = gr.Number(label="Height", value=DEFAULT_1_STAGE_HEIGHT, precision=0, visible=False)
662
-
663
-
664
- generate_btn = gr.Button("🤩 Generate Video", variant="primary", elem_classes="button-gradient")
665
-
666
-
667
- radioanimated_duration.change(
668
- fn=apply_duration,
669
- inputs=radioanimated_duration,
670
- outputs=[duration],
671
- api_visibility="private"
672
- )
673
- radioanimated_resolution.change(
674
- fn=apply_resolution,
675
- inputs=radioanimated_resolution,
676
- outputs=[width, height],
677
- api_visibility="private"
678
- )
679
-
680
- generate_btn.click(
681
- fn=generate_video,
682
- inputs=[
683
- input_image,
684
- prompt,
685
- duration,
686
- enhance_prompt,
687
- seed,
688
- randomize_seed,
689
- height,
690
- width,
691
- ],
692
- outputs=[output_video,seed]
693
- )
694
-
695
- # Add example
696
- gr.Examples(
697
- examples=[
698
- [
699
- "supergirl.png",
700
- "A fuzzy puppet superhero character resembling a female puppet with blonde hair and a blue superhero suit stands inside an icy cave made of frozen walls and icicles, she looks panicked and frantic, rapidly turning her head left and right and scanning the cave while waving her arms and shouting angrily and desperately, mouthing the words “where the hell is my dog,” her movements exaggerated and puppet-like with high energy and urgency, suddenly a second puppet dog bursts into frame from the side, jumping up excitedly and tackling her affectionately while licking her face repeatedly, she freezes in surprise and then breaks into relief and laughter as the dog continues licking her, the scene feels chaotic, comedic, and emotional with expressive puppet reactions, cinematic lighting, smooth camera motion, shallow depth of field, and high-quality puppet-style animation"
701
- ],
702
- [
703
- "highland.png",
704
- "Realistic POV selfie-style video in a snowy, foggy field. Two shaggy Highland cows with long curved horns stand ahead. The camera is handheld and slightly shaky. The woman filming talks nervously and excitedly in a vlog tone: \"Oh my god guys… look how big those horns are… I’m kinda scared.\" The cow on the left walks toward the camera in a cute, bouncy, hopping way, curious and gentle. Snow crunches under its hooves, breath visible in the cold air. The horns look massive from the POV. As the cow gets very close, its wet nose with slight dripping fills part of the frame. She laughs nervously but reaches out and pets the cow. The cow makes deep, soft, interesting mooing and snorting sounds, calm and friendly. Ultra-realistic, natural lighting, immersive audio, documentary-style realism.",
705
- ],
706
- [
707
- "wednesday.png",
708
- "A cinematic close-up of Wednesday Addams frozen mid-dance on a dark, blue-lit ballroom floor as students move indistinctly behind her, their footsteps and muffled music reduced to a distant, underwater thrum; the audio foregrounds her steady breathing and the faint rustle of fabric as she slowly raises one arm, never breaking eye contact with the camera, then after a deliberately long silence she speaks in a flat, dry, perfectly controlled voice, “I don’t dance… I vibe code,” each word crisp and unemotional, followed by an abrupt cutoff of her voice as the background sound swells slightly, reinforcing the deadpan humor, with precise lip sync, minimal facial movement, stark gothic lighting, and cinematic realism.",
709
- ],
710
- [
711
- "astronaut.png",
712
- "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a breath-taking, movie-like shot.",
713
- ]
714
-
715
- ],
716
- fn=generate_video_example,
717
- inputs=[input_image, prompt],
718
- outputs = [output_video],
719
- label="Example",
720
- cache_examples=True,
721
- )
722
-
723
-
724
-
725
- if __name__ == "__main__":
726
- demo.launch(ssr_mode=False, mcp_server=True, css=css)
 
1
+ import sys
2
+ import os
3
+ import gc
4
+ from pathlib import Path
5
+ import uuid
6
+ import tempfile
7
+ import time
8
+ import random
9
+ import numpy as np
10
+ import torch
11
+ import gradio as gr
12
+ import spaces
13
+ from typing import Iterable, Optional
14
+ from PIL import Image
15
+
16
+ # Gradio Theme Imports
17
+ from gradio.themes import Soft
18
+ from gradio.themes.utils import colors, fonts, sizes
19
+
20
+ # Add packages to Python path
21
+ current_dir = Path(__file__).parent
22
+ sys.path.insert(0, str(current_dir / "packages" / "ltx-pipelines" / "src"))
23
+ sys.path.insert(0, str(current_dir / "packages" / "ltx-core" / "src"))
24
+
25
+ import flash_attn_interface
26
+ from huggingface_hub import hf_hub_download, snapshot_download
27
+
28
+ # LTX Imports
29
+ from ltx_pipelines.distilled import DistilledPipeline
30
+ from ltx_core.model.video_vae import TilingConfig
31
+ from ltx_core.loader.primitives import LoraPathStrengthAndSDOps
32
+ from ltx_core.loader.sd_ops import LTXV_LORA_COMFY_RENAMING_MAP
33
+ from ltx_pipelines.utils import ModelLedger
34
+ from ltx_pipelines.utils.helpers import generate_enhanced_prompt
35
+ from ltx_pipelines.utils.constants import (
36
+ DEFAULT_SEED,
37
+ DEFAULT_1_STAGE_HEIGHT,
38
+ DEFAULT_1_STAGE_WIDTH,
39
+ DEFAULT_NUM_FRAMES,
40
+ DEFAULT_FRAME_RATE,
41
+ DEFAULT_LORA_STRENGTH,
42
+ )
43
+
44
+ # -----------------------------------------------------------------------------
45
+ # 1. OrangeRed Theme Configuration
46
+ # -----------------------------------------------------------------------------
47
+
48
+ colors.orange_red = colors.Color(
49
+ name="orange_red",
50
+ c50="#FFF0E5",
51
+ c100="#FFE0CC",
52
+ c200="#FFC299",
53
+ c300="#FFA366",
54
+ c400="#FF8533",
55
+ c500="#FF4500",
56
+ c600="#E63E00",
57
+ c700="#CC3700",
58
+ c800="#B33000",
59
+ c900="#992900",
60
+ c950="#802200",
61
+ )
62
+
63
+ class OrangeRedTheme(Soft):
64
+ def __init__(
65
+ self,
66
+ *,
67
+ primary_hue: colors.Color | str = colors.gray,
68
+ secondary_hue: colors.Color | str = colors.orange_red,
69
+ neutral_hue: colors.Color | str = colors.slate,
70
+ text_size: sizes.Size | str = sizes.text_lg,
71
+ font: fonts.Font | str | Iterable[fonts.Font | str] = (
72
+ fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
73
+ ),
74
+ font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
75
+ fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
76
+ ),
77
+ ):
78
+ super().__init__(
79
+ primary_hue=primary_hue,
80
+ secondary_hue=secondary_hue,
81
+ neutral_hue=neutral_hue,
82
+ text_size=text_size,
83
+ font=font,
84
+ font_mono=font_mono,
85
+ )
86
+ super().set(
87
+ background_fill_primary="*primary_50",
88
+ background_fill_primary_dark="*primary_900",
89
+ body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
90
+ body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
91
+ button_primary_text_color="white",
92
+ button_primary_text_color_hover="white",
93
+ button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
94
+ button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
95
+ button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
96
+ button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
97
+ button_secondary_text_color="black",
98
+ button_secondary_text_color_hover="white",
99
+ button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
100
+ button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
101
+ button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
102
+ button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
103
+ slider_color="*secondary_500",
104
+ slider_color_dark="*secondary_600",
105
+ block_title_text_weight="600",
106
+ block_border_width="3px",
107
+ block_shadow="*shadow_drop_lg",
108
+ button_primary_shadow="*shadow_drop_lg",
109
+ button_large_padding="11px",
110
+ color_accent_soft="*primary_100",
111
+ block_label_background_fill="*primary_200",
112
+ )
113
+
114
+ orange_red_theme = OrangeRedTheme()
115
+
116
+ # -----------------------------------------------------------------------------
117
+ # 2. Configuration & Adapters
118
+ # -----------------------------------------------------------------------------
119
+
120
+ MAX_SEED = np.iinfo(np.int32).max
121
+
122
+ # HuggingFace Hub defaults
123
+ DEFAULT_REPO_ID = "Lightricks/LTX-2"
124
+ DEFAULT_GEMMA_REPO_ID = "unsloth/gemma-3-12b-it-qat-bnb-4bit"
125
+ DEFAULT_CHECKPOINT_FILENAME = "ltx-2-19b-dev.safetensors"
126
+ DEFAULT_DISTILLED_LORA_FILENAME = "ltx-2-19b-distilled-lora-384.safetensors"
127
+ DEFAULT_SPATIAL_UPSAMPLER_FILENAME = "ltx-2-spatial-upscaler-x2-1.0.safetensors"
128
+
129
+ # New Adapter Definitions
130
+ ADAPTER_SPECS = {
131
+ "None": None,
132
+ "Camera-Control-Dolly-Left": {
133
+ "repo": "Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-Left",
134
+ "weights": "ltx-2-19b-lora-camera-control-dolly-left.safetensors",
135
+ "adapter_name": "camera-control-dolly-left"
136
+ },
137
+ "Camera-Control-Dolly-Right": {
138
+ "repo": "Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-Right",
139
+ "weights": "ltx-2-19b-lora-camera-control-dolly-right.safetensors",
140
+ "adapter_name": "camera-control-dolly-right"
141
+ },
142
+ "Camera-Control-Dolly-In": {
143
+ "repo": "Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-In",
144
+ "weights": "ltx-2-19b-lora-camera-control-dolly-in.safetensors",
145
+ "adapter_name": "camera-control-dolly-in"
146
+ },
147
+ "Camera-Control-Dolly-Out": {
148
+ "repo": "Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-Out",
149
+ "weights": "ltx-2-19b-lora-camera-control-dolly-out.safetensors",
150
+ "adapter_name": "camera-control-dolly-out"
151
+ }
152
+ }
153
+
154
+ # -----------------------------------------------------------------------------
155
+ # 3. Model Loading Helper Functions
156
+ # -----------------------------------------------------------------------------
157
+
158
+ def get_hub_or_local_checkpoint(repo_id: Optional[str] = None, filename: Optional[str] = None):
159
+ """Download from HuggingFace Hub or use local checkpoint."""
160
+ if repo_id is None and filename is None:
161
+ raise ValueError("Please supply at least one of `repo_id` or `filename`")
162
+
163
+ if repo_id is not None:
164
+ if filename is None:
165
+ raise ValueError("If repo_id is specified, filename must also be specified.")
166
+ print(f"Downloading {filename} from {repo_id}...")
167
+ ckpt_path = hf_hub_download(repo_id=repo_id, filename=filename)
168
+ print(f"Downloaded to {ckpt_path}")
169
+ else:
170
+ ckpt_path = filename
171
+
172
+ return ckpt_path
173
+
174
+ def download_gemma_model(repo_id: str):
175
+ """Download the full Gemma model directory."""
176
+ print(f"Downloading Gemma model from {repo_id}...")
177
+ local_dir = snapshot_download(repo_id=repo_id)
178
+ print(f"Gemma model downloaded to {local_dir}")
179
+ return local_dir
180
+
181
+ # -----------------------------------------------------------------------------
182
+ # 4. Global Initialization (Text Encoder & Paths)
183
+ # -----------------------------------------------------------------------------
184
+
185
+ print("=" * 80)
186
+ print("Initializing LTX-2 Environment...")
187
+ print("=" * 80)
188
+
189
+ device = "cuda"
190
+
191
+ # Load Text Encoder Weights
192
+ checkpoint_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_CHECKPOINT_FILENAME)
193
+ gemma_local_path = download_gemma_model(DEFAULT_GEMMA_REPO_ID)
194
+ distilled_lora_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_DISTILLED_LORA_FILENAME)
195
+ spatial_upsampler_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_SPATIAL_UPSAMPLER_FILENAME)
196
+
197
+ print("Loading Gemma Text Encoder...")
198
+ model_ledger = ModelLedger(
199
+ dtype=torch.bfloat16,
200
+ device=device,
201
+ checkpoint_path=checkpoint_path,
202
+ gemma_root_path=DEFAULT_GEMMA_REPO_ID,
203
+ local_files_only=False
204
+ )
205
+ text_encoder = model_ledger.text_encoder()
206
+ print("Text encoder loaded.")
207
+
208
+ # -----------------------------------------------------------------------------
209
+ # 5. Inference Logic
210
+ # -----------------------------------------------------------------------------
211
+
212
+ def encode_text_simple(text_encoder, prompt: str):
213
+ """Simple text encoding without using pipeline_utils."""
214
+ v_context, a_context, _ = text_encoder(prompt)
215
+ return v_context, a_context
216
+
217
+ @spaces.GPU()
218
+ def encode_prompt(
219
+ prompt: str,
220
+ enhance_prompt: bool = True,
221
+ input_image=None,
222
+ seed: int = 42,
223
+ negative_prompt: str = ""
224
+ ):
225
+ start_time = time.time()
226
+ try:
227
+ final_prompt = prompt
228
+ if enhance_prompt:
229
+ final_prompt = generate_enhanced_prompt(
230
+ text_encoder=text_encoder,
231
+ prompt=prompt,
232
+ image_path=input_image if input_image is not None else None,
233
+ seed=seed,
234
+ )
235
+
236
+ with torch.inference_mode():
237
+ video_context, audio_context = encode_text_simple(text_encoder, final_prompt)
238
+
239
+ video_context_negative = None
240
+ audio_context_negative = None
241
+ if negative_prompt:
242
+ video_context_negative, audio_context_negative = encode_text_simple(text_encoder, negative_prompt)
243
+
244
+ embedding_data = {
245
+ "video_context": video_context.detach().cpu(),
246
+ "audio_context": audio_context.detach().cpu(),
247
+ "prompt": final_prompt,
248
+ "original_prompt": prompt,
249
+ }
250
+ if video_context_negative is not None:
251
+ embedding_data["video_context_negative"] = video_context_negative
252
+ embedding_data["audio_context_negative"] = audio_context_negative
253
+ embedding_data["negative_prompt"] = negative_prompt
254
+
255
+ elapsed_time = time.time() - start_time
256
+ status = f"✓ Encoded in {elapsed_time:.2f}s"
257
+ return embedding_data, final_prompt, status
258
+
259
+ except Exception as e:
260
+ import traceback
261
+ error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
262
+ print(error_msg)
263
+ return None, prompt, error_msg
264
+
265
+ def get_duration(input_image, prompt, lora_adapter, duration, enhance_prompt, seed, randomize_seed, height, width, progress):
266
+ if duration <= 5:
267
+ return 80
268
+ else:
269
+ return 120
270
+
271
+ @spaces.GPU(duration=get_duration)
272
+ def generate_video(
273
+ input_image,
274
+ prompt: str,
275
+ lora_adapter: str,
276
+ duration: float,
277
+ enhance_prompt: bool = True,
278
+ seed: int = 42,
279
+ randomize_seed: bool = True,
280
+ height: int = DEFAULT_1_STAGE_HEIGHT,
281
+ width: int = DEFAULT_1_STAGE_WIDTH,
282
+ progress=gr.Progress(track_tqdm=True),
283
+ ):
284
+ gc.collect()
285
+ torch.cuda.empty_cache()
286
+
287
+ try:
288
+ current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
289
+ frame_rate = 24.0
290
+ num_frames = int(duration * frame_rate) + 1
291
+
292
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
293
+ output_path = tmpfile.name
294
+
295
+ # Prepare Inputs
296
+ images = []
297
+ if input_image is not None:
298
+ images = [(input_image, 0, 1.0)]
299
+
300
+ # Encode Prompt
301
+ embeddings, final_prompt, status = encode_prompt(
302
+ prompt=prompt,
303
+ enhance_prompt=enhance_prompt,
304
+ input_image=input_image,
305
+ seed=current_seed,
306
+ negative_prompt="",
307
+ )
308
+
309
+ if embeddings is None:
310
+ raise Exception("Failed to encode prompt")
311
+
312
+ video_context = embeddings["video_context"].to("cuda", non_blocking=True)
313
+ audio_context = embeddings["audio_context"].to("cuda", non_blocking=True)
314
+
315
+ # ---------------------------
316
+ # Configure LoRAs
317
+ # ---------------------------
318
+ # Always start with the base Distilled LoRA
319
+ active_loras = [
320
+ LoraPathStrengthAndSDOps(
321
+ path=distilled_lora_path,
322
+ strength=DEFAULT_LORA_STRENGTH,
323
+ sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
324
+ )
325
+ ]
326
+
327
+ # Add additional selected Adapter
328
+ if lora_adapter and lora_adapter != "None":
329
+ spec = ADAPTER_SPECS.get(lora_adapter)
330
+ if spec:
331
+ print(f"Loading Adapter: {lora_adapter}")
332
+ # Download on demand
333
+ adapter_path = get_hub_or_local_checkpoint(repo_id=spec["repo"], filename=spec["weights"])
334
+
335
+ # Append to list
336
+ active_loras.append(
337
+ LoraPathStrengthAndSDOps(
338
+ path=adapter_path,
339
+ strength=0.8, # Default strength for style/camera LoRAs
340
+ sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
341
+ )
342
+ )
343
+
344
+ # ---------------------------
345
+ # Instantiate Pipeline
346
+ # ---------------------------
347
+ # We instantiate the pipeline inside the GPU function to ensure LoRAs are applied correctly
348
+ # for this specific run without global state pollution.
349
+ # Since 'model_ledger' and checkpoints are already downloaded/cached, this is fast.
350
+ pipeline = DistilledPipeline(
351
+ device=torch.device("cuda"),
352
+ checkpoint_path=checkpoint_path,
353
+ spatial_upsampler_path=spatial_upsampler_path,
354
+ gemma_root=None, # Already handled externally
355
+ loras=active_loras,
356
+ fp8transformer=False,
357
+ local_files_only=False,
358
+ )
359
+
360
+ # Explicitly link the pre-loaded encoder/transformer to avoid VRAM bloat
361
+ pipeline._video_encoder = pipeline.model_ledger.video_encoder()
362
+ pipeline._transformer = pipeline.model_ledger.transformer()
363
+
364
+ # Run Generation
365
+ pipeline(
366
+ prompt=prompt,
367
+ output_path=str(output_path),
368
+ seed=current_seed,
369
+ height=height,
370
+ width=width,
371
+ num_frames=num_frames,
372
+ frame_rate=frame_rate,
373
+ images=images,
374
+ tiling_config=TilingConfig.default(),
375
+ video_context=video_context,
376
+ audio_context=audio_context,
377
+ )
378
+
379
+ del video_context, audio_context, pipeline
380
+ gc.collect()
381
+ torch.cuda.empty_cache()
382
+
383
+ return str(output_path), current_seed
384
+
385
+ except Exception as e:
386
+ import traceback
387
+ error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
388
+ print(error_msg)
389
+ return None, current_seed
390
+
391
+ def generate_video_example(input_image, prompt, lora_adapter):
392
+ output, seed = generate_video(
393
+ input_image=input_image,
394
+ prompt=prompt,
395
+ lora_adapter=lora_adapter,
396
+ duration=5.0,
397
+ enhance_prompt=True,
398
+ seed=42,
399
+ randomize_seed=True,
400
+ height=DEFAULT_1_STAGE_HEIGHT,
401
+ width=DEFAULT_1_STAGE_WIDTH
402
+ )
403
+ return output
404
+
405
+ # -----------------------------------------------------------------------------
406
+ # 6. UI Components
407
+ # -----------------------------------------------------------------------------
408
+
409
+ def apply_resolution(resolution: str):
410
+ w, h = resolution.split("x")
411
+ return int(w), int(h)
412
+
413
+ def apply_duration(duration: str):
414
+ duration_s = int(duration[:-1])
415
+ return duration_s
416
+
417
+ class RadioAnimated(gr.HTML):
418
+ def __init__(self, choices, value=None, **kwargs):
419
+ if not choices or len(choices) < 2:
420
+ raise ValueError("RadioAnimated requires at least 2 choices.")
421
+ if value is None:
422
+ value = choices[0]
423
+
424
+ uid = uuid.uuid4().hex[:8]
425
+ group_name = f"ra-{uid}"
426
+
427
+ inputs_html = "\n".join(
428
+ f"""
429
+ <input class="ra-input" type="radio" name="{group_name}" id="{group_name}-{i}" value="{c}">
430
+ <label class="ra-label" for="{group_name}-{i}">{c}</label>
431
+ """
432
+ for i, c in enumerate(choices)
433
+ )
434
+
435
+ html_template = f"""
436
+ <div class="ra-wrap" data-ra="{uid}">
437
+ <div class="ra-inner">
438
+ <div class="ra-highlight"></div>
439
+ {inputs_html}
440
+ </div>
441
+ </div>
442
+ """
443
+
444
+ js_on_load = r"""
445
+ (() => {
446
+ const wrap = element.querySelector('.ra-wrap');
447
+ const inner = element.querySelector('.ra-inner');
448
+ const highlight = element.querySelector('.ra-highlight');
449
+ const inputs = Array.from(element.querySelectorAll('.ra-input'));
450
+
451
+ if (!inputs.length) return;
452
+
453
+ const choices = inputs.map(i => i.value);
454
+
455
+ function setHighlightByIndex(idx) {
456
+ const n = choices.length;
457
+ const pct = 100 / n;
458
+ highlight.style.width = `calc(${pct}% - 6px)`;
459
+ highlight.style.transform = `translateX(${idx * 100}%)`;
460
+ }
461
+
462
+ function setCheckedByValue(val, shouldTrigger=false) {
463
+ const idx = Math.max(0, choices.indexOf(val));
464
+ inputs.forEach((inp, i) => { inp.checked = (i === idx); });
465
+ setHighlightByIndex(idx);
466
+
467
+ props.value = choices[idx];
468
+ if (shouldTrigger) trigger('change', props.value);
469
+ }
470
+
471
+ setCheckedByValue(props.value ?? choices[0], false);
472
+
473
+ inputs.forEach((inp) => {
474
+ inp.addEventListener('change', () => {
475
+ setCheckedByValue(inp.value, true);
476
+ });
477
+ });
478
+ })();
479
+ """
480
+
481
+ super().__init__(
482
+ value=value,
483
+ html_template=html_template,
484
+ js_on_load=js_on_load,
485
+ **kwargs
486
+ )
487
+
488
+ # -----------------------------------------------------------------------------
489
+ # 7. Gradio Application
490
+ # -----------------------------------------------------------------------------
491
+
492
+ css = """
493
+ #col-container {
494
+ margin: 0 auto;
495
+ max-width: 1200px;
496
+ }
497
+ #step-column {
498
+ padding: 20px;
499
+ border-radius: 12px;
500
+ background: var(--background-fill-secondary);
501
+ border: 1px solid var(--border-color-primary);
502
+ margin-bottom: 20px;
503
+ }
504
+ .button-gradient {
505
+ background: linear-gradient(90deg, #FF4500, #E63E00);
506
+ border: none;
507
+ color: white;
508
+ font-weight: bold;
509
+ }
510
+ .ra-wrap{ width: fit-content; }
511
+ .ra-inner{
512
+ position: relative; display: inline-flex; align-items: center; gap: 0; padding: 6px;
513
+ background: var(--neutral-200); border-radius: 9999px; overflow: hidden;
514
+ }
515
+ .ra-input{ display: none; }
516
+ .ra-label{
517
+ position: relative; z-index: 2; padding: 8px 16px;
518
+ font-family: inherit; font-size: 14px; font-weight: 600;
519
+ color: var(--neutral-500); cursor: pointer; transition: color 0.2s; white-space: nowrap;
520
+ }
521
+ .ra-highlight{
522
+ position: absolute; z-index: 1; top: 6px; left: 6px;
523
+ height: calc(100% - 12px); border-radius: 9999px;
524
+ background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);
525
+ transition: transform 0.2s, width 0.2s;
526
+ }
527
+ .ra-input:checked + .ra-label{ color: black; }
528
+
529
+ /* Dark mode adjustments for Radio */
530
+ .dark .ra-inner { background: var(--neutral-800); }
531
+ .dark .ra-label { color: var(--neutral-400); }
532
+ .dark .ra-highlight { background: var(--neutral-600); }
533
+ .dark .ra-input:checked + .ra-label { color: white; }
534
+ """
535
+
536
+ with gr.Blocks() as demo:
537
+ with gr.Column(elem_id="col-container"):
538
+ gr.Markdown("# **LTX-2 Video Distilled + LoRA Adapters**")
539
+ gr.Markdown("Create cinematic video from text or image using LTX-2 Distilled model. Select LoRA adapters for specific camera movements or styles.")
540
+
541
+ with gr.Row():
542
+ # Left Column: Inputs
543
+ with gr.Column(elem_id="step-column"):
544
+ input_image = gr.Image(
545
+ label="Input Image (Optional)",
546
+ type="filepath",
547
+ height=300
548
+ )
549
+
550
+ prompt = gr.Textbox(
551
+ label="Prompt",
552
+ value="Make this image come alive with cinematic motion...",
553
+ lines=3,
554
+ placeholder="Describe the motion and animation you want..."
555
+ )
556
+
557
+ lora_adapter = gr.Dropdown(
558
+ label="Camera Control / Adapter",
559
+ choices=list(ADAPTER_SPECS.keys()),
560
+ value="None",
561
+ info="Select a specific camera movement or style adapter."
562
+ )
563
+
564
+ enhance_prompt = gr.Checkbox(label="Enhance Prompt", value=True, visible=False)
565
+
566
+ with gr.Accordion("Advanced Settings", open=False):
567
+ seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, value=DEFAULT_SEED, step=1)
568
+ randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
569
+
570
+ # Right Column: Output & Settings
571
+ with gr.Column(elem_id="step-column"):
572
+ output_video = gr.Video(label="Generated Video", autoplay=True, height=350)
573
+
574
+ with gr.Row():
575
+ with gr.Column():
576
+ gr.Markdown("**Duration**")
577
+ radioanimated_duration = RadioAnimated(
578
+ choices=["3s", "5s", "10s"],
579
+ value="5s",
580
+ elem_id="radioanimated_duration"
581
+ )
582
+ duration = gr.Number(value=5.0, visible=False)
583
+
584
+ with gr.Column():
585
+ gr.Markdown("**Resolution**")
586
+ radioanimated_resolution = RadioAnimated(
587
+ choices=["768x512", "512x512", "512x768"],
588
+ value=f"{DEFAULT_1_STAGE_WIDTH}x{DEFAULT_1_STAGE_HEIGHT}",
589
+ elem_id="radioanimated_resolution"
590
+ )
591
+ width = gr.Number(value=DEFAULT_1_STAGE_WIDTH, visible=False)
592
+ height = gr.Number(value=DEFAULT_1_STAGE_HEIGHT, visible=False)
593
+
594
+ generate_btn = gr.Button("Generate Video", variant="primary", elem_classes="button-gradient")
595
+
596
+ # Wire up events
597
+ radioanimated_duration.change(fn=apply_duration, inputs=radioanimated_duration, outputs=[duration], api_visibility="private")
598
+ radioanimated_resolution.change(fn=apply_resolution, inputs=radioanimated_resolution, outputs=[width, height], api_visibility="private")
599
+
600
+ generate_btn.click(
601
+ fn=generate_video,
602
+ inputs=[input_image, prompt, lora_adapter, duration, enhance_prompt, seed, randomize_seed, height, width],
603
+ outputs=[output_video, seed]
604
+ )
605
+
606
+ gr.Examples(
607
+ examples=[
608
+ ["examples/supergirl.png", "A fuzzy puppet superhero...", "Camera-Control-Dolly-Left"],
609
+ ["examples/astronaut.png", "An astronaut hatches from a fragile egg...", "Camera-Control-Dolly-In"],
610
+ ],
611
+ fn=generate_video_example,
612
+ inputs=[input_image, prompt, lora_adapter],
613
+ outputs=[output_video],
614
+ label="Examples",
615
+ cache_examples=False
616
+ )
617
+
618
+ if __name__ == "__main__":
619
+ demo.launch(css=css, theme=orange_red_theme, ssr_mode=False, mcp_server=True)