linoyts HF Staff committed on
Commit
0ae7933
·
verified ·
1 Parent(s): 5a392ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -84
app.py CHANGED
@@ -10,7 +10,7 @@ import spaces
10
  import gradio as gr
11
  from typing import Optional
12
  from huggingface_hub import hf_hub_download
13
- from ltx_pipelines.ti2vid_two_stages import TI2VidTwoStagesPipeline
14
  from ltx_core.tiling import TilingConfig
15
  from ltx_pipelines.constants import (
16
  DEFAULT_SEED,
@@ -18,22 +18,15 @@ from ltx_pipelines.constants import (
18
  DEFAULT_WIDTH,
19
  DEFAULT_NUM_FRAMES,
20
  DEFAULT_FRAME_RATE,
21
- DEFAULT_NUM_INFERENCE_STEPS,
22
- DEFAULT_CFG_GUIDANCE_SCALE,
23
- DEFAULT_LORA_STRENGTH,
24
  )
25
 
26
- # Custom negative prompt
27
- DEFAULT_NEGATIVE_PROMPT = "shaky, glitchy, low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly, transition, static"
28
-
29
  # Default prompt from docstring example
30
  DEFAULT_PROMPT = "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a breath-taking, movie-like shot."
31
 
32
  # HuggingFace Hub defaults
33
  DEFAULT_REPO_ID = "LTX-Colab/LTX-Video-Preview"
34
  DEFAULT_GEMMA_REPO_ID = "google/gemma-3-12b-it-qat-q4_0-unquantized"
35
- DEFAULT_CHECKPOINT_FILENAME = "ltx-2-19b-dev-rc1.safetensors"
36
- DEFAULT_DISTILLED_LORA_FILENAME = "ltx-2-19b-distilled-lora-384-rc1.safetensors"
37
  DEFAULT_SPATIAL_UPSAMPLER_FILENAME = "ltx-2-spatial-upscaler-x2-1.0-rc1.safetensors"
38
 
39
  def get_hub_or_local_checkpoint(repo_id: Optional[str] = None, filename: Optional[str] = None):
@@ -55,73 +48,36 @@ def get_hub_or_local_checkpoint(repo_id: Optional[str] = None, filename: Optiona
55
 
56
  # Initialize pipeline at startup
57
  print("=" * 80)
58
- print("Loading LTX-2 2-stage pipeline...")
59
  print("=" * 80)
60
 
61
  checkpoint_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_CHECKPOINT_FILENAME)
62
- distilled_lora_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_DISTILLED_LORA_FILENAME)
63
  spatial_upsampler_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_SPATIAL_UPSAMPLER_FILENAME)
64
 
65
  print(f"Initializing pipeline with:")
66
  print(f" checkpoint_path={checkpoint_path}")
67
- print(f" distilled_lora_path={distilled_lora_path}")
68
  print(f" spatial_upsampler_path={spatial_upsampler_path}")
69
  print(f" gemma_root={DEFAULT_GEMMA_REPO_ID}")
70
 
71
- pipeline = TI2VidTwoStagesPipeline(
72
  checkpoint_path=checkpoint_path,
73
- distilled_lora_path=distilled_lora_path,
74
- distilled_lora_strength=DEFAULT_LORA_STRENGTH,
75
  spatial_upsampler_path=spatial_upsampler_path,
76
  gemma_root=DEFAULT_GEMMA_REPO_ID,
77
  loras=[],
78
  fp8transformer=False,
79
- local_files_only=False
80
  )
81
 
82
- # print("=" * 80)
83
- # print("Warming up pipeline (loading Gemma text encoder)...")
84
- # print("=" * 80)
85
-
86
- # # Do a dummy warmup to load all models including Gemma
87
- # import tempfile
88
- # import os
89
- # warmup_output = tempfile.mktemp(suffix=".mp4")
90
- # try:
91
- # pipeline(
92
- # prompt="warmup",
93
- # negative_prompt="",
94
- # output_path=warmup_output,
95
- # seed=42,
96
- # height=256,
97
- # width=256,
98
- # num_frames=9,
99
- # frame_rate=8,
100
- # num_inference_steps=1,
101
- # cfg_guidance_scale=1.0,
102
- # images=[],
103
- # tiling_config=TilingConfig.default(),
104
- # )
105
- # # Clean up warmup output
106
- # if os.path.exists(warmup_output):
107
- # os.remove(warmup_output)
108
- # except Exception as e:
109
- # print(f"Warmup completed with note: {e}")
110
-
111
- # print("=" * 80)
112
- # print("Pipeline fully loaded and ready!")
113
- # print("=" * 80)
114
 
115
  @spaces.GPU(duration=300)
116
  def generate_video(
117
  input_image,
118
  prompt: str,
119
  duration: float,
120
- negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
121
  seed: int = 42,
122
  randomize_seed: bool = True,
123
- num_inference_steps: int = DEFAULT_NUM_INFERENCE_STEPS,
124
- cfg_guidance_scale: float = DEFAULT_CFG_GUIDANCE_SCALE,
125
  height: int = DEFAULT_HEIGHT,
126
  width: int = DEFAULT_WIDTH,
127
  progress=gr.Progress(track_tqdm=True)
@@ -158,15 +114,12 @@ def generate_video(
158
  # Run inference - progress automatically tracks tqdm from pipeline
159
  pipeline(
160
  prompt=prompt,
161
- negative_prompt=negative_prompt,
162
  output_path=str(output_path),
163
  seed=seed,
164
  height=height,
165
  width=width,
166
  num_frames=num_frames,
167
  frame_rate=frame_rate,
168
- num_inference_steps=num_inference_steps,
169
- cfg_guidance_scale=cfg_guidance_scale,
170
  images=images,
171
  tiling_config=TilingConfig.default(),
172
  )
@@ -181,8 +134,8 @@ def generate_video(
181
 
182
 
183
  # Create Gradio interface
184
- with gr.Blocks(title="LTX-2 Image-to-Video") as demo:
185
- gr.Markdown("# LTX-2 Image-to-Video Generation")
186
 
187
  with gr.Row():
188
  with gr.Column():
@@ -210,12 +163,6 @@ with gr.Blocks(title="LTX-2 Image-to-Video") as demo:
210
  generate_btn = gr.Button("Generate Video", variant="primary", size="lg")
211
 
212
  with gr.Accordion("Advanced Settings", open=False):
213
- negative_prompt = gr.Textbox(
214
- label="Negative Prompt",
215
- value=DEFAULT_NEGATIVE_PROMPT,
216
- lines=2
217
- )
218
-
219
  seed = gr.Slider(
220
  label="Seed",
221
  minimum=0,
@@ -229,22 +176,6 @@ with gr.Blocks(title="LTX-2 Image-to-Video") as demo:
229
  value=True
230
  )
231
 
232
- num_inference_steps = gr.Slider(
233
- label="Inference Steps",
234
- minimum=1,
235
- maximum=100,
236
- value=DEFAULT_NUM_INFERENCE_STEPS,
237
- step=1
238
- )
239
-
240
- cfg_guidance_scale = gr.Slider(
241
- label="CFG Guidance Scale",
242
- minimum=1.0,
243
- maximum=10.0,
244
- value=DEFAULT_CFG_GUIDANCE_SCALE,
245
- step=0.1
246
- )
247
-
248
  with gr.Row():
249
  width = gr.Number(
250
  label="Width",
@@ -266,11 +197,8 @@ with gr.Blocks(title="LTX-2 Image-to-Video") as demo:
266
  input_image,
267
  prompt,
268
  duration,
269
- negative_prompt,
270
  seed,
271
  randomize_seed,
272
- num_inference_steps,
273
- cfg_guidance_scale,
274
  height,
275
  width,
276
  ],
@@ -290,10 +218,9 @@ with gr.Blocks(title="LTX-2 Image-to-Video") as demo:
290
  inputs=[input_image, prompt, duration],
291
  outputs = [output_video],
292
  label="Example",
293
- cache_examples=True,
294
- cache_mode="lazy",
295
  )
296
 
297
 
298
  if __name__ == "__main__":
299
- demo.launch(theme=gr.themes.Citrus())
 
10
  import gradio as gr
11
  from typing import Optional
12
  from huggingface_hub import hf_hub_download
13
+ from ltx_pipelines.distilled import DistilledPipeline
14
  from ltx_core.tiling import TilingConfig
15
  from ltx_pipelines.constants import (
16
  DEFAULT_SEED,
 
18
  DEFAULT_WIDTH,
19
  DEFAULT_NUM_FRAMES,
20
  DEFAULT_FRAME_RATE,
 
 
 
21
  )
22
 
 
 
 
23
  # Default prompt from docstring example
24
  DEFAULT_PROMPT = "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a breath-taking, movie-like shot."
25
 
26
  # HuggingFace Hub defaults
27
  DEFAULT_REPO_ID = "LTX-Colab/LTX-Video-Preview"
28
  DEFAULT_GEMMA_REPO_ID = "google/gemma-3-12b-it-qat-q4_0-unquantized"
29
+ DEFAULT_CHECKPOINT_FILENAME = "ltx-2-19b-distilled-rc1.safetensors"
 
30
  DEFAULT_SPATIAL_UPSAMPLER_FILENAME = "ltx-2-spatial-upscaler-x2-1.0-rc1.safetensors"
31
 
32
  def get_hub_or_local_checkpoint(repo_id: Optional[str] = None, filename: Optional[str] = None):
 
48
 
49
  # Initialize pipeline at startup
50
  print("=" * 80)
51
+ print("Loading LTX-2 Distilled pipeline...")
52
  print("=" * 80)
53
 
54
  checkpoint_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_CHECKPOINT_FILENAME)
 
55
  spatial_upsampler_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_SPATIAL_UPSAMPLER_FILENAME)
56
 
57
  print(f"Initializing pipeline with:")
58
  print(f" checkpoint_path={checkpoint_path}")
 
59
  print(f" spatial_upsampler_path={spatial_upsampler_path}")
60
  print(f" gemma_root={DEFAULT_GEMMA_REPO_ID}")
61
 
62
+ pipeline = DistilledPipeline(
63
  checkpoint_path=checkpoint_path,
 
 
64
  spatial_upsampler_path=spatial_upsampler_path,
65
  gemma_root=DEFAULT_GEMMA_REPO_ID,
66
  loras=[],
67
  fp8transformer=False,
 
68
  )
69
 
70
+ print("=" * 80)
71
+ print("Pipeline fully loaded and ready!")
72
+ print("=" * 80)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  @spaces.GPU(duration=300)
75
  def generate_video(
76
  input_image,
77
  prompt: str,
78
  duration: float,
 
79
  seed: int = 42,
80
  randomize_seed: bool = True,
 
 
81
  height: int = DEFAULT_HEIGHT,
82
  width: int = DEFAULT_WIDTH,
83
  progress=gr.Progress(track_tqdm=True)
 
114
  # Run inference - progress automatically tracks tqdm from pipeline
115
  pipeline(
116
  prompt=prompt,
 
117
  output_path=str(output_path),
118
  seed=seed,
119
  height=height,
120
  width=width,
121
  num_frames=num_frames,
122
  frame_rate=frame_rate,
 
 
123
  images=images,
124
  tiling_config=TilingConfig.default(),
125
  )
 
134
 
135
 
136
  # Create Gradio interface
137
+ with gr.Blocks(title="LTX-2 Distilled Image-to-Video") as demo:
138
+ gr.Markdown("# LTX-2 Distilled Image-to-Video Generation")
139
 
140
  with gr.Row():
141
  with gr.Column():
 
163
  generate_btn = gr.Button("Generate Video", variant="primary", size="lg")
164
 
165
  with gr.Accordion("Advanced Settings", open=False):
 
 
 
 
 
 
166
  seed = gr.Slider(
167
  label="Seed",
168
  minimum=0,
 
176
  value=True
177
  )
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  with gr.Row():
180
  width = gr.Number(
181
  label="Width",
 
197
  input_image,
198
  prompt,
199
  duration,
 
200
  seed,
201
  randomize_seed,
 
 
202
  height,
203
  width,
204
  ],
 
218
  inputs=[input_image, prompt, duration],
219
  outputs = [output_video],
220
  label="Example",
221
+ cache_examples=False,
 
222
  )
223
 
224
 
225
  if __name__ == "__main__":
226
+ demo.launch(theme=gr.themes.Citrus())