alexnasa committed on
Commit
977e52e
·
verified ·
1 Parent(s): 7357e70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -25
app.py CHANGED
@@ -31,6 +31,7 @@ from ltx_pipelines.utils.constants import (
31
  DEFAULT_LORA_STRENGTH,
32
  )
33
  from ltx_core.loader.single_gpu_model_builder import enable_only_lora
 
34
 
35
  MAX_SEED = np.iinfo(np.int32).max
36
  # Import from public LTX-2 package
@@ -235,7 +236,7 @@ loras = [
235
  # --- fused / base behavior ---
236
  LoraPathStrengthAndSDOps(
237
  path=distilled_lora_path,
238
- strength=0.6,
239
  sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
240
  ),
241
  LoraPathStrengthAndSDOps(static_lora_path, DEFAULT_LORA_STRENGTH, LTXV_LORA_COMFY_RENAMING_MAP),
@@ -570,18 +571,20 @@ class CameraDropdown(gr.HTML):
570
  )
571
 
572
 
573
- def generate_video_example(input_image, prompt, camera_lora, progress=gr.Progress(track_tqdm=True)):
 
 
574
 
575
  output_video, seed = generate_video(
576
  input_image,
577
  prompt,
578
- 5, # duration seconds
579
  True, # enhance_prompt
580
  42, # seed
581
  True, # randomize_seed
582
- DEFAULT_1_STAGE_HEIGHT, # height
583
- DEFAULT_1_STAGE_WIDTH, # width
584
- camera_lora,
585
  progress
586
  )
587
 
@@ -714,7 +717,14 @@ def generate_video(
714
 
715
 
716
  def apply_resolution(resolution: str):
717
- w, h = resolution.split("x")
 
 
 
 
 
 
 
718
  return int(w), int(h)
719
 
720
  def apply_duration(duration: str):
@@ -739,15 +749,6 @@ css = """
739
  min-width: 0 !important;
740
  }
741
 
742
- #controls-row #camera_lora_ui {
743
- margin-left: auto !important;
744
- }
745
-
746
- /* Gradio HTML components often have an inner wrapper div that is width:100% */
747
- #camera_lora_ui,
748
- #camera_lora_ui > div {
749
- width: fit-content !important;
750
- }
751
 
752
  /* Same idea for your radio HTML blocks (optional but helps) */
753
  #radioanimated_duration,
@@ -1138,24 +1139,26 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
1138
 
1139
  with gr.Row(elem_id="controls-row"):
1140
 
1141
- radioanimated_duration = RadioAnimated(
1142
  choices=["3s", "5s", "10s", "15s"],
1143
- value="3s",
 
1144
  elem_id="radioanimated_duration"
1145
  )
1146
-
1147
  duration = gr.Slider(
1148
  label="Duration (seconds)",
1149
  minimum=1.0,
1150
  maximum=15.0,
1151
- value=3.0,
1152
  step=0.1,
1153
  visible=False
1154
  )
1155
 
1156
- radioanimated_resolution = RadioAnimated(
1157
- choices=["768x512", "512x512", "512x768"],
1158
- value=f"{DEFAULT_1_STAGE_WIDTH}x{DEFAULT_1_STAGE_HEIGHT}",
 
1159
  elem_id="radioanimated_resolution"
1160
  )
1161
 
@@ -1179,6 +1182,28 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
1179
 
1180
  generate_btn = gr.Button("🤩 Generate Video", variant="primary", elem_classes="button-gradient")
1181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1182
  camera_lora_ui.change(
1183
  fn=lambda x: x,
1184
  inputs=camera_lora_ui,
@@ -1229,26 +1254,45 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
1229
  "supergirl.png",
1230
  "A fuzzy puppet superhero character resembling a female puppet with blonde hair and a blue superhero suit stands inside an icy cave made of frozen walls and icicles, she looks panicked and frantic, rapidly turning her head left and right and scanning the cave while waving her arms and shouting angrily and desperately, mouthing the words “where the hell is my dog,” her movements exaggerated and puppet-like with high energy and urgency, suddenly a second puppet dog bursts into frame from the side, jumping up excitedly and tackling her affectionately while licking her face repeatedly, she freezes in surprise and then breaks into relief and laughter as the dog continues licking her, the scene feels chaotic, comedic, and emotional with expressive puppet reactions, cinematic lighting, smooth camera motion, shallow depth of field, and high-quality puppet-style animation",
1231
  "No LoRA",
 
1232
  ],
1233
  [
1234
  "highland.png",
1235
  "Realistic POV selfie-style video in a snowy, foggy field. Two shaggy Highland cows with long curved horns stand ahead. The camera is handheld and slightly shaky. The woman filming talks nervously and excitedly in a vlog tone: \"Oh my god guys… look how big those horns are… I’m kinda scared.\" The cow on the left walks toward the camera in a cute, bouncy, hopping way, curious and gentle. Snow crunches under its hooves, breath visible in the cold air. The horns look massive from the POV. As the cow gets very close, its wet nose with slight dripping fills part of the frame. She laughs nervously but reaches out and pets the cow. The cow makes deep, soft, interesting mooing and snorting sounds, calm and friendly. Ultra-realistic, natural lighting, immersive audio, documentary-style realism.",
1236
  "No LoRA",
 
1237
  ],
1238
  [
1239
  "wednesday.png",
1240
  "A cinematic close-up of Wednesday Addams frozen mid-dance on a dark, blue-lit ballroom floor as students move indistinctly behind her, their footsteps and muffled music reduced to a distant, underwater thrum; the audio foregrounds her steady breathing and the faint rustle of fabric as she slowly raises one arm, never breaking eye contact with the camera, then after a deliberately long silence she speaks in a flat, dry, perfectly controlled voice, “I don’t dance… I vibe code,” each word crisp and unemotional, followed by an abrupt cutoff of her voice as the background sound swells slightly, reinforcing the deadpan humor, with precise lip sync, minimal facial movement, stark gothic lighting, and cinematic realism.",
1241
  "Dolly Out",
 
1242
  ],
1243
  [
1244
  "astronaut.png",
1245
  "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a breath-taking, movie-like shot.",
1246
  "Static",
1247
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1248
 
1249
  ],
1250
  fn=generate_video_example,
1251
- inputs=[input_image, prompt_ui, camera_lora_ui],
1252
  outputs = [output_video],
1253
  label="Example",
1254
  cache_examples=True,
 
31
  DEFAULT_LORA_STRENGTH,
32
  )
33
  from ltx_core.loader.single_gpu_model_builder import enable_only_lora
34
+ from PIL import Image
35
 
36
  MAX_SEED = np.iinfo(np.int32).max
37
  # Import from public LTX-2 package
 
236
  # --- fused / base behavior ---
237
  LoraPathStrengthAndSDOps(
238
  path=distilled_lora_path,
239
+ strength=DEFAULT_LORA_STRENGTH,
240
  sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
241
  ),
242
  LoraPathStrengthAndSDOps(static_lora_path, DEFAULT_LORA_STRENGTH, LTXV_LORA_COMFY_RENAMING_MAP),
 
571
  )
572
 
573
 
574
+ def generate_video_example(input_image, prompt, camera_lora, resolution, progress=gr.Progress(track_tqdm=True)):
575
+
576
+ w, h = apply_resolution(resolution)
577
 
578
  output_video, seed = generate_video(
579
  input_image,
580
  prompt,
581
+ 10, # duration seconds
582
  True, # enhance_prompt
583
  42, # seed
584
  True, # randomize_seed
585
+ h, # height
586
+ w, # width
587
+ camera_lora,
588
  progress
589
  )
590
 
 
717
 
718
 
719
  def apply_resolution(resolution: str):
720
+
721
+ if resolution == "16:9":
722
+ w, h = 768, 512
723
+ elif resolution == "1:1":
724
+ w, h = 512, 512
725
+ elif resolution == "9:16":
726
+ w, h = 512, 768
727
+
728
  return int(w), int(h)
729
 
730
  def apply_duration(duration: str):
 
749
  min-width: 0 !important;
750
  }
751
 
 
 
 
 
 
 
 
 
 
752
 
753
  /* Same idea for your radio HTML blocks (optional but helps) */
754
  #radioanimated_duration,
 
1139
 
1140
  with gr.Row(elem_id="controls-row"):
1141
 
1142
+ radioanimated_duration = CameraDropdown(
1143
  choices=["3s", "5s", "10s", "15s"],
1144
+ value="5s",
1145
+ title="Clip Duration",
1146
  elem_id="radioanimated_duration"
1147
  )
1148
+
1149
  duration = gr.Slider(
1150
  label="Duration (seconds)",
1151
  minimum=1.0,
1152
  maximum=15.0,
1153
+ value=5.0,
1154
  step=0.1,
1155
  visible=False
1156
  )
1157
 
1158
+ radioanimated_resolution = CameraDropdown(
1159
+ choices=["16:9", "1:1", "9:16"],
1160
+ value="16:9",
1161
+ title="Resolution",
1162
  elem_id="radioanimated_resolution"
1163
  )
1164
 
 
1182
 
1183
  generate_btn = gr.Button("🤩 Generate Video", variant="primary", elem_classes="button-gradient")
1184
 
1185
+ # with gr.Sidebar(width=280):
1186
+ # gr.Examples(
1187
+ # examples=[
1188
+ # [
1189
+ # "The video opens on a cake. A knife, held by a hand, is coming into frame and hovering over the cake. The knife then begins cutting into the cake to c4k3 cakeify it. As the knife slices the cake open, the inside of the cake is revealed to be cake with chocolate layers. The knife cuts through and the contents of the cake are revealed. Greek music playing in the background.",
1190
+ # "Cakify",
1191
+ # ],
1192
+
1193
+ # [
1194
+ # "The video showcases an item. The camera zooms out. Then infl4t3 inflates it, the item expanding into giant, inflated balloon against the landscape.",
1195
+ # "Inflate",
1196
+ # ],
1197
+
1198
+ # [
1199
+ # "The video begins with an item. A hydraulic press positioned above slowly descends towards the item. Upon contact, the hydraulic press c5us4 crushes it, deforming and flattening the item, causing the item to collapse inward until the item is no longer recognizable.",
1200
+ # "Hydraulic",
1201
+ # ],
1202
+ # ],
1203
+ # inputs=[prompt_ui, camera_lora_ui],
1204
+ # label="Example",
1205
+ # cache_examples=False,
1206
+ # )
1207
  camera_lora_ui.change(
1208
  fn=lambda x: x,
1209
  inputs=camera_lora_ui,
 
1254
  "supergirl.png",
1255
  "A fuzzy puppet superhero character resembling a female puppet with blonde hair and a blue superhero suit stands inside an icy cave made of frozen walls and icicles, she looks panicked and frantic, rapidly turning her head left and right and scanning the cave while waving her arms and shouting angrily and desperately, mouthing the words “where the hell is my dog,” her movements exaggerated and puppet-like with high energy and urgency, suddenly a second puppet dog bursts into frame from the side, jumping up excitedly and tackling her affectionately while licking her face repeatedly, she freezes in surprise and then breaks into relief and laughter as the dog continues licking her, the scene feels chaotic, comedic, and emotional with expressive puppet reactions, cinematic lighting, smooth camera motion, shallow depth of field, and high-quality puppet-style animation",
1256
  "No LoRA",
1257
+ "16:9",
1258
  ],
1259
  [
1260
  "highland.png",
1261
  "Realistic POV selfie-style video in a snowy, foggy field. Two shaggy Highland cows with long curved horns stand ahead. The camera is handheld and slightly shaky. The woman filming talks nervously and excitedly in a vlog tone: \"Oh my god guys… look how big those horns are… I’m kinda scared.\" The cow on the left walks toward the camera in a cute, bouncy, hopping way, curious and gentle. Snow crunches under its hooves, breath visible in the cold air. The horns look massive from the POV. As the cow gets very close, its wet nose with slight dripping fills part of the frame. She laughs nervously but reaches out and pets the cow. The cow makes deep, soft, interesting mooing and snorting sounds, calm and friendly. Ultra-realistic, natural lighting, immersive audio, documentary-style realism.",
1262
  "No LoRA",
1263
+ "16:9",
1264
  ],
1265
  [
1266
  "wednesday.png",
1267
  "A cinematic close-up of Wednesday Addams frozen mid-dance on a dark, blue-lit ballroom floor as students move indistinctly behind her, their footsteps and muffled music reduced to a distant, underwater thrum; the audio foregrounds her steady breathing and the faint rustle of fabric as she slowly raises one arm, never breaking eye contact with the camera, then after a deliberately long silence she speaks in a flat, dry, perfectly controlled voice, “I don’t dance… I vibe code,” each word crisp and unemotional, followed by an abrupt cutoff of her voice as the background sound swells slightly, reinforcing the deadpan humor, with precise lip sync, minimal facial movement, stark gothic lighting, and cinematic realism.",
1268
  "Dolly Out",
1269
+ "16:9",
1270
  ],
1271
  [
1272
  "astronaut.png",
1273
  "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a breath-taking, movie-like shot.",
1274
  "Static",
1275
+ "1:1",
1276
+ ],
1277
+ # [
1278
+ # "astronaut.png",
1279
+ # "The video opens on object. A knife, held by a hand, is coming into frame and hovering over the object. The knife then begins cutting into the object to c4k3 cakeify them. As the knife slices the object open, the insides of the object are revealed to be cakes with chocolate layers. The knife cuts through and the contents of object are revealed.",
1280
+ # "Cakify",
1281
+ # ],
1282
+ # [
1283
+ # "astronaut.png",
1284
+ # "The video showcases an item. The camera zooms out. Then infl4t3 inflates it, the item expanding into giant, inflated balloon against the landscape.",
1285
+ # "Inflate",
1286
+ # ],
1287
+ # [
1288
+ # "astronaut.png",
1289
+ # "The video begins with an item. A hydraulic press positioned above slowly descends towards the item. Upon contact, the hydraulic press c5us4 crushes it, deforming and flattening the item, causing the item to collapse inward until the item is no longer recognizable.",
1290
+ # "Hydraulic",
1291
+ # ],
1292
 
1293
  ],
1294
  fn=generate_video_example,
1295
+ inputs=[input_image, prompt_ui, camera_lora_ui, radioanimated_resolution],
1296
  outputs = [output_video],
1297
  label="Example",
1298
  cache_examples=True,