alexnasa committed on
Commit
9087721
·
verified ·
1 Parent(s): 6ed7319

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -88
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import sys
2
  from pathlib import Path
3
- import uuid
4
 
5
  # Add packages to Python path
6
  current_dir = Path(__file__).parent
@@ -97,82 +96,41 @@ def encode_text_simple(text_encoder, prompt: str):
97
  def encode_prompt(
98
  prompt: str,
99
  enhance_prompt: bool = True,
100
- input_image = None,
101
  seed: int = 42,
102
  negative_prompt: str = ""
103
  ):
104
- """
105
- Encode a text prompt using Gemma text encoder.
106
- Args:
107
- prompt: Text prompt to encode
108
- enhance_prompt: Whether to use AI to enhance the prompt
109
- input_image: Optional image for image-to-video enhancement
110
- seed: Random seed for prompt enhancement
111
- negative_prompt: Optional negative prompt for CFG (two-stage pipeline)
112
- Returns:
113
- tuple: (file_path, enhanced_prompt_text, status_message)
114
- """
115
  start_time = time.time()
116
-
117
  try:
118
- # Enhance prompt if requested
119
  final_prompt = prompt
120
  if enhance_prompt:
121
- if input_image is not None:
122
- # Save image temporarily
123
- temp_dir = Path("temp_images")
124
- temp_dir.mkdir(exist_ok=True)
125
- temp_image_path = temp_dir / f"temp_{int(time.time())}.jpg"
126
- if hasattr(input_image, 'save'):
127
- input_image.save(temp_image_path)
128
- else:
129
- temp_image_path = input_image
130
-
131
- final_prompt = generate_enhanced_prompt(
132
- text_encoder=text_encoder,
133
- prompt=prompt,
134
- image_path=str(temp_image_path),
135
- seed=seed
136
- )
137
- else:
138
- final_prompt = generate_enhanced_prompt(
139
- text_encoder=text_encoder,
140
- prompt=prompt,
141
- image_path=None,
142
- seed=seed
143
- )
144
 
145
- # Encode the positive prompt using the pre-loaded text encoder
146
- video_context, audio_context = encode_text_simple(text_encoder, final_prompt)
147
 
148
- # Encode negative prompt if provided
149
  video_context_negative = None
150
  audio_context_negative = None
151
  if negative_prompt:
152
  video_context_negative, audio_context_negative = encode_text_simple(text_encoder, negative_prompt)
153
 
154
- run_id = uuid.uuid4().hex
155
- output_dir = Path("embeddings")
156
- output_dir.mkdir(exist_ok=True)
157
- output_path = output_dir / f"embedding_{run_id}.pt"
158
-
159
- # Save embeddings (with negative contexts if provided)
160
  embedding_data = {
161
- 'video_context': video_context.cpu(),
162
- 'audio_context': audio_context.cpu(),
163
- 'prompt': final_prompt,
164
- 'original_prompt': prompt if enhance_prompt else final_prompt,
165
  }
166
-
167
- # Add negative contexts if they were encoded
168
  if video_context_negative is not None:
169
- embedding_data['video_context_negative'] = video_context_negative.cpu()
170
- embedding_data['audio_context_negative'] = audio_context_negative.cpu()
171
- embedding_data['negative_prompt'] = negative_prompt
172
-
173
- torch.save(embedding_data, output_path)
174
 
175
- # Get memory stats
176
  elapsed_time = time.time() - start_time
177
  if torch.cuda.is_available():
178
  allocated = torch.cuda.memory_allocated() / 1024**3
@@ -181,7 +139,7 @@ def encode_prompt(
181
  else:
182
  status = f"✓ Encoded in {elapsed_time:.2f}s (CPU mode)"
183
 
184
- return str(output_path), final_prompt, status
185
 
186
  except Exception as e:
187
  import traceback
@@ -189,6 +147,7 @@ def encode_prompt(
189
  print(error_msg)
190
  return None, prompt, error_msg
191
 
 
192
  # Default prompt from docstring example
193
  DEFAULT_PROMPT = "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a breath-taking, movie-like shot."
194
 
@@ -406,49 +365,39 @@ def generate_video(
406
  frame_rate = 24.0
407
  num_frames = int(duration * frame_rate) + 1 # +1 to ensure we meet the duration
408
 
409
- run_id = uuid.uuid4().hex
410
-
411
  output_dir = Path("outputs")
412
  output_dir.mkdir(exist_ok=True)
413
-
414
- output_path = output_dir / f"video_{run_id}.mp4"
415
- temp_image_path = output_dir / f"temp_input_{run_id}.jpg"
416
-
417
 
418
  # Handle image input
419
  images = []
420
-
 
 
421
  if input_image is not None:
422
-
423
- if hasattr(input_image, 'save'):
424
- input_image.save(temp_image_path)
425
- else:
426
- # If it's a file path already
427
- temp_image_path = Path(input_image)
428
- # Format: (image_path, frame_idx, strength)
429
- images = [(str(temp_image_path), 0, 1.0)]
430
 
431
-
432
  # Prepare image for upload if it exists
433
  image_input = None
434
 
435
 
436
- result = encode_prompt(
437
  prompt=prompt,
438
  enhance_prompt=enhance_prompt,
439
  input_image=input_image,
440
  seed=current_seed,
441
  negative_prompt="",
442
  )
443
- embedding_path = result[0] # Path to .pt file
444
- print(f"Embeddings received from: {embedding_path}")
445
-
446
- # Load embeddings
447
- embeddings = torch.load(embedding_path)
448
- video_context = embeddings['video_context']
449
- audio_context = embeddings['audio_context']
450
  print("✓ Embeddings loaded successfully")
451
 
 
 
 
 
452
  # Run inference - progress automatically tracks tqdm from pipeline
453
  pipeline(
454
  prompt=prompt,
@@ -463,6 +412,7 @@ def generate_video(
463
  video_context=video_context,
464
  audio_context=audio_context,
465
  )
 
466
  torch.cuda.empty_cache()
467
  print("successful generation")
468
 
@@ -472,7 +422,7 @@ def generate_video(
472
  import traceback
473
  error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
474
  print(error_msg)
475
- return None
476
 
477
 
478
  def apply_resolution(resolution: str):
@@ -649,9 +599,10 @@ with gr.Blocks(title="LTX-2 Video Distilled 🎥🔈") as demo:
649
 
650
  input_image = gr.Image(
651
  label="Input Image (Optional)",
652
- type="pil",
653
- height=512)
654
-
 
655
  prompt = gr.Textbox(
656
  label="Prompt",
657
  value="Make this image come alive with cinematic motion, smooth animation",
 
1
  import sys
2
  from pathlib import Path
 
3
 
4
  # Add packages to Python path
5
  current_dir = Path(__file__).parent
 
96
  def encode_prompt(
97
  prompt: str,
98
  enhance_prompt: bool = True,
99
+ input_image=None, # now a file path (string) or None
100
  seed: int = 42,
101
  negative_prompt: str = ""
102
  ):
 
 
 
 
 
 
 
 
 
 
 
103
  start_time = time.time()
 
104
  try:
 
105
  final_prompt = prompt
106
  if enhance_prompt:
107
+ final_prompt = generate_enhanced_prompt(
108
+ text_encoder=text_encoder,
109
+ prompt=prompt,
110
+ image_path=input_image if input_image is not None else None,
111
+ seed=seed,
112
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
+ with torch.inference_mode():
115
+ video_context, audio_context = encode_text_simple(text_encoder, final_prompt)
116
 
 
117
  video_context_negative = None
118
  audio_context_negative = None
119
  if negative_prompt:
120
  video_context_negative, audio_context_negative = encode_text_simple(text_encoder, negative_prompt)
121
 
122
+ # IMPORTANT: return tensors directly (no torch.save)
 
 
 
 
 
123
  embedding_data = {
124
+ "video_context": video_context.detach().cpu(),
125
+ "audio_context": audio_context.detach().cpu(),
126
+ "prompt": final_prompt,
127
+ "original_prompt": prompt,
128
  }
 
 
129
  if video_context_negative is not None:
130
+ embedding_data["video_context_negative"] = video_context_negative
131
+ embedding_data["audio_context_negative"] = audio_context_negative
132
+ embedding_data["negative_prompt"] = negative_prompt
 
 
133
 
 
134
  elapsed_time = time.time() - start_time
135
  if torch.cuda.is_available():
136
  allocated = torch.cuda.memory_allocated() / 1024**3
 
139
  else:
140
  status = f"✓ Encoded in {elapsed_time:.2f}s (CPU mode)"
141
 
142
+ return embedding_data, final_prompt, status
143
 
144
  except Exception as e:
145
  import traceback
 
147
  print(error_msg)
148
  return None, prompt, error_msg
149
 
150
+
151
  # Default prompt from docstring example
152
  DEFAULT_PROMPT = "An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a breath-taking, movie-like shot."
153
 
 
365
  frame_rate = 24.0
366
  num_frames = int(duration * frame_rate) + 1 # +1 to ensure we meet the duration
367
 
368
+ # Create output directory if it doesn't exist
 
369
  output_dir = Path("outputs")
370
  output_dir.mkdir(exist_ok=True)
371
+ output_path = output_dir / f"video_{current_seed}.mp4"
 
 
 
372
 
373
  # Handle image input
374
  images = []
375
+ temp_image_path = None # Initialize to None
376
+
377
+ images = []
378
  if input_image is not None:
379
+ images = [(input_image, 0, 1.0)] # input_image is already a path
 
 
 
 
 
 
 
380
 
 
381
  # Prepare image for upload if it exists
382
  image_input = None
383
 
384
 
385
+ embeddings, final_prompt, status = encode_prompt(
386
  prompt=prompt,
387
  enhance_prompt=enhance_prompt,
388
  input_image=input_image,
389
  seed=current_seed,
390
  negative_prompt="",
391
  )
392
+
393
+ video_context = embeddings["video_context"].to("cuda", non_blocking=True)
394
+ audio_context = embeddings["audio_context"].to("cuda", non_blocking=True)
 
 
 
 
395
  print("✓ Embeddings loaded successfully")
396
 
397
+ # free prompt enhancer / encoder temps ASAP
398
+ del embeddings, final_prompt, status
399
+ torch.cuda.empty_cache()
400
+
401
  # Run inference - progress automatically tracks tqdm from pipeline
402
  pipeline(
403
  prompt=prompt,
 
412
  video_context=video_context,
413
  audio_context=audio_context,
414
  )
415
+ del video_context, audio_context
416
  torch.cuda.empty_cache()
417
  print("successful generation")
418
 
 
422
  import traceback
423
  error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
424
  print(error_msg)
425
+ return None, current_seed
426
 
427
 
428
  def apply_resolution(resolution: str):
 
599
 
600
  input_image = gr.Image(
601
  label="Input Image (Optional)",
602
+ type="filepath", # <-- was "pil"
603
+ height=512
604
+ )
605
+
606
  prompt = gr.Textbox(
607
  label="Prompt",
608
  value="Make this image come alive with cinematic motion, smooth animation",