Spaces:
Runtime error
Runtime error
Optimization
Browse files
app.py
CHANGED
|
@@ -399,9 +399,10 @@ def worker(input_image, prompts, n_prompt, seed, resolution, total_second_length
|
|
| 399 |
rnd = torch.Generator("cpu").manual_seed(seed)
|
| 400 |
|
| 401 |
history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
|
|
|
|
| 402 |
history_pixels = None
|
| 403 |
|
| 404 |
-
history_latents = torch.cat([history_latents, start_latent.to(history_latents)], dim=2)
|
| 405 |
total_generated_latent_frames = 1
|
| 406 |
|
| 407 |
if enable_preview:
|
|
@@ -481,7 +482,7 @@ def worker(input_image, prompts, n_prompt, seed, resolution, total_second_length
|
|
| 481 |
transformer.initialize_teacache(enable_teacache=False)
|
| 482 |
|
| 483 |
clean_latents_4x, clean_latents_2x, clean_latents_1x = history_latents[:, :, -sum([16, 2, 1]):, :, :].split([16, 2, 1], dim=2)
|
| 484 |
-
clean_latents = torch.cat([start_latent.to(history_latents), clean_latents_1x], dim=2)
|
| 485 |
|
| 486 |
generated_latents = sample_hunyuan(
|
| 487 |
transformer=transformer,
|
|
@@ -620,9 +621,10 @@ def worker_last_frame(input_image, prompts, n_prompt, seed, resolution, total_se
|
|
| 620 |
rnd = torch.Generator("cpu").manual_seed(seed)
|
| 621 |
|
| 622 |
history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
|
|
|
|
| 623 |
history_pixels = None
|
| 624 |
|
| 625 |
-
history_latents = torch.cat([start_latent.to(history_latents), history_latents], dim=2)
|
| 626 |
total_generated_latent_frames = 1
|
| 627 |
|
| 628 |
if enable_preview:
|
|
@@ -702,7 +704,7 @@ def worker_last_frame(input_image, prompts, n_prompt, seed, resolution, total_se
|
|
| 702 |
transformer.initialize_teacache(enable_teacache=False)
|
| 703 |
|
| 704 |
clean_latents_1x, clean_latents_2x, clean_latents_4x = history_latents[:, :, :sum([1, 2, 16]), :, :].split([1, 2, 16], dim=2)
|
| 705 |
-
clean_latents = torch.cat([clean_latents_1x, start_latent.to(history_latents)], dim=2)
|
| 706 |
|
| 707 |
generated_latents = sample_hunyuan(
|
| 708 |
transformer=transformer,
|
|
@@ -794,6 +796,7 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
|
|
| 794 |
|
| 795 |
# 20250506 pftq: Encode video
|
| 796 |
start_latent, input_image_np, video_latents, fps, height, width, input_video_pixels = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
|
|
|
|
| 797 |
|
| 798 |
# CLIP Vision
|
| 799 |
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
|
|
@@ -883,7 +886,7 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
|
|
| 883 |
if effective_clean_frames > 0 and split_idx < len(splits):
|
| 884 |
clean_latents_1x = splits[split_idx]
|
| 885 |
|
| 886 |
-
clean_latents = torch.cat([start_latent.to(history_latents), clean_latents_1x], dim=2)
|
| 887 |
|
| 888 |
# 20250507 pftq: Fix for <=1 sec videos.
|
| 889 |
max_frames = min(latent_window_size * 4 - 3, history_latents.shape[2] * 4)
|
|
@@ -1341,7 +1344,7 @@ with block:
|
|
| 1341 |
examples = [
|
| 1342 |
[
|
| 1343 |
"./img_examples/Example2.webp", # input_image
|
| 1344 |
-
|
| 1345 |
"A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
|
| 1346 |
"image", # generation_mode
|
| 1347 |
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
|
@@ -1376,12 +1379,12 @@ with block:
|
|
| 1376 |
0.0, # rs
|
| 1377 |
6, # gpu_memory_preservation
|
| 1378 |
False, # enable_preview
|
| 1379 |
-
|
| 1380 |
16 # mp4_crf
|
| 1381 |
],
|
| 1382 |
[
|
| 1383 |
"./img_examples/Example3.jpg", # input_image
|
| 1384 |
-
|
| 1385 |
"A boy is walking to the right, full view, full-length view, cartoon",
|
| 1386 |
"image", # generation_mode
|
| 1387 |
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
|
@@ -1396,7 +1399,7 @@ with block:
|
|
| 1396 |
0.0, # rs
|
| 1397 |
6, # gpu_memory_preservation
|
| 1398 |
False, # enable_preview
|
| 1399 |
-
|
| 1400 |
16 # mp4_crf
|
| 1401 |
],
|
| 1402 |
],
|
|
|
|
| 399 |
rnd = torch.Generator("cpu").manual_seed(seed)
|
| 400 |
|
| 401 |
history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
|
| 402 |
+
start_latent = start_latent.to(history_latents)
|
| 403 |
history_pixels = None
|
| 404 |
|
| 405 |
+
history_latents = torch.cat([history_latents, start_latent], dim=2)
|
| 406 |
total_generated_latent_frames = 1
|
| 407 |
|
| 408 |
if enable_preview:
|
|
|
|
| 482 |
transformer.initialize_teacache(enable_teacache=False)
|
| 483 |
|
| 484 |
clean_latents_4x, clean_latents_2x, clean_latents_1x = history_latents[:, :, -sum([16, 2, 1]):, :, :].split([16, 2, 1], dim=2)
|
| 485 |
+
clean_latents = torch.cat([start_latent, clean_latents_1x], dim=2)
|
| 486 |
|
| 487 |
generated_latents = sample_hunyuan(
|
| 488 |
transformer=transformer,
|
|
|
|
| 621 |
rnd = torch.Generator("cpu").manual_seed(seed)
|
| 622 |
|
| 623 |
history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
|
| 624 |
+
start_latent = start_latent.to(history_latents)
|
| 625 |
history_pixels = None
|
| 626 |
|
| 627 |
+
history_latents = torch.cat([start_latent, history_latents], dim=2)
|
| 628 |
total_generated_latent_frames = 1
|
| 629 |
|
| 630 |
if enable_preview:
|
|
|
|
| 704 |
transformer.initialize_teacache(enable_teacache=False)
|
| 705 |
|
| 706 |
clean_latents_1x, clean_latents_2x, clean_latents_4x = history_latents[:, :, :sum([1, 2, 16]), :, :].split([1, 2, 16], dim=2)
|
| 707 |
+
clean_latents = torch.cat([clean_latents_1x, start_latent], dim=2)
|
| 708 |
|
| 709 |
generated_latents = sample_hunyuan(
|
| 710 |
transformer=transformer,
|
|
|
|
| 796 |
|
| 797 |
# 20250506 pftq: Encode video
|
| 798 |
start_latent, input_image_np, video_latents, fps, height, width, input_video_pixels = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
|
| 799 |
+
start_latent = start_latent.to(dtype=torch.float32).cpu()
|
| 800 |
|
| 801 |
# CLIP Vision
|
| 802 |
stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
|
|
|
|
| 886 |
if effective_clean_frames > 0 and split_idx < len(splits):
|
| 887 |
clean_latents_1x = splits[split_idx]
|
| 888 |
|
| 889 |
+
clean_latents = torch.cat([start_latent, clean_latents_1x], dim=2)
|
| 890 |
|
| 891 |
# 20250507 pftq: Fix for <=1 sec videos.
|
| 892 |
max_frames = min(latent_window_size * 4 - 3, history_latents.shape[2] * 4)
|
|
|
|
| 1344 |
examples = [
|
| 1345 |
[
|
| 1346 |
"./img_examples/Example2.webp", # input_image
|
| 1347 |
+
0, # image_position
|
| 1348 |
"A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
|
| 1349 |
"image", # generation_mode
|
| 1350 |
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
|
|
|
| 1379 |
0.0, # rs
|
| 1380 |
6, # gpu_memory_preservation
|
| 1381 |
False, # enable_preview
|
| 1382 |
+
True, # use_teacache
|
| 1383 |
16 # mp4_crf
|
| 1384 |
],
|
| 1385 |
[
|
| 1386 |
"./img_examples/Example3.jpg", # input_image
|
| 1387 |
+
100, # image_position
|
| 1388 |
"A boy is walking to the right, full view, full-length view, cartoon",
|
| 1389 |
"image", # generation_mode
|
| 1390 |
"Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
|
|
|
|
| 1399 |
0.0, # rs
|
| 1400 |
6, # gpu_memory_preservation
|
| 1401 |
False, # enable_preview
|
| 1402 |
+
False, # use_teacache
|
| 1403 |
16 # mp4_crf
|
| 1404 |
],
|
| 1405 |
],
|