Fabrice-TIERCELIN committed on
Commit
cbfbd85
·
verified ·
1 Parent(s): 55436b0

Optimization

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -399,9 +399,10 @@ def worker(input_image, prompts, n_prompt, seed, resolution, total_second_length
399
  rnd = torch.Generator("cpu").manual_seed(seed)
400
 
401
  history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
 
402
  history_pixels = None
403
 
404
- history_latents = torch.cat([history_latents, start_latent.to(history_latents)], dim=2)
405
  total_generated_latent_frames = 1
406
 
407
  if enable_preview:
@@ -481,7 +482,7 @@ def worker(input_image, prompts, n_prompt, seed, resolution, total_second_length
481
  transformer.initialize_teacache(enable_teacache=False)
482
 
483
  clean_latents_4x, clean_latents_2x, clean_latents_1x = history_latents[:, :, -sum([16, 2, 1]):, :, :].split([16, 2, 1], dim=2)
484
- clean_latents = torch.cat([start_latent.to(history_latents), clean_latents_1x], dim=2)
485
 
486
  generated_latents = sample_hunyuan(
487
  transformer=transformer,
@@ -620,9 +621,10 @@ def worker_last_frame(input_image, prompts, n_prompt, seed, resolution, total_se
620
  rnd = torch.Generator("cpu").manual_seed(seed)
621
 
622
  history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
 
623
  history_pixels = None
624
 
625
- history_latents = torch.cat([start_latent.to(history_latents), history_latents], dim=2)
626
  total_generated_latent_frames = 1
627
 
628
  if enable_preview:
@@ -702,7 +704,7 @@ def worker_last_frame(input_image, prompts, n_prompt, seed, resolution, total_se
702
  transformer.initialize_teacache(enable_teacache=False)
703
 
704
  clean_latents_1x, clean_latents_2x, clean_latents_4x = history_latents[:, :, :sum([1, 2, 16]), :, :].split([1, 2, 16], dim=2)
705
- clean_latents = torch.cat([clean_latents_1x, start_latent.to(history_latents)], dim=2)
706
 
707
  generated_latents = sample_hunyuan(
708
  transformer=transformer,
@@ -794,6 +796,7 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
794
 
795
  # 20250506 pftq: Encode video
796
  start_latent, input_image_np, video_latents, fps, height, width, input_video_pixels = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
 
797
 
798
  # CLIP Vision
799
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
@@ -883,7 +886,7 @@ def worker_video(input_video, prompts, n_prompt, seed, batch, resolution, total_
883
  if effective_clean_frames > 0 and split_idx < len(splits):
884
  clean_latents_1x = splits[split_idx]
885
 
886
- clean_latents = torch.cat([start_latent.to(history_latents), clean_latents_1x], dim=2)
887
 
888
  # 20250507 pftq: Fix for <=1 sec videos.
889
  max_frames = min(latent_window_size * 4 - 3, history_latents.shape[2] * 4)
@@ -1341,7 +1344,7 @@ with block:
1341
  examples = [
1342
  [
1343
  "./img_examples/Example2.webp", # input_image
1344
- 100, # image_position
1345
  "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
1346
  "image", # generation_mode
1347
  "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
@@ -1376,12 +1379,12 @@ with block:
1376
  0.0, # rs
1377
  6, # gpu_memory_preservation
1378
  False, # enable_preview
1379
- False, # use_teacache
1380
  16 # mp4_crf
1381
  ],
1382
  [
1383
  "./img_examples/Example3.jpg", # input_image
1384
- 0, # image_position
1385
  "A boy is walking to the right, full view, full-length view, cartoon",
1386
  "image", # generation_mode
1387
  "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
@@ -1396,7 +1399,7 @@ with block:
1396
  0.0, # rs
1397
  6, # gpu_memory_preservation
1398
  False, # enable_preview
1399
- True, # use_teacache
1400
  16 # mp4_crf
1401
  ],
1402
  ],
 
399
  rnd = torch.Generator("cpu").manual_seed(seed)
400
 
401
  history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
402
+ start_latent = start_latent.to(history_latents)
403
  history_pixels = None
404
 
405
+ history_latents = torch.cat([history_latents, start_latent], dim=2)
406
  total_generated_latent_frames = 1
407
 
408
  if enable_preview:
 
482
  transformer.initialize_teacache(enable_teacache=False)
483
 
484
  clean_latents_4x, clean_latents_2x, clean_latents_1x = history_latents[:, :, -sum([16, 2, 1]):, :, :].split([16, 2, 1], dim=2)
485
+ clean_latents = torch.cat([start_latent, clean_latents_1x], dim=2)
486
 
487
  generated_latents = sample_hunyuan(
488
  transformer=transformer,
 
621
  rnd = torch.Generator("cpu").manual_seed(seed)
622
 
623
  history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
624
+ start_latent = start_latent.to(history_latents)
625
  history_pixels = None
626
 
627
+ history_latents = torch.cat([start_latent, history_latents], dim=2)
628
  total_generated_latent_frames = 1
629
 
630
  if enable_preview:
 
704
  transformer.initialize_teacache(enable_teacache=False)
705
 
706
  clean_latents_1x, clean_latents_2x, clean_latents_4x = history_latents[:, :, :sum([1, 2, 16]), :, :].split([1, 2, 16], dim=2)
707
+ clean_latents = torch.cat([clean_latents_1x, start_latent], dim=2)
708
 
709
  generated_latents = sample_hunyuan(
710
  transformer=transformer,
 
796
 
797
  # 20250506 pftq: Encode video
798
  start_latent, input_image_np, video_latents, fps, height, width, input_video_pixels = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
799
+ start_latent = start_latent.to(dtype=torch.float32).cpu()
800
 
801
  # CLIP Vision
802
  stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
 
886
  if effective_clean_frames > 0 and split_idx < len(splits):
887
  clean_latents_1x = splits[split_idx]
888
 
889
+ clean_latents = torch.cat([start_latent, clean_latents_1x], dim=2)
890
 
891
  # 20250507 pftq: Fix for <=1 sec videos.
892
  max_frames = min(latent_window_size * 4 - 3, history_latents.shape[2] * 4)
 
1344
  examples = [
1345
  [
1346
  "./img_examples/Example2.webp", # input_image
1347
+ 0, # image_position
1348
  "A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The man talks and the woman listens; A man on the left and a woman on the right face each other ready to start a conversation, large space between the persons, full view, full-length view, 3D, pixar, 3D render, CGI. The woman talks and the man listens",
1349
  "image", # generation_mode
1350
  "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
 
1379
  0.0, # rs
1380
  6, # gpu_memory_preservation
1381
  False, # enable_preview
1382
+ True, # use_teacache
1383
  16 # mp4_crf
1384
  ],
1385
  [
1386
  "./img_examples/Example3.jpg", # input_image
1387
+ 100, # image_position
1388
  "A boy is walking to the right, full view, full-length view, cartoon",
1389
  "image", # generation_mode
1390
  "Missing arm, unrealistic position, impossible contortion, visible bone, muscle contraction, blurred, blurry", # n_prompt
 
1399
  0.0, # rs
1400
  6, # gpu_memory_preservation
1401
  False, # enable_preview
1402
+ False, # use_teacache
1403
  16 # mp4_crf
1404
  ],
1405
  ],