Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -102,31 +102,27 @@ def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: T
 
 @spaces.GPU(duration=120)
 def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolution: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
-    # Debugging print statements
-    print(f"Frame 1 Type: {type(frame1)}")
-    print(f"Frame 2 Type: {type(frame2)}")
-    print(f"Resolution: {resolution}")
-
-    # Parse resolution
     width, height = map(int, resolution.split('x'))
-    … [nine further removed lines are truncated in the diff view]
-    cond_video = torch.… [truncated in the diff view]
+
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Resize((height, width), antialias=True),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+    ])
+
+    cond_frame1 = transform(frame1).cuda()  # Move to GPU immediately
+    cond_frame2 = transform(frame2).cuda()
+    cond_video = torch.zeros(num_frames, 3, height, width, device='cuda', dtype=pipe.dtype)
+    cond_video[0] = cond_frame1
+    cond_video[-1] = cond_frame2
+
     with torch.no_grad():
-        image_or_video = cond_video.… [truncated in the diff view]
-        image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
+        image_or_video = cond_video.unsqueeze(0)
         cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
         cond_latents = cond_latents * pipe.vae.config.scaling_factor
         cond_latents = cond_latents.to(dtype=pipe.dtype)
         assert not torch.any(torch.isnan(cond_latents))
 
     video = call_pipe(
         pipe,
         prompt=prompt,
@@ -138,10 +134,13 @@ def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolu
         guidance_scale=guidance_scale,
         generator=torch.Generator(device="cuda").manual_seed(0),
     ).frames[0]
 
     video_path = "output.mp4"
-    # video_bytes = io.BytesIO()
     export_to_video(video, video_path, fps=24)
+    del cond_video  # Manual deletion
+    del cond_frame1  # Manual deletion
+    del cond_frame2  # Manual deletion
+    del image_or_video  # Manual deletion
     torch.cuda.empty_cache()
     return video_path
 
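The commit replaces the debug prints and the old conditioning-video construction with an explicit torchvision preprocessing pipeline, and frees the conditioning tensors before returning. A standalone sketch of the new frame-conditioning step; the function name, the float16 default, and passing height/width as arguments are illustrative, not from the app (the real code reads resolution from the UI and dtype from pipe.dtype):

```python
import torch
from PIL import Image
from torchvision import transforms

def build_cond_video(frame1: Image.Image, frame2: Image.Image,
                     num_frames: int, height: int, width: int,
                     device: str = "cuda",
                     dtype: torch.dtype = torch.float16) -> torch.Tensor:
    """Hypothetical helper mirroring the preprocessing added in this commit."""
    transform = transforms.Compose([
        transforms.ToTensor(),                              # PIL -> float [C, H, W] in [0, 1]
        transforms.Resize((height, width), antialias=True), # match the requested resolution
        transforms.Normalize(mean=[0.5, 0.5, 0.5],
                             std=[0.5, 0.5, 0.5]),          # [0, 1] -> [-1, 1]
    ])
    # Zero tensor for all frames; only the endpoints carry real image content.
    cond_video = torch.zeros(num_frames, 3, height, width, device=device, dtype=dtype)
    cond_video[0] = transform(frame1).to(device, dtype)     # first frame pins the start
    cond_video[-1] = transform(frame2).to(device, dtype)    # last frame pins the end
    return cond_video
```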
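One behavioral difference worth flagging: the removed code permuted the batch to [B, C, F, H, W] before pipe.vae.encode, while the new code passes cond_video.unsqueeze(0) through unchanged, i.e. as [B, F, C, H, W]. If the video VAE expects channels-first input, as the comment on the deleted permute line suggests, the encoding step would need the permute restored. A hedged sketch under that assumption, reusing cond_video and pipe from app.py:

```python
# Assumes the VAE expects [B, C, F, H, W] input (per the removed permute's comment).
with torch.no_grad():
    image_or_video = cond_video.unsqueeze(0)                             # [F, C, H, W] -> [1, F, C, H, W]
    image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # -> [1, C, F, H, W]
    cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()  # sample the VAE posterior
    cond_latents = cond_latents * pipe.vae.config.scaling_factor         # scale into latent space
    cond_latents = cond_latents.to(dtype=pipe.dtype)
    assert not torch.any(torch.isnan(cond_latents))                      # fail fast on NaNs
```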
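The added cleanup block is a common pattern on shared GPU hardware: del only drops the Python references, which lets PyTorch's caching allocator reuse the memory, and torch.cuda.empty_cache() then returns the cached blocks to the driver. It is not required for correctness, and the four del statements can be collapsed into one:

```python
del cond_video, cond_frame1, cond_frame2, image_or_video  # drop references
torch.cuda.empty_cache()  # return cached CUDA blocks to the driver
```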