Spaces:

LPX55
/

HunYuan-Keyframe2VID-Control-Lora

Runtime error

App Files Files Community

LPX55 committed on Mar 25, 2025

Commit

6747584

verified ·

1 Parent(s): c8d124f

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -20

app.py CHANGED Viewed

@@ -102,27 +102,31 @@ def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: T
 @spaces.GPU(duration=120)
 def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolution: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     width, height = map(int, resolution.split('x'))
-    transform = transforms.Compose([
-        transforms.ToTensor(),
-        transforms.Resize((height, width), antialias=True),
-        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
-    ])
-    cond_frame1 = transform(frame1).cuda() # Move to GPU immediately
-    cond_frame2 = transform(frame2).cuda()
-    cond_video = torch.zeros(num_frames, 3, height, width, device='cuda', dtype=pipe.dtype)
-    cond_video[0] = cond_frame1
-    cond_video[-1] = cond_frame2
     with torch.no_grad():
-        image_or_video = cond_video.unsqueeze(0)
         cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
         cond_latents = cond_latents * pipe.vae.config.scaling_factor
         cond_latents = cond_latents.to(dtype=pipe.dtype)
         assert not torch.any(torch.isnan(cond_latents))
     video = call_pipe(
         pipe,
         prompt=prompt,
@@ -134,13 +138,10 @@ def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolu
         guidance_scale=guidance_scale,
         generator=torch.Generator(device="cuda").manual_seed(0),
     ).frames[0]
     video_path = "output.mp4"
     export_to_video(video, video_path, fps=24)
-    del cond_video # Manual deletion
-    del cond_frame1 # Manual deletion
-    del cond_frame2 # Manual deletion
-    del image_or_video # Manual deletion
     torch.cuda.empty_cache()
     return video_path

 @spaces.GPU(duration=120)
 def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolution: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
+    # Debugging print statements
+    print(f"Frame 1 Type: {type(frame1)}")
+    print(f"Frame 2 Type: {type(frame2)}")
+    print(f"Resolution: {resolution}")
+    # Parse resolution
     width, height = map(int, resolution.split('x'))
+    # Load and preprocess frames
+    cond_frame1 = np.array(frame1)
+    cond_frame2 = np.array(frame2)
+    cond_frame1 = resize_image_to_bucket(cond_frame1, bucket_reso=(width, height))
+    cond_frame2 = resize_image_to_bucket(cond_frame2, bucket_reso=(width, height))
+    cond_video = np.zeros(shape=(num_frames, height, width, 3))
+    cond_video[0], cond_video[-1] = cond_frame1, cond_frame2
+    cond_video = torch.from_numpy(cond_video.copy()).permute(0, 3, 1, 2)
+    cond_video = torch.stack([video_transforms(x) for x in cond_video], dim=0).unsqueeze(0)
     with torch.no_grad():
+        image_or_video = cond_video.to(device="cuda", dtype=pipe.dtype)
+        image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
         cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
         cond_latents = cond_latents * pipe.vae.config.scaling_factor
         cond_latents = cond_latents.to(dtype=pipe.dtype)
         assert not torch.any(torch.isnan(cond_latents))
+    # Generate video
     video = call_pipe(
         pipe,
         prompt=prompt,
         guidance_scale=guidance_scale,
         generator=torch.Generator(device="cuda").manual_seed(0),
     ).frames[0]
+    # Export to video
     video_path = "output.mp4"
+    # video_bytes = io.BytesIO()
     export_to_video(video, video_path, fps=24)
     torch.cuda.empty_cache()
     return video_path