Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -102,31 +102,27 @@ def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: T
 
 @spaces.GPU(duration=120)
 def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolution: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
-    # Debugging print statements
-    print(f"Frame 1 Type: {type(frame1)}")
-    print(f"Frame 2 Type: {type(frame2)}")
-    print(f"Resolution: {resolution}")
-
-    # Parse resolution
     width, height = map(int, resolution.split('x'))
-    … [nine further removed lines are truncated in the diff view]
-    cond_video = torch.… [truncated in the diff view]
+
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Resize((height, width), antialias=True),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+    ])
+
+    cond_frame1 = transform(frame1).cuda()  # Move to GPU immediately
+    cond_frame2 = transform(frame2).cuda()
+    cond_video = torch.zeros(num_frames, 3, height, width, device='cuda', dtype=pipe.dtype)
+    cond_video[0] = cond_frame1
+    cond_video[-1] = cond_frame2
+
     with torch.no_grad():
-        image_or_video = cond_video.… [truncated in the diff view]
-        image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # [B, F, C, H, W] -> [B, C, F, H, W]
+        image_or_video = cond_video.unsqueeze(0)
         cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()
         cond_latents = cond_latents * pipe.vae.config.scaling_factor
         cond_latents = cond_latents.to(dtype=pipe.dtype)
         assert not torch.any(torch.isnan(cond_latents))
 
     video = call_pipe(
         pipe,
         prompt=prompt,
@@ -138,10 +134,13 @@ def generate_video(prompt: str, frame1: Image.Image, frame2: Image.Image, resolu
         guidance_scale=guidance_scale,
         generator=torch.Generator(device="cuda").manual_seed(0),
     ).frames[0]
 
     video_path = "output.mp4"
-    # video_bytes = io.BytesIO()
     export_to_video(video, video_path, fps=24)
+    del cond_video  # Manual deletion
+    del cond_frame1  # Manual deletion
+    del cond_frame2  # Manual deletion
+    del image_or_video  # Manual deletion
     torch.cuda.empty_cache()
     return video_path
 
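The commit replaces the debug prints and the old conditioning-video construction with an explicit torchvision preprocessing pipeline, and frees the conditioning tensors before returning. A standalone sketch of the new frame-conditioning step; the function name, the float16 default, and passing height/width as arguments are illustrative, not from the app (the real code reads resolution from the UI and dtype from pipe.dtype):

```python
import torch
from PIL import Image
from torchvision import transforms

def build_cond_video(frame1: Image.Image, frame2: Image.Image,
                     num_frames: int, height: int, width: int,
                     device: str = "cuda",
                     dtype: torch.dtype = torch.float16) -> torch.Tensor:
    """Hypothetical helper mirroring the preprocessing added in this commit."""
    transform = transforms.Compose([
        transforms.ToTensor(),                              # PIL -> float [C, H, W] in [0, 1]
        transforms.Resize((height, width), antialias=True), # match the requested resolution
        transforms.Normalize(mean=[0.5, 0.5, 0.5],
                             std=[0.5, 0.5, 0.5]),          # [0, 1] -> [-1, 1]
    ])
    # Zero tensor for all frames; only the endpoints carry real image content.
    cond_video = torch.zeros(num_frames, 3, height, width, device=device, dtype=dtype)
    cond_video[0] = transform(frame1).to(device, dtype)     # first frame pins the start
    cond_video[-1] = transform(frame2).to(device, dtype)    # last frame pins the end
    return cond_video
```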
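One behavioral difference worth flagging: the removed code permuted the batch to [B, C, F, H, W] before pipe.vae.encode, while the new code passes cond_video.unsqueeze(0) through unchanged, i.e. as [B, F, C, H, W]. If the video VAE expects channels-first input, as the comment on the deleted permute line suggests, the encoding step would need the permute restored. A hedged sketch under that assumption, reusing cond_video and pipe from app.py:

```python
# Assumes the VAE expects [B, C, F, H, W] input (per the removed permute's comment).
with torch.no_grad():
    image_or_video = cond_video.unsqueeze(0)                             # [F, C, H, W] -> [1, F, C, H, W]
    image_or_video = image_or_video.permute(0, 2, 1, 3, 4).contiguous()  # -> [1, C, F, H, W]
    cond_latents = pipe.vae.encode(image_or_video).latent_dist.sample()  # sample the VAE posterior
    cond_latents = cond_latents * pipe.vae.config.scaling_factor         # scale into latent space
    cond_latents = cond_latents.to(dtype=pipe.dtype)
    assert not torch.any(torch.isnan(cond_latents))                      # fail fast on NaNs
```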
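The added cleanup block is a common pattern on shared GPU hardware: del only drops the Python references, which lets PyTorch's caching allocator reuse the memory, and torch.cuda.empty_cache() then returns the cached blocks to the driver. It is not required for correctness, and the four del statements can be collapsed into one:

```python
del cond_video, cond_frame1, cond_frame2, image_or_video  # drop references
torch.cuda.empty_cache()  # return cached CUDA blocks to the driver
```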