Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

JackIsNotInTheBox committed 7 days ago

Commit

798c73c

1 Parent(s): ff95229

Fix EinopsError: handle videos with frame count not divisible by 30

Files changed (1) hide show

onset_util.py CHANGED Viewed

@@ -21,16 +21,26 @@ def extract_onset(video_path, onset_model, tmp_path, device="cuda"):
     # Load the video, change fps:
     video_path_low_fps = reencode_video_with_diff_fps(video_path, tmp_path, 15, start_second, truncate_second)
     frames, _, _ = read_video(video_path_low_fps, pts_unit="sec", output_format="TCHW")
     if frames.shape[0] >= 150:
         frames = frames[:150]
     elif frames.shape[0] >= 120:
         frames = frames[:120]
     # Transform frames
     frames = frames / 255.0
     frames = transform(frames)
-    frames = rearrange(frames, '(b t) c h w -> b c t h w', t=30).to(device)
     # Forward pass through the model to get onset features
     with torch.no_grad():

     # Load the video, change fps:
     video_path_low_fps = reencode_video_with_diff_fps(video_path, tmp_path, 15, start_second, truncate_second)
     frames, _, _ = read_video(video_path_low_fps, pts_unit="sec", output_format="TCHW")
+    t = 30
     if frames.shape[0] >= 150:
         frames = frames[:150]
     elif frames.shape[0] >= 120:
         frames = frames[:120]
+    else:
+        # Trim to the largest multiple of t that fits
+        n = (frames.shape[0] // t) * t
+        if n == 0:
+            # Pad up to t frames if video is shorter than one clip
+            pad = torch.zeros(t - frames.shape[0], *frames.shape[1:], dtype=frames.dtype)
+            frames = torch.cat([frames, pad], dim=0)
+            n = t
+        frames = frames[:n]
     # Transform frames
     frames = frames / 255.0
     frames = transform(frames)
+    frames = rearrange(frames, '(b t) c h w -> b c t h w', t=t).to(device)
     # Forward pass through the model to get onset features
     with torch.no_grad():