Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

BoxOfColors committed 8 days ago

Commit

a5f92a7

1 Parent(s): 2b4b56f

Fix audio WAV header: ensure contiguous memory layout before torchaudio.save

NumPy array slicing can produce a non-contiguous view; saving a tensor
created by torch.from_numpy from such a view with torchaudio.save can
write a malformed WAV header, causing the browser audio player to report
the wrong duration. np.ascontiguousarray() ensures a contiguous layout
(copying if needed) before saving. Also reverts the unnecessary
44100 Hz resample.

Files changed (1) hide show

app.py +1 -4

app.py CHANGED Viewed

@@ -635,11 +635,8 @@ def generate_hunyuan(video_file, prompt, negative_prompt, seed_val,
         # Trim to exact video duration
         full_wav = full_wav[:, : int(round(total_dur_s * sr))]
-        audio_tensor = torch.from_numpy(full_wav)  # (C, samples) at sr Hz
-        # Resample to 44100 Hz — 48 kHz WAV headers can confuse browser audio players
-        audio_44k = torchaudio.functional.resample(audio_tensor, orig_freq=sr, new_freq=44100)
         audio_path = os.path.join(tmp_dir, f"hunyuan_{sample_idx}.wav")
-        torchaudio.save(audio_path, audio_44k, 44100)
         video_path = os.path.join(tmp_dir, f"hunyuan_{sample_idx}.mp4")
         merge_audio_video(audio_path, video_file, video_path)
         outputs.append((video_path, audio_path))

         # Trim to exact video duration
         full_wav = full_wav[:, : int(round(total_dur_s * sr))]
         audio_path = os.path.join(tmp_dir, f"hunyuan_{sample_idx}.wav")
+        torchaudio.save(audio_path, torch.from_numpy(np.ascontiguousarray(full_wav)), sr)
         video_path = os.path.join(tmp_dir, f"hunyuan_{sample_idx}.mp4")
         merge_audio_video(audio_path, video_file, video_path)
         outputs.append((video_path, audio_path))