Spaces:

mvp-lab
/

audio_generation

Running on Zero

App Files Files Community

Yng314 committed on Feb 28

Commit

212dda8

1 Parent(s): 5e8b75e

feat: Implement robust audio file loading with `torchaudio` and `soundfile` fallback.

Browse files

Files changed (2) hide show

acestep/handler.py +19 -2
requirements.txt +2 -0

acestep/handler.py CHANGED Viewed

@@ -131,6 +131,23 @@ class AceStepHandler:
         if self.config is None:
             return False
         return getattr(self.config, 'is_turbo', False)
     def load_lora(self, lora_path: str) -> str:
         """Load LoRA adapter into the decoder.
@@ -1235,7 +1252,7 @@ class AceStepHandler:
         try:
             # Load audio file
-            audio, sr = torchaudio.load(audio_file)
             logger.debug(f"[process_reference_audio] Reference audio shape: {audio.shape}")
             logger.debug(f"[process_reference_audio] Reference audio sample rate: {sr}")
@@ -1290,7 +1307,7 @@ class AceStepHandler:
         try:
             # Load audio file
-            audio, sr = torchaudio.load(audio_file)
             # Normalize to stereo 48kHz
             audio = self._normalize_audio_to_stereo_48k(audio, sr)

         if self.config is None:
             return False
         return getattr(self.config, 'is_turbo', False)
+    def _load_audio_file(self, audio_file: str) -> Tuple[torch.Tensor, int]:
+        """Load audio robustly with torchaudio first, then soundfile fallback.
+
+        Args:
+            audio_file: Path of the audio file to load.
+
+        Returns:
+            A ``(audio, sample_rate)`` tuple. On the soundfile fallback path
+            the tensor is float32 and laid out as [channels, frames], and the
+            sample rate is converted to ``int``.
+
+        Raises:
+            Exception: The original ``torchaudio.load`` error when both
+                loaders fail; the soundfile error is attached as its
+                ``__cause__`` so neither failure is lost.
+        """
+        try:
+            return torchaudio.load(audio_file)
+        except Exception as torchaudio_exc:
+            logger.warning(
+                f"[_load_audio_file] torchaudio.load failed for {audio_file}, "
+                f"fallback to soundfile: {torchaudio_exc}"
+            )
+            try:
+                # soundfile returns [frames, channels]; convert to [channels, frames]
+                audio_np, sr = sf.read(audio_file, dtype="float32", always_2d=True)
+                # .T is a strided view; copy() yields a contiguous array to wrap.
+                audio = torch.from_numpy(audio_np.T.copy())
+                return audio, int(sr)
+            except Exception as soundfile_exc:
+                # Record the fallback failure too; previously it was dropped
+                # silently, leaving only the torchaudio error to debug with.
+                logger.warning(
+                    f"[_load_audio_file] soundfile fallback also failed for "
+                    f"{audio_file}: {soundfile_exc}"
+                )
+                # Keep raising the primary (torchaudio) error so callers see
+                # the same exception as before, now explicitly chained.
+                raise torchaudio_exc from soundfile_exc
     def load_lora(self, lora_path: str) -> str:
         """Load LoRA adapter into the decoder.
         try:
             # Load audio file
+            audio, sr = self._load_audio_file(audio_file)
             logger.debug(f"[process_reference_audio] Reference audio shape: {audio.shape}")
             logger.debug(f"[process_reference_audio] Reference audio sample rate: {sr}")
         try:
             # Load audio file
+            audio, sr = self._load_audio_file(audio_file)
             # Normalize to stereo 48kHz
             audio = self._normalize_audio_to_stereo_48k(audio, sr)

requirements.txt CHANGED Viewed

@@ -2,6 +2,8 @@ gradio==6.7.0
 spaces==0.47.0
 torch
 torchaudio
 transformers>=4.51.0,<4.58.0
 diffusers
 accelerate

 spaces==0.47.0
 torch
 torchaudio
+# torchaudio>=2.9 may require torchcodec backend for torchaudio.load
+torchcodec
 transformers>=4.51.0,<4.58.0
 diffusers
 accelerate