Spaces:
Running on Zero
Running on Zero
Yng314 committed on
Commit ·
212dda8
1
Parent(s): 5e8b75e
feat: Implement robust audio file loading with `torchaudio` and `soundfile` fallback.
Browse files
- acestep/handler.py +19 -2
- requirements.txt +2 -0
acestep/handler.py
CHANGED
|
@@ -131,6 +131,23 @@ class AceStepHandler:
|
|
| 131 |
if self.config is None:
|
| 132 |
return False
|
| 133 |
return getattr(self.config, 'is_turbo', False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
def load_lora(self, lora_path: str) -> str:
|
| 136 |
"""Load LoRA adapter into the decoder.
|
|
@@ -1235,7 +1252,7 @@ class AceStepHandler:
|
|
| 1235 |
|
| 1236 |
try:
|
| 1237 |
# Load audio file
|
| 1238 |
-
audio, sr =
|
| 1239 |
|
| 1240 |
logger.debug(f"[process_reference_audio] Reference audio shape: {audio.shape}")
|
| 1241 |
logger.debug(f"[process_reference_audio] Reference audio sample rate: {sr}")
|
|
@@ -1290,7 +1307,7 @@ class AceStepHandler:
|
|
| 1290 |
|
| 1291 |
try:
|
| 1292 |
# Load audio file
|
| 1293 |
-
audio, sr =
|
| 1294 |
|
| 1295 |
# Normalize to stereo 48kHz
|
| 1296 |
audio = self._normalize_audio_to_stereo_48k(audio, sr)
|
|
|
|
| 131 |
if self.config is None:
|
| 132 |
return False
|
| 133 |
return getattr(self.config, 'is_turbo', False)
|
| 134 |
+
|
| 135 |
+
def _load_audio_file(self, audio_file: str) -> Tuple[torch.Tensor, int]:
    """Load an audio file with torchaudio, falling back to soundfile.

    ``torchaudio.load`` is tried first. If it raises, the failure is logged
    and the file is read with ``soundfile`` instead.

    Args:
        audio_file: Path of the audio file to load.

    Returns:
        A ``(audio, sample_rate)`` tuple. On the soundfile path ``audio`` is
        a float32 tensor laid out as ``[channels, frames]`` and
        ``sample_rate`` is an ``int``; on the torchaudio path the value is
        whatever ``torchaudio.load`` returns.

    Raises:
        Exception: The original torchaudio error when both loaders fail. The
            soundfile error is logged and attached as ``__cause__`` so that
            neither failure is lost.
    """
    try:
        return torchaudio.load(audio_file)
    except Exception as torchaudio_exc:
        logger.warning(
            f"[_load_audio_file] torchaudio.load failed for {audio_file}, "
            f"fallback to soundfile: {torchaudio_exc}"
        )
        try:
            # soundfile returns [frames, channels]; convert to [channels, frames]
            audio_np, sr = sf.read(audio_file, dtype="float32", always_2d=True)
            # .copy() makes the transposed view contiguous before handing it
            # to torch.from_numpy.
            audio = torch.from_numpy(audio_np.T.copy())
            return audio, int(sr)
        except Exception as soundfile_exc:
            # Surface the fallback failure too; previously it was dropped
            # from the logs and only the torchaudio error was reported.
            logger.warning(
                f"[_load_audio_file] soundfile fallback also failed for "
                f"{audio_file}: {soundfile_exc}"
            )
            # Keep raising the primary (torchaudio) error so callers see the
            # same exception type as before, with the fallback error chained.
            raise torchaudio_exc from soundfile_exc
|
| 151 |
|
| 152 |
def load_lora(self, lora_path: str) -> str:
|
| 153 |
"""Load LoRA adapter into the decoder.
|
|
|
|
| 1252 |
|
| 1253 |
try:
|
| 1254 |
# Load audio file
|
| 1255 |
+
audio, sr = self._load_audio_file(audio_file)
|
| 1256 |
|
| 1257 |
logger.debug(f"[process_reference_audio] Reference audio shape: {audio.shape}")
|
| 1258 |
logger.debug(f"[process_reference_audio] Reference audio sample rate: {sr}")
|
|
|
|
| 1307 |
|
| 1308 |
try:
|
| 1309 |
# Load audio file
|
| 1310 |
+
audio, sr = self._load_audio_file(audio_file)
|
| 1311 |
|
| 1312 |
# Normalize to stereo 48kHz
|
| 1313 |
audio = self._normalize_audio_to_stereo_48k(audio, sr)
|
requirements.txt
CHANGED
|
@@ -2,6 +2,8 @@ gradio==6.7.0
|
|
| 2 |
spaces==0.47.0
|
| 3 |
torch
|
| 4 |
torchaudio
|
|
|
|
|
|
|
| 5 |
transformers>=4.51.0,<4.58.0
|
| 6 |
diffusers
|
| 7 |
accelerate
|
|
|
|
| 2 |
spaces==0.47.0
|
| 3 |
torch
|
| 4 |
torchaudio
|
| 5 |
+
# torchaudio>=2.9 may require torchcodec backend for torchaudio.load
|
| 6 |
+
torchcodec
|
| 7 |
transformers>=4.51.0,<4.58.0
|
| 8 |
diffusers
|
| 9 |
accelerate
|