Spaces:

ACE-Step
/

Ace-Step-v1.5

Running on Zero

App Files Files Community

ChuxiJ committed on Jan 28

Commit

96b5f27

1 Parent(s): d5b1200

fix torchcodec ffmpeg

Browse files

Files changed (3) hide show

Dockerfile +3 -1
acestep/audio_utils.py +12 -26
acestep/handler.py +11 -26

Dockerfile CHANGED Viewed

@@ -5,10 +5,12 @@ FROM python:3.11-slim
 # Set environment variables
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
-    DEBIAN_FRONTEND=noninteractive
 # Install system dependencies
 # build-essential is required for triton to compile CUDA kernels
 RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     ffmpeg \

 # Set environment variables
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
+    DEBIAN_FRONTEND=noninteractive \
+    TORCHAUDIO_USE_TORCHCODEC=0
 # Install system dependencies
 # build-essential is required for triton to compile CUDA kernels
+# ffmpeg is required for torchaudio ffmpeg backend (audio loading/saving)
 RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     ffmpeg \

acestep/audio_utils.py CHANGED Viewed

@@ -8,6 +8,11 @@ Independent audio file operations outside of handler, supporting:
 """
 import os
 import hashlib
 import json
 from pathlib import Path
@@ -130,11 +135,11 @@ class AudioSaver:
     def _load_audio_file(self, audio_file: Union[str, Path]) -> Tuple[torch.Tensor, int]:
         """
-        Load audio file with fallback backends for compatibility.
-        In HuggingFace Space environment, the default torchcodec backend may fail
-        due to missing CUDA dependencies (libnppicc.so.12). This method tries
-        ffmpeg backend first (fast), then sox, then soundfile as fallbacks.
         Args:
             audio_file: Path to the audio file
@@ -144,7 +149,6 @@ class AudioSaver:
         Raises:
             FileNotFoundError: If the audio file doesn't exist
-            Exception: If all backends fail to load the audio
         """
         audio_file = str(audio_file)
@@ -152,27 +156,9 @@ class AudioSaver:
         if not Path(audio_file).exists():
             raise FileNotFoundError(f"Audio file not found: {audio_file}")
-        # Try ffmpeg backend first (fast and compatible)
-        try:
-            audio, sr = torchaudio.load(audio_file, backend="ffmpeg")
-            return audio, sr
-        except Exception as e:
-            logger.debug(f"[AudioSaver._load_audio_file] ffmpeg backend failed: {e}, trying sox backend")
-        # Try sox backend as second option
-        try:
-            audio, sr = torchaudio.load(audio_file, backend="sox")
-            return audio, sr
-        except Exception as e:
-            logger.debug(f"[AudioSaver._load_audio_file] sox backend failed: {e}, trying soundfile backend")
-        # Try soundfile backend as last resort
-        try:
-            audio, sr = torchaudio.load(audio_file, backend="soundfile")
-            return audio, sr
-        except Exception as e:
-            logger.error(f"[AudioSaver._load_audio_file] All backends failed to load audio: {audio_file}")
-            raise
     def convert_audio(
         self,

 """
 import os
+# Disable torchcodec backend to avoid CUDA dependency issues on HuggingFace Space
+# This forces torchaudio to use ffmpeg/sox/soundfile backends instead
+os.environ["TORCHAUDIO_USE_TORCHCODEC"] = "0"
 import hashlib
 import json
 from pathlib import Path
     def _load_audio_file(self, audio_file: Union[str, Path]) -> Tuple[torch.Tensor, int]:
         """
+        Load audio file using torchaudio.
+        Note: TORCHAUDIO_USE_TORCHCODEC=0 is set at module level to disable
+        torchcodec backend and avoid CUDA dependency issues on HuggingFace Space.
+        This makes torchaudio use ffmpeg backend by default.
         Args:
             audio_file: Path to the audio file
         Raises:
             FileNotFoundError: If the audio file doesn't exist
         """
         audio_file = str(audio_file)
         if not Path(audio_file).exists():
             raise FileNotFoundError(f"Audio file not found: {audio_file}")
+        # Load audio using default backend (ffmpeg, since torchcodec is disabled)
+        audio, sr = torchaudio.load(audio_file)
+        return audio, sr
     def convert_audio(
         self,

acestep/handler.py CHANGED Viewed

@@ -7,6 +7,10 @@ import os
 # Disable tokenizers parallelism to avoid fork warning
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 import math
 from copy import deepcopy
 import tempfile
@@ -1064,11 +1068,11 @@ class AceStepHandler:
     def _load_audio_file(self, audio_file) -> Tuple[torch.Tensor, int]:
         """
-        Load audio file with fallback backends for compatibility.
-        In HuggingFace Space environment, the default torchcodec backend may fail
-        due to missing CUDA dependencies (libnppicc.so.12). This method tries
-        ffmpeg backend first (fast), then sox, then soundfile as fallbacks.
         Args:
             audio_file: Path to the audio file
@@ -1078,33 +1082,14 @@ class AceStepHandler:
         Raises:
             FileNotFoundError: If the audio file doesn't exist
-            Exception: If all backends fail to load the audio
         """
         # Check if file exists first
         if not os.path.exists(audio_file):
             raise FileNotFoundError(f"Audio file not found: {audio_file}")
-        # Try ffmpeg backend first (fast and compatible)
-        try:
-            audio, sr = torchaudio.load(audio_file, backend="ffmpeg")
-            return audio, sr
-        except Exception as e:
-            logger.debug(f"[_load_audio_file] ffmpeg backend failed: {e}, trying sox backend")
-        # Try sox backend as second option
-        try:
-            audio, sr = torchaudio.load(audio_file, backend="sox")
-            return audio, sr
-        except Exception as e:
-            logger.debug(f"[_load_audio_file] sox backend failed: {e}, trying soundfile backend")
-        # Try soundfile backend as last resort
-        try:
-            audio, sr = torchaudio.load(audio_file, backend="soundfile")
-            return audio, sr
-        except Exception as e:
-            logger.error(f"[_load_audio_file] All backends failed to load audio: {audio_file}")
-            raise
     def _normalize_audio_to_stereo_48k(self, audio: torch.Tensor, sr: int) -> torch.Tensor:
         """

 # Disable tokenizers parallelism to avoid fork warning
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
+# Disable torchcodec backend to avoid CUDA dependency issues on HuggingFace Space
+# This forces torchaudio to use ffmpeg/sox/soundfile backends instead
+os.environ["TORCHAUDIO_USE_TORCHCODEC"] = "0"
 import math
 from copy import deepcopy
 import tempfile
     def _load_audio_file(self, audio_file) -> Tuple[torch.Tensor, int]:
         """
+        Load audio file using torchaudio.
+        Note: TORCHAUDIO_USE_TORCHCODEC=0 is set at module level to disable
+        torchcodec backend and avoid CUDA dependency issues on HuggingFace Space.
+        This makes torchaudio use ffmpeg backend by default.
         Args:
             audio_file: Path to the audio file
         Raises:
             FileNotFoundError: If the audio file doesn't exist
         """
         # Check if file exists first
         if not os.path.exists(audio_file):
             raise FileNotFoundError(f"Audio file not found: {audio_file}")
+        # Load audio using default backend (ffmpeg, since torchcodec is disabled)
+        audio, sr = torchaudio.load(audio_file)
+        return audio, sr
     def _normalize_audio_to_stereo_48k(self, audio: torch.Tensor, sr: int) -> torch.Tensor:
         """