Spaces:
Running
on
A100
Running
on
A100
fix torchcodec ffmpeg
Browse files
- Dockerfile +3 -1
- acestep/audio_utils.py +12 -26
- acestep/handler.py +11 -26
Dockerfile
CHANGED
|
@@ -5,10 +5,12 @@ FROM python:3.11-slim
|
|
| 5 |
# Set environment variables
|
| 6 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 7 |
PYTHONUNBUFFERED=1 \
|
| 8 |
-
DEBIAN_FRONTEND=noninteractive
|
|
|
|
| 9 |
|
| 10 |
# Install system dependencies
|
| 11 |
# build-essential is required for triton to compile CUDA kernels
|
|
|
|
| 12 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 13 |
git \
|
| 14 |
ffmpeg \
|
|
|
|
| 5 |
# Set environment variables
|
| 6 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 7 |
PYTHONUNBUFFERED=1 \
|
| 8 |
+
DEBIAN_FRONTEND=noninteractive \
|
| 9 |
+
TORCHAUDIO_USE_TORCHCODEC=0
|
| 10 |
|
| 11 |
# Install system dependencies
|
| 12 |
# build-essential is required for triton to compile CUDA kernels
|
| 13 |
+
# ffmpeg is required for the torchaudio ffmpeg backend (audio loading/saving)
|
| 14 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 15 |
git \
|
| 16 |
ffmpeg \
|
acestep/audio_utils.py
CHANGED
|
@@ -8,6 +8,11 @@ Independent audio file operations outside of handler, supporting:
|
|
| 8 |
"""
|
| 9 |
|
| 10 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
import hashlib
|
| 12 |
import json
|
| 13 |
from pathlib import Path
|
|
@@ -130,11 +135,11 @@ class AudioSaver:
|
|
| 130 |
|
| 131 |
def _load_audio_file(self, audio_file: Union[str, Path]) -> Tuple[torch.Tensor, int]:
|
| 132 |
"""
|
| 133 |
-
Load audio file
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
|
| 139 |
Args:
|
| 140 |
audio_file: Path to the audio file
|
|
@@ -144,7 +149,6 @@ class AudioSaver:
|
|
| 144 |
|
| 145 |
Raises:
|
| 146 |
FileNotFoundError: If the audio file doesn't exist
|
| 147 |
-
Exception: If all backends fail to load the audio
|
| 148 |
"""
|
| 149 |
audio_file = str(audio_file)
|
| 150 |
|
|
@@ -152,27 +156,9 @@ class AudioSaver:
|
|
| 152 |
if not Path(audio_file).exists():
|
| 153 |
raise FileNotFoundError(f"Audio file not found: {audio_file}")
|
| 154 |
|
| 155 |
-
#
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
return audio, sr
|
| 159 |
-
except Exception as e:
|
| 160 |
-
logger.debug(f"[AudioSaver._load_audio_file] ffmpeg backend failed: {e}, trying sox backend")
|
| 161 |
-
|
| 162 |
-
# Try sox backend as second option
|
| 163 |
-
try:
|
| 164 |
-
audio, sr = torchaudio.load(audio_file, backend="sox")
|
| 165 |
-
return audio, sr
|
| 166 |
-
except Exception as e:
|
| 167 |
-
logger.debug(f"[AudioSaver._load_audio_file] sox backend failed: {e}, trying soundfile backend")
|
| 168 |
-
|
| 169 |
-
# Try soundfile backend as last resort
|
| 170 |
-
try:
|
| 171 |
-
audio, sr = torchaudio.load(audio_file, backend="soundfile")
|
| 172 |
-
return audio, sr
|
| 173 |
-
except Exception as e:
|
| 174 |
-
logger.error(f"[AudioSaver._load_audio_file] All backends failed to load audio: {audio_file}")
|
| 175 |
-
raise
|
| 176 |
|
| 177 |
def convert_audio(
|
| 178 |
self,
|
|
|
|
| 8 |
"""
|
| 9 |
|
| 10 |
import os
|
| 11 |
+
|
| 12 |
+
# Disable the torchcodec backend to avoid CUDA dependency issues on Hugging Face Spaces
|
| 13 |
+
# This forces torchaudio to use the ffmpeg/sox/soundfile backends instead
|
| 14 |
+
os.environ["TORCHAUDIO_USE_TORCHCODEC"] = "0"
|
| 15 |
+
|
| 16 |
import hashlib
|
| 17 |
import json
|
| 18 |
from pathlib import Path
|
|
|
|
| 135 |
|
| 136 |
def _load_audio_file(self, audio_file: Union[str, Path]) -> Tuple[torch.Tensor, int]:
|
| 137 |
"""
|
| 138 |
+
Load audio file using torchaudio.
|
| 139 |
|
| 140 |
+
Note: TORCHAUDIO_USE_TORCHCODEC=0 is set at module level to disable
|
| 141 |
+
the torchcodec backend and avoid CUDA dependency issues on Hugging Face Spaces.
|
| 142 |
+
This makes torchaudio use the ffmpeg backend by default.
|
| 143 |
|
| 144 |
Args:
|
| 145 |
audio_file: Path to the audio file
|
|
|
|
| 149 |
|
| 150 |
Raises:
|
| 151 |
FileNotFoundError: If the audio file doesn't exist
|
|
|
|
| 152 |
"""
|
| 153 |
audio_file = str(audio_file)
|
| 154 |
|
|
|
|
| 156 |
if not Path(audio_file).exists():
|
| 157 |
raise FileNotFoundError(f"Audio file not found: {audio_file}")
|
| 158 |
|
| 159 |
+
# Load audio using default backend (ffmpeg, since torchcodec is disabled)
|
| 160 |
+
audio, sr = torchaudio.load(audio_file)
|
| 161 |
+
return audio, sr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
def convert_audio(
|
| 164 |
self,
|
acestep/handler.py
CHANGED
|
@@ -7,6 +7,10 @@ import os
|
|
| 7 |
# Disable tokenizers parallelism to avoid fork warning
|
| 8 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
import math
|
| 11 |
from copy import deepcopy
|
| 12 |
import tempfile
|
|
@@ -1064,11 +1068,11 @@ class AceStepHandler:
|
|
| 1064 |
|
| 1065 |
def _load_audio_file(self, audio_file) -> Tuple[torch.Tensor, int]:
|
| 1066 |
"""
|
| 1067 |
-
Load audio file
|
| 1068 |
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
|
| 1073 |
Args:
|
| 1074 |
audio_file: Path to the audio file
|
|
@@ -1078,33 +1082,14 @@ class AceStepHandler:
|
|
| 1078 |
|
| 1079 |
Raises:
|
| 1080 |
FileNotFoundError: If the audio file doesn't exist
|
| 1081 |
-
Exception: If all backends fail to load the audio
|
| 1082 |
"""
|
| 1083 |
# Check if file exists first
|
| 1084 |
if not os.path.exists(audio_file):
|
| 1085 |
raise FileNotFoundError(f"Audio file not found: {audio_file}")
|
| 1086 |
|
| 1087 |
-
#
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
return audio, sr
|
| 1091 |
-
except Exception as e:
|
| 1092 |
-
logger.debug(f"[_load_audio_file] ffmpeg backend failed: {e}, trying sox backend")
|
| 1093 |
-
|
| 1094 |
-
# Try sox backend as second option
|
| 1095 |
-
try:
|
| 1096 |
-
audio, sr = torchaudio.load(audio_file, backend="sox")
|
| 1097 |
-
return audio, sr
|
| 1098 |
-
except Exception as e:
|
| 1099 |
-
logger.debug(f"[_load_audio_file] sox backend failed: {e}, trying soundfile backend")
|
| 1100 |
-
|
| 1101 |
-
# Try soundfile backend as last resort
|
| 1102 |
-
try:
|
| 1103 |
-
audio, sr = torchaudio.load(audio_file, backend="soundfile")
|
| 1104 |
-
return audio, sr
|
| 1105 |
-
except Exception as e:
|
| 1106 |
-
logger.error(f"[_load_audio_file] All backends failed to load audio: {audio_file}")
|
| 1107 |
-
raise
|
| 1108 |
|
| 1109 |
def _normalize_audio_to_stereo_48k(self, audio: torch.Tensor, sr: int) -> torch.Tensor:
|
| 1110 |
"""
|
|
|
|
| 7 |
# Disable tokenizers parallelism to avoid fork warning
|
| 8 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 9 |
|
| 10 |
+
# Disable the torchcodec backend to avoid CUDA dependency issues on Hugging Face Spaces
|
| 11 |
+
# This forces torchaudio to use the ffmpeg/sox/soundfile backends instead
|
| 12 |
+
os.environ["TORCHAUDIO_USE_TORCHCODEC"] = "0"
|
| 13 |
+
|
| 14 |
import math
|
| 15 |
from copy import deepcopy
|
| 16 |
import tempfile
|
|
|
|
| 1068 |
|
| 1069 |
def _load_audio_file(self, audio_file) -> Tuple[torch.Tensor, int]:
|
| 1070 |
"""
|
| 1071 |
+
Load audio file using torchaudio.
|
| 1072 |
|
| 1073 |
+
Note: TORCHAUDIO_USE_TORCHCODEC=0 is set at module level to disable
|
| 1074 |
+
the torchcodec backend and avoid CUDA dependency issues on Hugging Face Spaces.
|
| 1075 |
+
This makes torchaudio use the ffmpeg backend by default.
|
| 1076 |
|
| 1077 |
Args:
|
| 1078 |
audio_file: Path to the audio file
|
|
|
|
| 1082 |
|
| 1083 |
Raises:
|
| 1084 |
FileNotFoundError: If the audio file doesn't exist
|
|
|
|
| 1085 |
"""
|
| 1086 |
# Check if file exists first
|
| 1087 |
if not os.path.exists(audio_file):
|
| 1088 |
raise FileNotFoundError(f"Audio file not found: {audio_file}")
|
| 1089 |
|
| 1090 |
+
# Load audio using default backend (ffmpeg, since torchcodec is disabled)
|
| 1091 |
+
audio, sr = torchaudio.load(audio_file)
|
| 1092 |
+
return audio, sr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1093 |
|
| 1094 |
def _normalize_audio_to_stereo_48k(self, audio: torch.Tensor, sr: int) -> torch.Tensor:
|
| 1095 |
"""
|