# lipsync-docker / audio_processing.py
# naicoi's picture
# Update audio_processing.py
# 214ba1c verified
"""Audio processing utilities for OutofLipSync"""
import os
import subprocess
from ffmpy import FFmpeg, FFRuntimeError
def get_audio_duration(audio_path: str, max_duration: float = 30.0) -> float:
    """Return the duration of an audio file in seconds, as reported by ffprobe.

    Args:
        audio_path: Path to the audio file.
        max_duration: Maximum duration in seconds (default 30).
            NOTE(review): this value is accepted but not enforced by this
            function; confirm whether callers apply the limit themselves
            before relying on it.

    Returns:
        Duration in seconds.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
        ValueError: If ffprobe's output is empty or not a number.
    """
    cmd = [
        "ffprobe",
        "-v",
        "error",
        # Ask only for the container-level duration field.
        "-show_entries",
        "format=duration",
        # Print the bare value, without section wrappers or the key name.
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        audio_path,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    raw_duration = result.stdout.strip()
    try:
        return float(raw_duration)
    except ValueError as e:
        # Keep the exception type but say which file and which output failed,
        # instead of the bare "could not convert string to float" message.
        raise ValueError(
            f"ffprobe returned no usable duration for {audio_path!r}: "
            f"{raw_duration!r}"
        ) from e
# def prepare_target_audio(audio_path: str, output_dir: str) -> tuple:
# """Prepare target audio for lipsync (DEPRECATED - use prepare_audio_for_lipsync instead)
#
# Args:
# audio_path: Path to target audio
# output_dir: Output directory
#
# Returns:
# (audio_16k_path, audio_upsampled_path)
# """
# audio_16k = os.path.join(output_dir, "audio_16k.wav")
# audio_upsampled = os.path.join(output_dir, "audio_upsampled.wav")
#
# ffmpeg1 = FFmpeg(
# inputs={audio_path: None},
# outputs={
# audio_16k: [
# "-ar",
# "16000",
# "-ac",
# "1",
# "-acodec",
# "pcm_s16le",
# "-loglevel",
# "error",
# "-y",
# ]
# },
# )
# try:
# ffmpeg1.run()
# except FFRuntimeError as e:
# raise Exception(f"FFmpeg failed to convert to 16k: {e}")
#
# ffmpeg2 = FFmpeg(
# inputs={audio_16k: None},
# outputs={
# audio_upsampled: [
# "-ar",
# "48000",
# "-ac",
# "1",
# "-acodec",
# "pcm_s16le",
# "-loglevel",
# "error",
# "-y",
# ]
# },
# )
# try:
# ffmpeg2.run()
# except FFRuntimeError as e:
# raise Exception(f"FFmpeg failed to upsample to 48k: {e}")
#
# return audio_16k, audio_upsampled
def prepare_audio_for_lipsync(audio_path: str, output_dir: str) -> str:
    """Convert audio to a 16 kHz mono WAV for the lipsync pipeline.

    Args:
        audio_path: Path to the source audio file.
        output_dir: Directory in which "audio_16k.wav" is written.

    Returns:
        Path to the converted 16 kHz WAV file.

    Raises:
        Exception: If FFmpeg fails; the underlying FFRuntimeError is chained
            as the cause.
    """
    audio_16k = os.path.join(output_dir, "audio_16k.wav")
    ffmpeg = FFmpeg(
        inputs={audio_path: None},
        outputs={
            audio_16k: [
                "-ar",
                "16000",
                "-ac",
                "1",
                "-acodec",
                "pcm_s16le",
                "-loglevel",
                "error",
                # Overwrite an existing output file without prompting.
                "-y",
            ]
        },
    )
    try:
        ffmpeg.run()
    except FFRuntimeError as e:
        # Chain explicitly so the traceback marks FFmpeg's error as the cause.
        raise Exception(f"FFmpeg failed to convert to 16k: {e}") from e
    return audio_16k
def prepare_audio_for_youtube_aac(audio_path: str, output_dir: str) -> str:
    """Encode audio for YouTube using the codec settings from ``config``.

    The codec, bitrate and sample rate are read from the project's ``config``
    module; the output is always two-channel and written as
    "audio_youtube.aac".

    Args:
        audio_path: Path to the source audio file.
        output_dir: Directory in which "audio_youtube.aac" is written.

    Returns:
        Path to the encoded audio file.

    Raises:
        Exception: If FFmpeg fails; the underlying FFRuntimeError is chained
            as the cause.
    """
    from config import (
        YOUTUBE_AUDIO_CODEC,
        YOUTUBE_AUDIO_BITRATE,
        YOUTUBE_AUDIO_SAMPLE_RATE,
    )

    output_path = os.path.join(output_dir, "audio_youtube.aac")
    ffmpeg = FFmpeg(
        inputs={audio_path: None},
        outputs={
            output_path: [
                # The config module is not visible from here, so every value
                # taken from it is coerced to str; command-line options must
                # be strings and str() is a no-op for values that already are.
                "-ar",
                str(YOUTUBE_AUDIO_SAMPLE_RATE),
                "-ac",
                "2",
                "-acodec",
                str(YOUTUBE_AUDIO_CODEC),
                "-b:a",
                str(YOUTUBE_AUDIO_BITRATE),
                "-loglevel",
                "error",
                # Overwrite an existing output file without prompting.
                "-y",
            ]
        },
    )
    try:
        ffmpeg.run()
    except FFRuntimeError as e:
        # Chain explicitly so the traceback marks FFmpeg's error as the cause.
        raise Exception(f"FFmpeg failed to prepare audio for YouTube: {e}") from e
    return output_path
def prepare_audio_for_youtube(audio_path: str, output_dir: str) -> str:
    """Convert audio to a 48 kHz two-channel PCM WAV intended for YouTube.

    Args:
        audio_path: Path to the source audio file (WAV).
        output_dir: Directory in which "audio_final.wav" is written.

    Returns:
        Path to the converted WAV file (48 kHz PCM).

    Raises:
        Exception: If FFmpeg fails; the underlying FFRuntimeError is chained
            as the cause.
    """
    output_path = os.path.join(output_dir, "audio_final.wav")
    ffmpeg = FFmpeg(
        inputs={audio_path: None},
        outputs={
            output_path: [
                "-ar",
                "48000",
                "-ac",
                "2",
                "-acodec",
                "pcm_s16le",
                "-loglevel",
                "error",
                # Overwrite an existing output file without prompting.
                "-y",
            ]
        },
    )
    try:
        ffmpeg.run()
    except FFRuntimeError as e:
        # Chain explicitly so the traceback marks FFmpeg's error as the cause.
        raise Exception(f"FFmpeg failed to prepare audio for YouTube: {e}") from e
    return output_path