Spaces:

naicoi
/

lipsync-docker

Runtime error

App Files Files Community

lipsync-docker / audio_processing.py

naicoi

Update audio_processing.py

214ba1c verified 4 months ago

raw

history blame contribute delete

5.19 kB

	"""Audio processing utilities for OutofLipSync"""

	import os
	import subprocess
	from ffmpy import FFmpeg, FFRuntimeError


	def get_audio_duration(audio_path: str, max_duration: float = 30.0) -> float:
	    """Return the duration of an audio file in seconds, as reported by ffprobe.

	    Args:
	        audio_path: Path to the audio file to probe.
	        max_duration: Kept for backward compatibility only. The value is not
	            enforced: no length check is performed here, so callers that
	            need a limit must compare the returned duration themselves.

	    Returns:
	        Duration in seconds.

	    Raises:
	        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
	        ValueError: If ffprobe's output cannot be parsed as a number.
	    """
	    cmd = [
	        "ffprobe",
	        "-v",
	        "error",
	        "-show_entries",
	        "format=duration",
	        "-of",
	        # Print only the bare value, without section wrappers or a key name.
	        "default=noprint_wrappers=1:nokey=1",
	        audio_path,
	    ]
	    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
	    raw_duration = result.stdout.strip()

	    try:
	        return float(raw_duration)
	    except ValueError as e:
	        # Empty or non-numeric output would otherwise surface as a bare
	        # "could not convert string to float" with no hint about the file.
	        raise ValueError(
	            f"ffprobe returned no usable duration for {audio_path!r}: "
	            f"{raw_duration!r}"
	        ) from e


	# def prepare_target_audio(audio_path: str, output_dir: str) -> tuple:
	# """Prepare target audio for lipsync (DEPRECATED - use prepare_audio_for_lipsync instead)
	#
	# Args:
	# audio_path: Path to target audio
	# output_dir: Output directory
	#
	# Returns:
	# (audio_16k_path, audio_upsampled_path)
	# """
	# audio_16k = os.path.join(output_dir, "audio_16k.wav")
	# audio_upsampled = os.path.join(output_dir, "audio_upsampled.wav")
	#
	# ffmpeg1 = FFmpeg(
	# inputs={audio_path: None},
	# outputs={
	# audio_16k: [
	# "-ar",
	# "16000",
	# "-ac",
	# "1",
	# "-acodec",
	# "pcm_s16le",
	# "-loglevel",
	# "error",
	# "-y",
	# ]
	# },
	# )
	# try:
	# ffmpeg1.run()
	# except FFRuntimeError as e:
	# raise Exception(f"FFmpeg failed to convert to 16k: {e}")
	#
	# ffmpeg2 = FFmpeg(
	# inputs={audio_16k: None},
	# outputs={
	# audio_upsampled: [
	# "-ar",
	# "48000",
	# "-ac",
	# "1",
	# "-acodec",
	# "pcm_s16le",
	# "-loglevel",
	# "error",
	# "-y",
	# ]
	# },
	# )
	# try:
	# ffmpeg2.run()
	# except FFRuntimeError as e:
	# raise Exception(f"FFmpeg failed to upsample to 48k: {e}")
	#
	# return audio_16k, audio_upsampled


	def prepare_audio_for_lipsync(audio_path: str, output_dir: str) -> str:
	    """Convert audio to 16 kHz mono 16-bit PCM WAV for the lipsync pipeline.

	    The result is written to ``audio_16k.wav`` inside ``output_dir``; an
	    existing file with that name is overwritten (``-y``).

	    Args:
	        audio_path: Path to the source audio file.
	        output_dir: Directory that receives the converted file.

	    Returns:
	        Path to the 16 kHz WAV file.

	    Raises:
	        Exception: If ffmpeg fails; the original ``FFRuntimeError`` is
	            attached as ``__cause__``.
	    """
	    audio_16k = os.path.join(output_dir, "audio_16k.wav")

	    ffmpeg = FFmpeg(
	        inputs={audio_path: None},
	        outputs={
	            audio_16k: [
	                "-ar",
	                "16000",
	                "-ac",
	                "1",
	                "-acodec",
	                "pcm_s16le",
	                "-loglevel",
	                "error",
	                "-y",
	            ]
	        },
	    )
	    try:
	        ffmpeg.run()
	    except FFRuntimeError as e:
	        # Chain explicitly so the ffmpeg failure is reported as the direct cause.
	        raise Exception(f"FFmpeg failed to convert to 16k: {e}") from e

	    return audio_16k


	def prepare_audio_for_youtube_aac(audio_path: str, output_dir: str) -> str:
	    """Encode audio as stereo with the YouTube codec settings from ``config``.

	    The result is written to ``audio_youtube.aac`` inside ``output_dir``; an
	    existing file with that name is overwritten (``-y``). Codec, bitrate and
	    sample rate come from ``config.YOUTUBE_AUDIO_CODEC``,
	    ``config.YOUTUBE_AUDIO_BITRATE`` and ``config.YOUTUBE_AUDIO_SAMPLE_RATE``.

	    Args:
	        audio_path: Path to the source audio file.
	        output_dir: Directory that receives the encoded file.

	    Returns:
	        Path to the encoded audio file.

	    Raises:
	        Exception: If ffmpeg fails; the original ``FFRuntimeError`` is
	            attached as ``__cause__``.
	    """
	    # Imported lazily, as in the original; presumably to avoid loading
	    # config at module import time -- TODO confirm.
	    from config import (
	        YOUTUBE_AUDIO_CODEC,
	        YOUTUBE_AUDIO_BITRATE,
	        YOUTUBE_AUDIO_SAMPLE_RATE,
	    )

	    output_path = os.path.join(output_dir, "audio_youtube.aac")

	    ffmpeg = FFmpeg(
	        inputs={audio_path: None},
	        outputs={
	            output_path: [
	                "-ar",
	                str(YOUTUBE_AUDIO_SAMPLE_RATE),
	                "-ac",
	                "2",
	                "-acodec",
	                YOUTUBE_AUDIO_CODEC,
	                "-b:a",
	                # Stringified like the sample rate so a numeric config value
	                # cannot put a non-string into the argument list.
	                str(YOUTUBE_AUDIO_BITRATE),
	                "-loglevel",
	                "error",
	                "-y",
	            ]
	        },
	    )
	    try:
	        ffmpeg.run()
	    except FFRuntimeError as e:
	        # Chain explicitly so the ffmpeg failure is reported as the direct cause.
	        raise Exception(f"FFmpeg failed to prepare audio for YouTube: {e}") from e

	    return output_path


	def prepare_audio_for_youtube(audio_path: str, output_dir: str) -> str:
	    """Convert audio to 48 kHz stereo 16-bit PCM WAV for YouTube upload.

	    The result is written to ``audio_final.wav`` inside ``output_dir``; an
	    existing file with that name is overwritten (``-y``).

	    Args:
	        audio_path: Path to the source audio file (WAV).
	        output_dir: Directory that receives the converted file.

	    Returns:
	        Path to the 48 kHz PCM WAV file.

	    Raises:
	        Exception: If ffmpeg fails; the original ``FFRuntimeError`` is
	            attached as ``__cause__``.
	    """
	    output_path = os.path.join(output_dir, "audio_final.wav")

	    ffmpeg = FFmpeg(
	        inputs={audio_path: None},
	        outputs={
	            output_path: [
	                "-ar",
	                "48000",
	                "-ac",
	                "2",
	                "-acodec",
	                "pcm_s16le",
	                "-loglevel",
	                "error",
	                "-y",
	            ]
	        },
	    )
	    try:
	        ffmpeg.run()
	    except FFRuntimeError as e:
	        # Chain explicitly so the ffmpeg failure is reported as the direct cause.
	        raise Exception(f"FFmpeg failed to prepare audio for YouTube: {e}") from e

	    return output_path