voice-tools / src /lib /format_converter.py
jcudit's picture
jcudit HF Staff
fix: also correct lib/ in gitignore to only exclude root-level, add src/lib package
3ff2f18
"""
Audio format converter: m4a ↔ wav conversion, sample rate normalization.
Converts between m4a (compressed) and wav (lossless) formats.
Normalizes to 48kHz/24-bit for processing, outputs as m4a/192kbps for final.
"""
import logging
import tempfile
from pathlib import Path
from typing import Optional
# Module-level logger, named after this module via __name__; the conversion
# functions below emit debug messages through it.
logger = logging.getLogger(__name__)
class FormatConversionError(Exception):
    """Error raised by this module when an audio conversion cannot be completed."""
def m4a_to_wav(
    input_path: str,
    output_path: Optional[str] = None,
    target_sr: int = 48000,
    target_bit_depth: int = 24,
) -> str:
    """
    Convert an m4a file to a mono wav file at the requested rate and bit depth.

    Args:
        input_path: Path to input m4a file
        output_path: Path to output wav file. When None, the file is written
            to the system temp directory as "<input stem>_temp.wav".
        target_sr: Target sample rate in Hz (default: 48000)
        target_bit_depth: Target bit depth; must be a positive multiple of 8
            (default: 24)

    Returns:
        Path to output wav file

    Raises:
        FormatConversionError: If the input file is missing, the bit depth is
            not a positive multiple of 8, or loading/exporting fails
    """
    source = Path(input_path)
    if not source.exists():
        raise FormatConversionError(f"Input file not found: {source}")

    # The bit depth is turned into a byte width with integer division, which
    # would silently round e.g. 20 down to 16-bit; reject such values instead.
    if target_bit_depth <= 0 or target_bit_depth % 8 != 0:
        raise FormatConversionError(
            f"Unsupported bit depth: {target_bit_depth} (must be a positive multiple of 8)"
        )

    if output_path is None:
        # NOTE: the temp name depends only on the input stem, so two inputs
        # sharing a stem map to the same temp file.
        destination = Path(tempfile.gettempdir()) / f"{source.stem}_temp.wav"
    else:
        destination = Path(output_path)

    try:
        # pydub is imported on demand; a missing install surfaces as
        # FormatConversionError through the handler below.
        from pydub import AudioSegment

        destination.parent.mkdir(parents=True, exist_ok=True)

        audio = AudioSegment.from_file(str(source), format="m4a")

        # Normalize: sample rate, mono, then sample width in bytes (24-bit = 3).
        audio = audio.set_frame_rate(target_sr)
        audio = audio.set_channels(1)
        audio = audio.set_sample_width(target_bit_depth // 8)

        # export() hands back the open output file; close it so the data is
        # flushed and the descriptor released before the path is returned.
        audio.export(str(destination), format="wav").close()
    except FormatConversionError:
        raise
    except Exception as e:
        raise FormatConversionError(f"Failed to convert m4a to wav: {str(e)}") from e

    logger.debug("Converted m4a to wav: %s -> %s", source.name, destination.name)
    return str(destination)
def wav_to_m4a(
    input_path: str, output_path: str, bitrate: str = "192k", sample_rate: int = 48000
) -> str:
    """
    Convert a wav file to an AAC-encoded m4a file.

    Args:
        input_path: Path to input wav file
        output_path: Path to output m4a file (parent directories are created)
        bitrate: AAC bitrate (default: "192k")
        sample_rate: Sample rate in Hz (default: 48000)

    Returns:
        Path to output m4a file

    Raises:
        FormatConversionError: If the input file is missing or loading/exporting fails
    """
    source = Path(input_path)
    destination = Path(output_path)

    if not source.exists():
        raise FormatConversionError(f"Input file not found: {source}")

    try:
        # pydub is imported on demand; a missing install surfaces as
        # FormatConversionError through the handler below.
        from pydub import AudioSegment

        destination.parent.mkdir(parents=True, exist_ok=True)

        audio = AudioSegment.from_file(str(source), format="wav")
        audio = audio.set_frame_rate(sample_rate)

        # m4a is written as an mp4 container with the aac codec; the extra
        # parameters pass "-profile:a aac_low" through to the encoder.
        exported = audio.export(
            str(destination),
            format="mp4",
            codec="aac",
            bitrate=bitrate,
            parameters=["-profile:a", "aac_low"],
        )
        # export() hands back the open output file; close it explicitly.
        exported.close()
    except FormatConversionError:
        raise
    except Exception as e:
        raise FormatConversionError(f"Failed to convert wav to m4a: {str(e)}") from e

    logger.debug("Converted wav to m4a: %s -> %s", source.name, destination.name)
    return str(destination)
def normalize_to_intermediate(input_path: str, output_path: Optional[str] = None) -> str:
    """
    Normalize any audio file to the intermediate wav format (48kHz/24-bit/mono).

    The input format passed to the decoder is taken from the file extension.

    Args:
        input_path: Path to input audio file (m4a, wav, mp3, etc.)
        output_path: Path to output wav file. When None, the file is written
            to the system temp directory as "<input stem>_normalized.wav".

    Returns:
        Path to normalized wav file

    Raises:
        FormatConversionError: If the input file is missing or normalization fails
    """
    source = Path(input_path)
    if not source.exists():
        raise FormatConversionError(f"Input file not found: {source}")

    if output_path is None:
        # NOTE: the temp name depends only on the input stem, so two inputs
        # sharing a stem map to the same temp file.
        destination = Path(tempfile.gettempdir()) / f"{source.stem}_normalized.wav"
    else:
        destination = Path(output_path)

    try:
        # pydub is imported on demand; a missing install surfaces as
        # FormatConversionError through the handler below.
        from pydub import AudioSegment

        destination.parent.mkdir(parents=True, exist_ok=True)

        # Extension without the leading dot is used as the format hint.
        input_format = source.suffix.lstrip(".")
        audio = AudioSegment.from_file(str(source), format=input_format)

        # Intermediate format: 48kHz, mono, 3 bytes per sample (24-bit).
        audio = audio.set_frame_rate(48000)
        audio = audio.set_channels(1)
        audio = audio.set_sample_width(3)

        # export() hands back the open output file; close it so the data is
        # flushed and the descriptor released before the path is returned.
        audio.export(str(destination), format="wav").close()
    except FormatConversionError:
        raise
    except Exception as e:
        raise FormatConversionError(f"Failed to normalize audio: {str(e)}") from e

    logger.debug("Normalized to intermediate: %s -> %s", source.name, destination.name)
    return str(destination)
def convert_to_final_output(input_path: str, output_path: str, format: str = "m4a") -> str:
    """
    Convert an intermediate wav file to the final output format.

    "m4a" delegates to wav_to_m4a with a 192k bitrate at 48kHz; "wav" copies
    the input file to the output path unchanged.

    Args:
        input_path: Path to input wav file
        output_path: Path to output file (parent directories are created)
        format: Output format, "m4a" or "wav" (default: "m4a")

    Returns:
        Path to output file

    Raises:
        FormatConversionError: If the format is unsupported, the input file is
            missing, or the conversion/copy fails
    """
    if format == "m4a":
        return wav_to_m4a(input_path, output_path, bitrate="192k", sample_rate=48000)

    if format != "wav":
        raise FormatConversionError(f"Unsupported output format: {format}")

    # wav output: the intermediate file already is a wav, so copy it as-is.
    import shutil

    source = Path(input_path)
    destination = Path(output_path)

    if not source.exists():
        raise FormatConversionError(f"Input file not found: {source}")

    try:
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(source, destination)
    except OSError as e:
        # Covers permission problems and shutil.SameFileError alike, so callers
        # only ever see the documented exception type.
        raise FormatConversionError(f"Failed to copy wav output: {e}") from e

    return str(destination)
def batch_convert(
    input_files: list, output_dir: str, output_format: str = "m4a", progress_callback=None
) -> list:
    """
    Convert multiple files to the output format, one after another.

    Each input is first normalized to a temporary intermediate wav, then
    converted to the final format; the intermediate file is removed afterwards,
    also when the final conversion fails.

    Output files are named "<input stem>.<output_format>" inside output_dir, so
    inputs that share a stem write to the same output path.

    Args:
        input_files: List of input file paths
        output_dir: Output directory (created if missing)
        output_format: Output format (default: "m4a")
        progress_callback: Optional callback(index, total, filename), invoked
            with a 1-based index before each file is converted

    Returns:
        List of output file paths, in input order

    Raises:
        FormatConversionError: If any conversion fails (processing stops at
            the first failure)
    """
    destination_dir = Path(output_dir)
    destination_dir.mkdir(parents=True, exist_ok=True)

    total = len(input_files)
    output_files = []

    for position, input_file in enumerate(input_files, start=1):
        source = Path(input_file)
        target = destination_dir / f"{source.stem}.{output_format}"

        if progress_callback:
            progress_callback(position, total, source.name)

        intermediate = normalize_to_intermediate(str(source))
        try:
            final = convert_to_final_output(intermediate, str(target), output_format)
        finally:
            # Always remove the temp wav, even if the final conversion raised.
            Path(intermediate).unlink(missing_ok=True)

        output_files.append(final)

    return output_files
def get_conversion_info(input_path: str) -> dict:
    """
    Inspect an audio file and describe the conversion it would need.

    Args:
        input_path: Path to input file

    Returns:
        Dictionary with the file's current properties and the target
        properties, or a dictionary holding only an "error" key when the file
        is missing or cannot be loaded
    """
    try:
        from pydub import AudioSegment

        source = Path(input_path)
        if not source.exists():
            return {"error": "File not found"}

        # The extension (without the dot) doubles as the decoder format hint.
        extension = source.suffix.lstrip(".")
        segment = AudioSegment.from_file(str(source), format=extension)

        rate = segment.frame_rate
        channel_count = segment.channels
        width = segment.sample_width

        # Already in the intermediate layout: 48kHz, mono, 3 bytes per sample.
        already_normalized = rate == 48000 and channel_count == 1 and width == 3

        return {
            "current_format": extension,
            "current_sample_rate": rate,
            "current_channels": channel_count,
            "current_sample_width": width,
            # len() of the segment is in milliseconds.
            "duration_seconds": len(segment) / 1000.0,
            "needs_conversion": not already_normalized,
            "target_format": "wav (intermediate) -> m4a (final)",
            "target_sample_rate": 48000,
            "target_channels": 1,
            "target_bit_depth": 24,
        }
    except Exception as e:
        # Best-effort inspection: report the failure instead of raising.
        return {"error": str(e)}
def estimate_output_size(input_path: str, output_format: str = "m4a") -> int:
    """
    Estimate the size of the converted file from the input's duration.

    Args:
        input_path: Path to input file
        output_format: Output format ("m4a" or "wav")

    Returns:
        Estimated file size in bytes; 0 when the input cannot be inspected or
        the format is not one of the two known ones
    """
    try:
        details = get_conversion_info(input_path)
        if "error" in details:
            return 0

        seconds = details["duration_seconds"]

        if output_format == "wav":
            # 48kHz * 3 bytes (24-bit) * 1 channel
            return int(seconds * 48000 * 3)

        if output_format == "m4a":
            # AAC 192kbps = 192 * 1000 / 8 bytes per second
            bytes_per_second = 192 * 1000 / 8
            return int(seconds * bytes_per_second)

        return 0
    except Exception:
        return 0