File size: 2,359 Bytes
f5bce42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""Audio normalization module for forced alignment preprocessing."""

import subprocess
from pathlib import Path
from typing import Union

from config import SAMPLE_RATE


def normalize_audio(input_path: Union[str, Path], output_path: Union[str, Path]) -> str:
    """Normalize an audio file to mono, 16-bit PCM WAV at ``SAMPLE_RATE``.

    Converts audio from whatever format ffmpeg can read (mp3, wav, m4a,
    aac, ...) to the format required by the facebook/mms-300m forced
    alignment model. ``SAMPLE_RATE`` comes from ``config`` (expected to be
    16 kHz -- confirm against config).

    Args:
        input_path: Path to the source audio file.
        output_path: Destination path for the WAV file. Missing parent
            directories are created; an existing file is overwritten.

    Returns:
        The output path as a string.

    Raises:
        FileNotFoundError: If the input file does not exist or is empty.
        RuntimeError: If ffmpeg is not installed, exits with a non-zero
            status, or produces no (or an empty) output file.
    """
    input_path = Path(input_path)
    output_path = Path(output_path)

    if not input_path.exists():
        raise FileNotFoundError(f"Input audio file not found: {input_path}")

    # An empty input is reported with FileNotFoundError as well; the type is
    # kept for backward compatibility with callers that already catch it.
    if input_path.stat().st_size == 0:
        raise FileNotFoundError(f"Input audio file is empty: {input_path}")

    output_path.parent.mkdir(parents=True, exist_ok=True)

    cmd = [
        "ffmpeg", "-y",             # overwrite the output file if present
        "-i", str(input_path),
        "-ac", "1",                 # mono (1 channel)
        "-ar", str(SAMPLE_RATE),    # target sample rate
        "-acodec", "pcm_s16le",     # 16-bit little-endian PCM
        "-f", "wav",                # WAV container
        str(output_path),
    ]

    # Only the subprocess call lives in the try block, so that the
    # FileNotFoundError handler can only mean "ffmpeg executable missing".
    try:
        subprocess.run(
            cmd,
            # ffmpeg reads stdin for interactive commands; detach it so the
            # call cannot consume or block on the parent's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # ffmpeg may echo bytes that are not valid in the locale
            # encoding; never let decoding mask the real failure.
            errors="replace",
            check=True,
        )
    except subprocess.CalledProcessError as e:
        error_msg = f"ffmpeg failed with return code {e.returncode}"
        if e.stderr:
            error_msg += f"\nError details: {e.stderr.strip()}"
        raise RuntimeError(error_msg) from e
    except FileNotFoundError as e:
        raise RuntimeError(
            "ffmpeg not found. Please install ffmpeg: "
            "brew install ffmpeg (macOS) or visit https://ffmpeg.org/"
        ) from e

    # A zero exit status does not guarantee usable output; verify it.
    if not output_path.exists() or output_path.stat().st_size == 0:
        raise RuntimeError(f"ffmpeg failed to create output file: {output_path}")

    print(f"✅ Audio normalized → {output_path}")
    return str(output_path)