"""Preprocess studio recordings (e.g. AI Hub data) into telephone-call quality.

Applies a PSTN simulation (band-pass filtering + downsampling + G.711
companding) to clean audio in order to generate training data that resembles
real phone-call recordings.

Usage:
    # Single file
    python scripts/preprocess_phone_audio.py data/aihub_raw/sample.wav

    # Batch-process a directory
    python scripts/preprocess_phone_audio.py data/aihub_raw/ -o data/aihub_phone/

    # Choose the companding scheme (default: random)
    python scripts/preprocess_phone_audio.py data/aihub_raw/ --companding alaw

    # Also copy the originals (for mixed original + phone training)
    python scripts/preprocess_phone_audio.py data/aihub_raw/ -o data/training/ --keep-original
"""
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import logging |
| import shutil |
| import sys |
| from pathlib import Path |
|
|
| import librosa |
| import soundfile as sf |
|
|
| |
# Put the directory two levels above this file (the project root when the
# script lives in ``scripts/``) on the import path so that the ``src`` package
# can be imported when this file is run directly.
# The path is resolved first: on interpreters where ``__file__`` is relative,
# ``Path("script.py").parent.parent`` would collapse to "." instead of "..".
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
|
|
| from src.common.phone_simulator import CompandingType, PhoneSimulator |
|
|
# Root logging configuration: timestamped records at INFO level and above.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Named logger shared by every function in this script.
logger = logging.getLogger("preprocess_phone_audio")
|
|
# Lower-case file extensions (with leading dot) that this script will process.
SUPPORTED_EXTENSIONS = {".wav", ".mp3", ".m4a", ".ogg", ".flac"}


def find_audio_files(input_path: Path) -> list[Path]:
    """Return the audio files to process for ``input_path``.

    Args:
        input_path: A single audio file, or a directory that is searched
            recursively.

    Returns:
        For a file: a one-element list when its extension is supported,
        otherwise an empty list (a warning is logged). For a directory: every
        regular file below it whose extension is supported, sorted by path.
        Extensions are compared case-insensitively in both cases.
    """
    if input_path.is_file():
        if input_path.suffix.lower() in SUPPORTED_EXTENSIONS:
            return [input_path]
        logger.warning(f"지원하지 않는 파일 형식: {input_path.suffix}")
        return []

    # Filter on the lower-cased suffix instead of globbing "*.<ext>": glob
    # patterns are case-sensitive on POSIX, so "*.wav" would skip "a.WAV" even
    # though the single-file branch above accepts it. ``is_file()`` also keeps
    # directories that merely look like audio files (e.g. "takes.flac/") out.
    return sorted(
        candidate
        for candidate in input_path.rglob("*")
        if candidate.is_file()
        and candidate.suffix.lower() in SUPPORTED_EXTENSIONS
    )
|
|
|
|
def process_file(
    input_file: Path,
    output_dir: Path,
    simulator: PhoneSimulator,
    input_root: Path,
    keep_original: bool = False,
) -> bool:
    """Convert one audio file to phone quality and write it under ``output_dir``.

    The position of ``input_file`` relative to ``input_root`` is mirrored
    below ``output_dir``. The converted audio is written there as
    ``phone_<stem>.wav`` (16-bit PCM). With ``keep_original`` the untouched
    source is also copied next to it as ``orig_<original file name>``.

    Args:
        input_file: Audio file to convert; must be located under ``input_root``.
        output_dir: Root directory for the generated files.
        simulator: Object whose ``process(audio, sr)`` returns the processed
            samples and their new sample rate.
        input_root: Directory against which the relative output path is
            computed.
        keep_original: Also copy the source file into the output tree.

    Returns:
        True on success; False if any step raised (the error is logged and
        not re-raised, so a batch run can continue with the next file).
    """
    try:
        relative = input_file.relative_to(input_root)
        output_file = output_dir / relative.with_suffix(".wav")
        output_file.parent.mkdir(parents=True, exist_ok=True)

        # sr=None keeps the file's native sample rate; mono=True downmixes.
        audio, sr = librosa.load(str(input_file), sr=None, mono=True)

        processed, new_sr = simulator.process(audio, sr)

        phone_output = output_file.with_name(f"phone_{output_file.name}")
        sf.write(str(phone_output), processed, new_sr, subtype="PCM_16")

        if keep_original:
            # copy2 is a byte-for-byte copy, so the copy must keep the
            # source's own extension; naming an .mp3/.flac "orig_x.wav"
            # would mislabel its container format.
            orig_output = output_file.with_name(f"orig_{input_file.name}")
            shutil.copy2(str(input_file), str(orig_output))

        return True

    except Exception as e:
        logger.error(f"처리 실패 [{input_file.name}]: {e}")
        return False
|
|
|
|
def main() -> None:
    """Command-line entry point: convert a file or a directory tree.

    Resolves the input and output locations, collects the supported audio
    files, runs each of them through ``process_file`` and logs a
    success/failure summary. Exits with status 1 when the input path does not
    exist or no supported audio file is found; failures of individual files
    are only counted and reported.
    """
    parser = argparse.ArgumentParser(
        description="스튜디오 녹음 → 전화 통화 품질 전처리",
    )
    parser.add_argument(
        "input",
        type=Path,
        help="입력 오디오 파일 또는 디렉토리",
    )
    parser.add_argument(
        "-o", "--output",
        type=Path,
        default=None,
        help="출력 디렉토리 (기본: {input}_phone/)",
    )
    parser.add_argument(
        "--companding",
        type=str,
        choices=["alaw", "ulaw", "random"],
        default="random",
        help="G.711 companding 방식 (기본: random — 파일마다 랜덤 선택)",
    )
    parser.add_argument(
        "--keep-original",
        action="store_true",
        help="원본 파일도 출력 디렉토리에 복사 (원본+전화 혼합 학습용)",
    )
    args = parser.parse_args()

    input_path = args.input.resolve()
    if not input_path.exists():
        logger.error(f"입력 경로가 존재하지 않습니다: {input_path}")
        sys.exit(1)

    # Without -o, write to a sibling "<name>_phone" directory: named after the
    # file stem for a single file, or after the directory for a directory.
    if args.output:
        output_dir = args.output.resolve()
    elif input_path.is_file():
        output_dir = input_path.parent / f"{input_path.stem}_phone"
    else:
        output_dir = input_path.parent / f"{input_path.name}_phone"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Base against which each file's relative output path is computed.
    input_root = input_path if input_path.is_dir() else input_path.parent

    audio_files = find_audio_files(input_path)
    if not audio_files:
        logger.error("처리할 오디오 파일이 없습니다.")
        sys.exit(1)

    logger.info(f"오디오 파일 {len(audio_files)}개 발견")
    logger.info(f"출력 디렉토리: {output_dir}")
    logger.info(f"Companding: {args.companding}")
    if args.keep_original:
        logger.info("원본 파일도 함께 복사합니다")

    # The CLI choice strings are passed straight to the CompandingType enum.
    companding = CompandingType(args.companding)
    simulator = PhoneSimulator(companding=companding)

    success = 0
    fail = 0
    for i, audio_file in enumerate(audio_files, 1):
        logger.info(f"[{i}/{len(audio_files)}] {audio_file.name}")
        if process_file(audio_file, output_dir, simulator, input_root, args.keep_original):
            success += 1
        else:
            fail += 1

    logger.info(f"완료: 성공 {success}, 실패 {fail}, 전체 {len(audio_files)}")
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|