ustwo-api / scripts /preprocess_phone_audio.py
asdfasdfqrqwer's picture
Deploy from GitHub 2026-04-23T03:56:31Z
c857b85
Raw
History Blame Contribute Delete
5.63 kB
#!/usr/bin/env python3
"""Preprocess studio recordings (e.g. AI Hub) into phone-call quality audio.

Applies a PSTN simulation (band-pass + downsampling + G.711 companding) to
clean audio in order to generate training data that resembles real call
recordings.

Usage:
    # Single file
    python scripts/preprocess_phone_audio.py data/aihub_raw/sample.wav
    # Batch-process a directory
    python scripts/preprocess_phone_audio.py data/aihub_raw/ -o data/aihub_phone/
    # Choose the companding method (default: random)
    python scripts/preprocess_phone_audio.py data/aihub_raw/ --companding alaw
    # Also copy the originals (for mixed original + phone training)
    python scripts/preprocess_phone_audio.py data/aihub_raw/ -o data/training/ --keep-original
"""
from __future__ import annotations
import argparse
import logging
import shutil
import sys
from pathlib import Path
import librosa
import soundfile as sf
# Add the project root to sys.path. PROJECT_ROOT is the parent of the
# directory containing this file; the insert must happen BEFORE the `src`
# import below, which is why that import is not grouped with the others.
PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from src.common.phone_simulator import CompandingType, PhoneSimulator
# Configure root logging at import time: INFO level, timestamped messages.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("preprocess_phone_audio")
# Lower-case file suffixes (with leading dot) treated as audio input.
SUPPORTED_EXTENSIONS = {".wav", ".mp3", ".m4a", ".ogg", ".flac"}
def find_audio_files(input_path: Path) -> list[Path]:
    """Return the audio files located at ``input_path``.

    Args:
        input_path: A single audio file, or a directory that is searched
            recursively.

    Returns:
        For a file: a one-element list when its suffix (compared
        case-insensitively) is in ``SUPPORTED_EXTENSIONS``, otherwise an
        empty list after logging a warning. For a directory: the sorted list
        of all regular files beneath it whose suffix matches. A path that is
        neither yields an empty list.
    """
    if input_path.is_file():
        if input_path.suffix.lower() in SUPPORTED_EXTENSIONS:
            return [input_path]
        logger.warning(f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {input_path.suffix}")
        return []

    # Walk the tree once and compare lower-cased suffixes so that the
    # directory scan accepts the same files as the single-file branch above
    # (e.g. "CLIP.WAV"); a per-extension glob pattern is case-sensitive on
    # POSIX file systems. `is_file()` skips directories that merely happen
    # to be named like an audio file.
    return sorted(
        candidate
        for candidate in input_path.rglob("*")
        if candidate.is_file()
        and candidate.suffix.lower() in SUPPORTED_EXTENSIONS
    )
def process_file(
    input_file: Path,
    output_dir: Path,
    simulator: PhoneSimulator,
    input_root: Path,
    keep_original: bool = False,
) -> bool:
    """Convert one audio file to phone quality and write it under ``output_dir``.

    The output mirrors the input's position relative to ``input_root``; the
    processed audio is saved as ``phone_<stem>.wav`` (16-bit PCM).

    Args:
        input_file: Audio file to convert; must be located under
            ``input_root``.
        output_dir: Root directory for generated files.
        simulator: Object whose ``process(audio, sr)`` returns
            ``(processed_audio, new_sample_rate)``.
        input_root: Directory against which the relative output path is
            computed.
        keep_original: When true, also copy the untouched input next to the
            processed file as ``orig_<original file name>``.

    Returns:
        ``True`` on success, ``False`` if any step raised (the error is
        logged and swallowed so a batch run can continue).
    """
    try:
        # Preserve the source directory layout beneath output_dir.
        relative = input_file.relative_to(input_root)
        output_file = output_dir / relative.with_suffix(".wav")
        output_file.parent.mkdir(parents=True, exist_ok=True)

        # Load as mono at the file's native sample rate (sr=None).
        audio, sr = librosa.load(str(input_file), sr=None, mono=True)

        # Apply the phone-quality simulation.
        processed, new_sr = simulator.process(audio, sr)

        # Store the result with a "phone_" prefix.
        # NOTE(review): inputs that differ only by extension (a.wav / a.mp3)
        # map to the same phone_a.wav — confirm whether that can occur.
        phone_output = output_file.with_name(f"phone_{output_file.name}")
        sf.write(str(phone_output), processed, new_sr, subtype="PCM_16")

        # Optionally copy the original as well (for mixed training sets).
        if keep_original:
            # copy2 duplicates the bytes unchanged, so keep the input's own
            # name/suffix; naming it "<stem>.wav" would mislabel mp3/flac/...
            orig_output = output_file.with_name(f"orig_{input_file.name}")
            shutil.copy2(str(input_file), str(orig_output))
        return True
    except Exception as e:
        # Per-file boundary: report and let the caller count the failure.
        logger.error(f"์ฒ˜๋ฆฌ ์‹คํŒจ [{input_file.name}]: {e}")
        return False
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser (input, -o/--output, --companding, --keep-original)."""
    parser = argparse.ArgumentParser(
        description="์ŠคํŠœ๋””์˜ค ๋…น์Œ โ†’ ์ „ํ™” ํ†ตํ™” ํ’ˆ์งˆ ์ „์ฒ˜๋ฆฌ",
    )
    parser.add_argument(
        "input",
        type=Path,
        help="์ž…๋ ฅ ์˜ค๋””์˜ค ํŒŒ์ผ ๋˜๋Š” ๋””๋ ‰ํ† ๋ฆฌ",
    )
    parser.add_argument(
        "-o", "--output",
        type=Path,
        default=None,
        help="์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ (๊ธฐ๋ณธ: {input}_phone/)",
    )
    parser.add_argument(
        "--companding",
        type=str,
        choices=["alaw", "ulaw", "random"],
        default="random",
        help="G.711 companding ๋ฐฉ์‹ (๊ธฐ๋ณธ: random โ€” ํŒŒ์ผ๋งˆ๋‹ค ๋žœ๋ค ์„ ํƒ)",
    )
    parser.add_argument(
        "--keep-original",
        action="store_true",
        help="์›๋ณธ ํŒŒ์ผ๋„ ์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ์— ๋ณต์‚ฌ (์›๋ณธ+์ „ํ™” ํ˜ผํ•ฉ ํ•™์Šต์šฉ)",
    )
    return parser


def main():
    """Command-line entry point: convert one file or a directory tree.

    Exits with status 1 when the input path does not exist or contains no
    supported audio files. Per-file failures are counted and reported in the
    final summary but do not change the exit status.
    """
    args = _build_arg_parser().parse_args()

    # Validate the input path.
    input_path = args.input.resolve()
    if not input_path.exists():
        logger.error(f"์ž…๋ ฅ ๊ฒฝ๋กœ๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค: {input_path}")
        sys.exit(1)

    # Decide the output directory: explicit -o, else "<stem>_phone" beside a
    # file input or "<name>_phone" beside a directory input.
    if args.output is not None:
        output_dir = args.output.resolve()
    elif input_path.is_file():
        output_dir = input_path.parent / f"{input_path.stem}_phone"
    else:
        output_dir = input_path.parent / f"{input_path.name}_phone"

    # Root used to compute each file's relative output path.
    input_root = input_path if input_path.is_dir() else input_path.parent

    # Discover the audio files before touching the file system, so that a
    # run with nothing to process does not leave an empty output directory.
    audio_files = find_audio_files(input_path)
    if not audio_files:
        logger.error("์ฒ˜๋ฆฌํ•  ์˜ค๋””์˜ค ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค.")
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)

    logger.info(f"์˜ค๋””์˜ค ํŒŒ์ผ {len(audio_files)}๊ฐœ ๋ฐœ๊ฒฌ")
    logger.info(f"์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ: {output_dir}")
    logger.info(f"Companding: {args.companding}")
    if args.keep_original:
        logger.info("์›๋ณธ ํŒŒ์ผ๋„ ํ•จ๊ป˜ ๋ณต์‚ฌํ•ฉ๋‹ˆ๋‹ค")

    # Build the simulator; the CLI choice string is passed to CompandingType.
    companding = CompandingType(args.companding)
    simulator = PhoneSimulator(companding=companding)

    # Process every file, tallying successes and failures.
    success = 0
    fail = 0
    for i, audio_file in enumerate(audio_files, 1):
        logger.info(f"[{i}/{len(audio_files)}] {audio_file.name}")
        if process_file(audio_file, output_dir, simulator, input_root, args.keep_original):
            success += 1
        else:
            fail += 1

    logger.info(f"์™„๋ฃŒ: ์„ฑ๊ณต {success}, ์‹คํŒจ {fail}, ์ „์ฒด {len(audio_files)}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()