|
|
|
|
|
""" |
|
|
Split audio: split 100 songs into Chinese and English directories.

Usage:   python split_audio.py <input_directory> <output_directory>
Example: python split_audio.py /path/to/audio /path/to/output

Input:  files numbered 1-50 are Chinese, 51-100 are English.
Output: <input_name>_cn/ and <input_name>_en/ (e.g. model_cn/, model_en/),
        each renumbered to 0-49.
Output numbering aligns with the GT file_index.
|
|
""" |
|
|
import os, re, shutil, argparse |
|
|
from pathlib import Path |
|
|
|
|
|
def extract_idx(filename):
    """Return the last run of digits in a file name as an integer.

    The extension is stripped before searching so that digits inside it
    (the "3" in ".mp3") are never mistaken for the track index. Directory
    components are dropped as well, so a numbered parent directory cannot
    leak into the result when a full path is passed.

    Args:
        filename: File name or path, e.g. "song_012.wav".

    Returns:
        The last number found in the stem as an int (leading zeros are
        dropped by the int conversion), or None when the stem has no digits.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    matches = re.findall(r'\d+', stem)
    return int(matches[-1]) if matches else None
|
|
|
|
|
def split(src, dst):
    """Copy numbered audio files from *src* into per-language directories.

    Two directories, ``<src name>_cn`` and ``<src name>_en``, are created
    under *dst* (including missing parents). Every ``.wav`` / ``.mp3`` file
    directly inside *src* whose name carries an index (see ``extract_idx``)
    is copied with ``shutil.copy2``:

    * index 1-50   -> ``<src name>_cn/<index - 1>``, zero-padded to 6 digits
    * index 51-100 -> ``<src name>_en/<index - 51>``, zero-padded to 6 digits

    The original suffix is kept as-is. Files without an index, or with an
    index outside 1-100, are ignored. Source files are never modified.

    Args:
        src: Input directory (str or path-like) holding the numbered files.
        dst: Directory under which the two output directories are created.
    """
    src, dst = Path(src), Path(dst)
    name = src.name
    cn_dir, en_dir = dst / f"{name}_cn", dst / f"{name}_en"
    cn_dir.mkdir(parents=True, exist_ok=True)
    en_dir.mkdir(parents=True, exist_ok=True)

    audio_suffixes = ('.wav', '.mp3')
    cn_first, cn_last = 1, 50
    en_first, en_last = 51, 100

    # sorted() keeps the copy order deterministic; if two sources map to the
    # same destination name, the one that sorts last overwrites the other.
    for f in sorted(src.glob("*.*")):
        # "*.*" also matches directories; copy2 would raise on those.
        if not f.is_file():
            continue
        if f.suffix.lower() not in audio_suffixes:
            continue
        idx = extract_idx(f.name)
        if idx is None:
            continue

        if cn_first <= idx <= cn_last:
            shutil.copy2(f, cn_dir / f"{idx - cn_first:06d}{f.suffix}")
        elif en_first <= idx <= en_last:
            shutil.copy2(f, en_dir / f"{idx - en_first:06d}{f.suffix}")

    print(f"Split {name} -> {cn_dir.name}, {en_dir.name}")
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: split_audio.py <input_dir> <output_dir>
    parser = argparse.ArgumentParser(
        description="Split 100 numbered songs into Chinese and English directories.",
    )
    parser.add_argument(
        "input_dir",
        help="directory containing the numbered .wav/.mp3 files",
    )
    parser.add_argument(
        "output_dir",
        help="directory under which <input>_cn and <input>_en are created",
    )
    args = parser.parse_args()
    split(args.input_dir, args.output_dir)
|
|
|