# phonikud-experiments / ablation / multi_piper.py
# author: thewh1teagle
# commit: 9d81a18 — "added files of wav"
"""
uv run multi_piper.py --input ../phonikud-experiments-checkpoints/phonikud_enhanced/ --csv ./saspeech_male_phonikud.csv --output ./phonikud_enhanced_out --config ./config/model.config.json
uv run multi_piper.py --input ../phonikud-experiments-checkpoints/phonikud_vocalized/ --csv ./saspeech_male_phonikud.csv --output ./phonikud_vocalized_out --config ./config/model.config.json
uv run multi_piper.py --input ../phonikud-experiments-checkpoints/vocalized_mock/ --csv ./saspeech_vocalized_mock_phonemes.csv --output ./vocalized_mock_out --config ./config/model.config.json
uv run multi_piper.py --input ../phonikud-experiments-checkpoints/unvocalized_mock/ --csv ./saspeech_unvocalized_mock_phonemes.csv --output ./unvocalized_mock_out --config ./config/model.config.json
"""
import pandas as pd
from pathlib import Path
import soundfile as sf
from piper_onnx import Piper
import argparse
def load_phoneme_table(csv_path: Path) -> pd.DataFrame:
    """Load the header-less (file_id, text, phonemes) CSV, sorted by file_id.

    Columns are assigned positionally; rows are returned with a fresh
    0..n-1 index so downstream iteration order is deterministic.
    """
    df = pd.read_csv(
        csv_path,
        sep=',',
        header=None,
        names=['file_id', 'text', 'phonemes'],
        index_col=False,
    )
    return df.sort_values(by='file_id').reset_index(drop=True)


def synthesize_model(piper, df: pd.DataFrame, wav_folder: Path) -> None:
    """Synthesize one wav per CSV row with the given Piper model into wav_folder."""
    # Create the per-checkpoint folder once, not once per row.
    wav_folder.mkdir(exist_ok=True, parents=True)
    for _, row in df.iterrows():
        file_id = row['file_id']
        phonemes = row['phonemes']
        # Input is already phonemized, so bypass Piper's own phonemization.
        samples, sample_rate = piper.create(
            phonemes, is_phonemes=True, length_scale=1.2
        )  # noise_w=0.8, noise_scale=0.667
        file_path = wav_folder / f"{file_id}.wav"
        sf.write(file_path, samples, sample_rate)
        print(f"Saved {file_path} ({len(samples)/sample_rate:.2f}s)")


def main() -> None:
    """Batch-synthesize wavs for every *.onnx checkpoint in --input.

    For each checkpoint, one wav per row of the --csv phoneme table is
    written under <output>/<checkpoint stem>/<file_id>.wav.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True)  # input folder with onnx models
    parser.add_argument('--csv', required=True)  # path to csv with the phonemes
    parser.add_argument('--output', help='output dir', required=True)  # output folder for the wavs
    parser.add_argument('--config', required=True)  # path to piper model config
    args = parser.parse_args()

    input_path = Path(args.input)
    output_path = Path(args.output)
    piper_config_path = Path(args.config)

    # parents=True so a nested --output path works (matches the inner mkdir).
    output_path.mkdir(exist_ok=True, parents=True)

    # Load and sort the CSV once; it is identical for every checkpoint
    # (the original re-read it inside the model loop).
    df = load_phoneme_table(Path(args.csv))

    # sorted(): glob order is filesystem-dependent; make runs deterministic.
    for model_path in sorted(input_path.glob('*.onnx')):
        # CUDA first so the GPU is preferred when available, with CPU fallback;
        # ONNX Runtime tries providers in list order, so CPU-first would
        # always pin inference to the CPU. (Assumes piper_onnx forwards
        # `providers` to ORT — TODO confirm.)
        piper = Piper(
            model_path,
            piper_config_path,
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
        )
        synthesize_model(piper, df, output_path / model_path.stem)

    print("Done.")


if __name__ == "__main__":
    # NOTE: a stray `breakpoint()` in the original inner loop halted every
    # run at the first row; it has been removed.
    main()