|
|
import soundfile as sf |
|
|
import numpy as np |
|
|
from voxcpm import VoxCPM |
|
|
import argparse |
|
|
import os |
|
|
def _read_text_file(path):
    """Parse a batch file into a list of ``(wav_id, text)`` pairs.

    Each non-blank line is split on ``"||"``: the first field is the output
    id, and any remaining fields are re-joined with single spaces to form the
    text to synthesize.

    Args:
        path: Path to the batch text file.

    Returns:
        A list of ``(wav_id, text)`` tuples in file order.
    """
    entries = []
    # Explicit UTF-8 so parsing does not depend on the platform locale.
    with open(path, "r", encoding="utf-8") as f:
        for raw_line in f:
            stripped = raw_line.strip()
            if not stripped:
                # A blank line would otherwise yield an empty id and write
                # a file literally named ".wav".
                continue
            wav_id, *text_parts = stripped.split("||")
            entries.append((wav_id, " ".join(text_parts)))
    return entries


def _synthesize(model, text, args):
    """Call ``model.generate`` for one text using the script's fixed settings.

    Args:
        model: Object returned by ``VoxCPM.from_pretrained``.
        text: Text to synthesize.
        args: Parsed CLI namespace supplying the prompt and sampling options.

    Returns:
        Whatever ``model.generate`` returns (passed straight to ``sf.write``).
    """
    return model.generate(
        text=text,
        prompt_wav_path=args.prompt_wav_path,
        prompt_text=args.prompt_text,
        cfg_value=args.cfg_value,
        inference_timesteps=args.inference_timesteps,
        normalize=True,
        denoise=False,
        retry_badcase=True,
        retry_badcase_max_times=3,
        retry_badcase_ratio_threshold=6.0,
    )


def main(argv=None):
    """Command-line entry point: synthesize speech and write WAV files.

    With ``--text`` a single ``output.wav`` is written; otherwise every
    ``wav_id||text`` line of ``--text_file`` produces ``<wav_id>.wav``.
    ``--text`` takes precedence when both are given.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, matching the previous behavior.

    Raises:
        SystemExit: With status 2 (via ``parser.error``) when neither
            ``--text`` nor ``--text_file`` is given, or when only one of
            ``--prompt_wav_path`` / ``--prompt_text`` is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, required=True)
    parser.add_argument("--text", type=str)
    parser.add_argument("--text_file", type=str)
    parser.add_argument("--output_dir", type=str, default="outputs")
    parser.add_argument("--cfg_value", type=float, default=2.0)
    parser.add_argument("--inference_timesteps", type=int, default=10)
    parser.add_argument("--prompt_wav_path", type=str)
    parser.add_argument("--prompt_text", type=str)
    args = parser.parse_args(argv)

    # parser.error instead of assert: asserts vanish under ``python -O``.
    if not (args.text or args.text_file):
        parser.error("Please provide either text or text_file")
    if bool(args.prompt_wav_path) != bool(args.prompt_text):
        parser.error("Please provide both prompt_wav_path and prompt_text")

    # Resolve the work list before loading the model so a bad --text_file
    # fails fast instead of after an expensive model load.
    if args.text:
        jobs = [("output", args.text)]
    else:
        jobs = _read_text_file(args.text_file)

    model = VoxCPM.from_pretrained(args.model_path, load_denoiser=False)

    # NOTE(review): VoxCPM's own CLI writes audio at
    # ``model.tts_model.sample_rate``; use it when present and otherwise keep
    # the 16 kHz this script always used. Confirm against the installed
    # voxcpm version.
    sample_rate = getattr(getattr(model, "tts_model", None), "sample_rate", 16000)

    os.makedirs(args.output_dir, exist_ok=True)

    for wav_id, text in jobs:
        wav = _synthesize(model, text, args)
        out_path = f"{args.output_dir}/{wav_id}.wav"
        sf.write(out_path, wav, sample_rate)
        print(f"saved: {out_path}")
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|