# File size: 4,014 Bytes
# a257816
# (line-number gutter 1-114 from the web file viewer omitted)
import os
import sys
import time
import click
import subprocess
from loguru import logger
from rich.table import Table
from rich.console import Console
from VietTTS.tts import TTS
from VietTTS.utils.file_utils import load_prompt_speech_from_file, load_voices
# Directory handed to load_voices() to discover the available voice samples.
# Relative path -- presumably resolved against the current working directory;
# confirm in load_voices.
AUDIO_DIR = 'samples'
# Directory handed to TTS(model_dir=...) when the synthesis command loads the model.
MODEL_DIR = 'pretrained-models'
@click.command('server')
@click.option('-h', '--host', type=str, default='0.0.0.0', help="The host address to bind the server to. Default is '0.0.0.0'.")
@click.option('-p', '--port', type=int, default=8298, help="The port number to bind the server to. Default is 8298.")
@click.option('-w', '--workers', type=int, default=1, help="The number of worker processes to handle requests. Default is 1.")
def start_server(host: str, port: int, workers: int):
    """Start API server (OpenAI TTS API compatible).

    Usage: viettts server --host 0.0.0.0 --port 8298 -w 4
    """
    # NOTE: the docstring above is what click prints for `--help`, so
    # maintainer documentation lives in comments instead.
    #
    # host / port -> gunicorn `--bind host:port`
    # workers     -> gunicorn `--workers`
    # Blocks until gunicorn exits; a non-zero gunicorn status becomes this
    # process's exit status.
    logger.info("Starting server")

    # Build an argument list and run it WITHOUT a shell. `host` comes straight
    # from the command line, so interpolating it into a shell string would let
    # shell metacharacters (`;`, `$(...)`, ...) be executed.
    cmd = [
        'gunicorn', 'viettts.server:app',
        '-k', 'uvicorn.workers.UvicornWorker',
        '--bind', f'{host}:{port}',
        '--workers', str(workers),
        '--max-requests', '1000',
        '--max-requests-jitter', '50',
        '--timeout', '300',
        '--keep-alive', '75',
        '--graceful-timeout', '60',
    ]

    try:
        exit_code = subprocess.call(cmd, stdout=sys.stdout)
    except FileNotFoundError:
        # Without a shell a missing executable raises instead of returning 127.
        logger.error('gunicorn executable not found. Make sure gunicorn is installed and on PATH.')
        sys.exit(127)

    if exit_code != 0:
        # Surface the failure so scripts and service managers can detect it.
        logger.error(f'Server exited with code {exit_code}')
        sys.exit(exit_code)
def _resolve_voice_path(voice, voice_map):
    """Translate a ``--voice`` value into a sample file path.

    Args:
        voice: Either a numeric voice ID or a voice name.
        voice_map: Mapping of voice name -> sample file path, in listing order.

    Returns:
        The mapped file path, or ``None`` when the ID is out of range or the
        name is unknown.
    """
    # isdecimal() (not isdigit()) so that only strings int() can parse are
    # treated as IDs; e.g. '²'.isdigit() is True but int('²') raises.
    if voice.isdecimal():
        # IDs are 1-based to match the "Voice ID" column printed by `show-voices`.
        index = int(voice) - 1
        paths = list(voice_map.values())
        if 0 <= index < len(paths):
            return paths[index]
        return None
    return voice_map.get(voice)


@click.command('synthesis')
@click.option('-t', "--text", type=str, required=True, help="The input text to synthesize into speech.")
@click.option('-v', "--voice", type=str, default='1', help="The voice ID or file path to clone the voice from. Default is '1'.")
@click.option('-s', "--speed", type=float, default=1, help="The speed multiplier for the speech. Default is 1 (normal speed).")
@click.option('-o', "--output", type=str, default='output.wav', help="The file path to save the synthesized audio. Default is 'output.wav'.")
def synthesis(text: str, voice: str, speed: float, output: str):
    """Synthesize audio from text and save to file.

    Usage: viettts synthesis --text 'Xin chào VietTTS' --voice nu-nhe-nhang --speed 1.2 --output test_nu-nhe-nhang.wav
    """
    # NOTE: the docstring above is what click prints for `--help`, so
    # maintainer documentation lives in comments instead.
    #
    # text   -> text to synthesize; must contain non-whitespace characters
    # voice  -> existing file path, voice name, or 1-based voice ID
    # speed  -> multiplier, accepted range 0.5-2.0
    # output -> destination path handed to TTS.tts_to_file
    # Invalid input is logged and the command returns without synthesizing.
    logger.info("Starting synthesis")
    st = time.perf_counter()

    if not text or not text.strip():
        logger.error('text must not empty')
        return

    # Chained comparison also rejects NaN, which `speed > 2 or speed < 0.5` let through.
    if not 0.5 <= speed <= 2:
        logger.error('speed must in range 0.5-2.0')
        return

    # A value that is not an existing file is looked up among the bundled voices.
    if not os.path.exists(voice):
        voice = _resolve_voice_path(voice, load_voices(AUDIO_DIR))
        # `voice is None` guards os.path.exists(None), which raises TypeError.
        if voice is None or not os.path.exists(voice):
            logger.error('voice is not available. Use --voice <voice-name/voice-id/local-file> or run `viettts show-voices` to get available voices.')
            return

    logger.info('Loading model')
    tts = TTS(model_dir=MODEL_DIR)

    logger.info('Loading voice')
    voice = load_prompt_speech_from_file(voice)

    logger.info('Processing')
    tts.tts_to_file(text, voice, speed, output)

    # Reported time covers model loading, voice loading and synthesis.
    et = time.perf_counter()
    logger.success(f"Saved to: {output} [time cost={et-st:.2f}s]")
@click.command('show-voices')
def show_voice():
    """Print all available voices.
    Usage: viettts show-voices
    """
    # The docstring above doubles as click's `--help` text, so it is kept as is.
    voices = load_voices(AUDIO_DIR)

    # (header, extra Table.add_column options) for each column, left to right.
    column_specs = (
        ("Voice ID", {"width": 10}),
        ("Voice Name", {"width": 30}),
        ("File", {"justify": "left"}),
    )

    listing = Table(show_header=True, header_style="green", show_lines=False)
    for header, options in column_specs:
        listing.add_column(header, **options)

    # Displayed IDs start at 1, following the order of the voice mapping.
    for voice_id, (name, path) in enumerate(voices.items(), start=1):
        listing.add_row(str(voice_id), name, path)

    Console().print(listing)
# Root click group. It performs no work itself; its docstring is the banner
# click shows in the top-level help output.
@click.group()
def cli():
    """
    VietTTS CLI v0.1.0
    Vietnamese Text To Speech and Voice Clone
    License: Apache 2.0 - Author: <dangvansam dangvansam98@gmail.com>
    """
# Register each sub-command on the root group so `viettts <command>` can dispatch to it.
cli.add_command(start_server)
cli.add_command(synthesis)
cli.add_command(show_voice)