Spaces:

aoxiang1221
/

gpt-sovits

Sleeping

gpt-sovits / Synthesizers /base /Base_TTS_Synthesizer.py

nekoaoxiang

添加代码至 df4c937

558c90a almost 2 years ago

3.67 kB

	from abc import ABC, abstractmethod

	from .Base_TTS_Task import Base_TTS_Task as TTS_Task
	import json
	from typing import List, Dict, Literal, Optional, Any, Union, Generator, Tuple
	from pydantic import BaseModel, Field, model_validator
	import numpy as np
	from abc import ABC, abstractmethod
	from typing import Dict, List, Union, Generator, Tuple
	from typing_extensions import Literal
	import numpy as np
	import wave,io

	class Base_TTS_Synthesizer(ABC):
	"""
	Abstract base class for a Text-To-Speech (TTS) synthesizer.

	Attributes:
	ui_config (Dict[str, List]): A dictionary containing UI configuration settings.
	debug_mode (bool): Flag to toggle debug mode for additional logging and debugging information.

	"""

	ui_config: Dict[str, List] = {}
	debug_mode: bool = False

	def __init__(self, **kwargs):
	"""
	Initializes the TTS synthesizer with optional UI configurations and debug mode setting.

	Args:
	ui_config (Dict[str, List], optional): Configuration for user interface settings.
	debug_mode (bool, optional): Enables or disables debug mode.

	"""
	self.ui_config = kwargs.get("ui_config", {})
	self.debug_mode = kwargs.get("debug_mode", False)

	@abstractmethod
	def generate(
	self,
	task: TTS_Task,
	return_type: Literal["filepath", "numpy"] = "numpy",
	save_path: Optional[str] = None,
	) -> Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]:
	"""
	Generates speech from a given TTS task.

	Args:
	task (TTS_Task): The task containing data and parameters for speech synthesis.
	return_type (Literal["filepath", "numpy"], optional): The type of return value, either a file path or audio data.
	save_path (str, optional): The path to save the audio file.
	Returns:
	Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]: Depending on the return_type, returns a file path, a generator of audio data, or other types.

	"""
	pass

	@abstractmethod
	def get_characters(self):
	"""
	Retrieves the available characters and their emotions for the TTS.

	Returns:
	Dict[str, List[str]]: A dictionary mapping character names to lists of their emotions.
	"""
	pass

	@abstractmethod
	def params_parser(self, data):
	"""
	Parses input data into a TTS_Task.

	Args:
	data (Any): The raw input data to be parsed.

	Returns:
	TTS_Task: A TTS task object created from the input data.
	"""
	pass

	@abstractmethod
	def ms_like_parser(self, data):
	"""
	Parses input data in a Microsoft-like format into a TTS_Task.

	Args:
	data (Any): The raw input data to be parsed.

	Returns:
	TTS_Task: A TTS task object created from the Microsoft-like formatted input data.
	"""
	pass


	def get_wave_header_chunk(sample_rate: int, channels: int = 1, sample_width: int = 2):
	"""
	Generate a wave header with no data.

	Args:
	sample_rate (int): The sample rate of the audio.
	channels (int, optional): The number of audio channels. Defaults to 1.
	sample_width (int, optional): The sample width in bytes. Defaults to 2.

	Returns:
	bytes: The wave header as bytes.
	"""
	wav_buf = io.BytesIO()
	with wave.open(wav_buf, "wb") as vfout:
	vfout.setnchannels(channels)
	vfout.setsampwidth(sample_width)
	vfout.setframerate(sample_rate)

	wav_buf.seek(0)
	return wav_buf.read()