src/VoiceDialogue/models/voice_model.py · MoYoYoTech/VoiceDialogue at 0df78b6d6494ebd1788c92ad5de337c4debabc16

VoiceDialogue / src /VoiceDialogue /models /voice_model.py

liumaolin

First commit.

7b64dcd 11 months ago

23.2 kB

	import enum
	import typing
	from concurrent.futures.thread import ThreadPoolExecutor
	from pathlib import Path

	from pydantic import BaseModel

	from config.settings import settings
	from utils.download_utils import download_file_from_huggingface

	# Base pretrained model files. Each entry maps a lookup key to a file path
	# relative to the repository root; for these files key and path are identical.
	BASE_PRETRAINED_FILES = {
	    relative_path: relative_path
	    for relative_path in (
	        'chinese-hubert-base/config.json',
	        'chinese-hubert-base/preprocessor_config.json',
	        'chinese-hubert-base/pytorch_model.bin',
	        'chinese-roberta-wwm-ext-large/config.json',
	        'chinese-roberta-wwm-ext-large/pytorch_model.bin',
	        'chinese-roberta-wwm-ext-large/tokenizer.json',
	    )
	}

	# Voice model configuration.
	#
	# Every bundled voice is stored in the same HuggingFace repository and uses the
	# same file layout, inference settings and conversation templates. Only the
	# file-name slug, display name, advertised size and reference prompt text
	# differ, so each entry is produced by _build_voice_config() below.
	_VOICE_REPOSITORY = 'MoYoYoTech/tone-models'

	# Inference settings common to all voices. 'text_lang', 'prompt_text' and
	# 'prompt_lang' are added per voice by the builder so that key order stays
	# text_lang, prompt_text, prompt_lang, then the entries below.
	_SHARED_INFERENCE_PARAMETERS = {
	    'top_k': 5,
	    'top_p': 1,
	    'temperature': 1,
	    'text_split_method': "cut3",
	    'batch_size': 100,
	    'speed_factor': 1.1,
	    'split_bucket': True,
	    'return_fragment': False,
	    'fragment_interval': 0.07,
	    'seed': 233333,
	}

	# Canned phrases shared by all voices. Kept as tuples here; the builder copies
	# them into fresh lists so no two configs share a mutable object.
	_OPENING_REMARKS = (
	    "To start off, I just want to say that it’s nice to be talking to you here today.",
	    "Before we begin here today, I should say that it’s nice to meet you.",
	    "First off, I just wanted to thank you for coming out and contributing a question.",
	    "Great to be here with you. I’m looking forward to a fantastic discussion.",
	    "Hey, how’s it going? We’ve got some important things to cover today.",
	    "Good to be here. We’ve got a lot of important topics to discuss.",
	)
	_MID_RESPONSES = (
	    "Okay, you've got something on your mind, and that's why we're here, isn't it?",
	    "More and more people are asking about this, and I’ve got something on my mind.",
	    "Everybody's talking about this, and frankly, they're right to talk about it.",
	    "Well, you've brought something to the table, and that's what dialogue is all about.",
	)


	def _build_voice_config(slug, character_name, file_size, prompt_text):
	    """Return a new configuration dict for one Chinese voice.

	    Args:
	        slug: Lower-case identifier embedded in the voice's file names
	            (for example ``'luoxiang'``).
	        character_name: Display name of the voice.
	        file_size: Human-readable download size (for example ``'241M'``).
	        prompt_text: Text stored under ``inference_parameters['prompt_text']``;
	            presumably the transcript of the reference audio (not verifiable
	            from this file).

	    Returns:
	        A dict with the keys ``repository``, ``character_name``,
	        ``cover_image``, ``description``, ``file_size``, ``is_chinese_voice``,
	        ``model_files``, ``inference_parameters`` and
	        ``conversation_templates``. All nested containers are newly created
	        on every call.
	    """
	    return {
	        'repository': _VOICE_REPOSITORY,
	        'character_name': character_name,
	        'cover_image': f'https://huggingface.co/{_VOICE_REPOSITORY}/resolve/main/cover/{slug}.png',
	        'description': '',
	        'file_size': file_size,
	        'is_chinese_voice': True,
	        'model_files': {
	            **BASE_PRETRAINED_FILES,
	            'gpt-weights': f'GPT_weights/{slug}_best_gpt.ckpt',
	            'sovits-weights': f'SoVITS_weights/{slug}_best_sovits.pth',
	            'reference_audio': f'ref_audios/{slug}_ref.wav',
	            'prompt_semantic': f'prompt_semantic/{slug}_prompt_semantic.pt',
	            'reference_spec': f'refer_spec/{slug}_spec.pt',
	        },
	        'inference_parameters': {
	            'text_lang': "zh",
	            'prompt_text': prompt_text,
	            'prompt_lang': "zh",
	            **_SHARED_INFERENCE_PARAMETERS,
	        },
	        'conversation_templates': {
	            "opening_remarks": list(_OPENING_REMARKS),
	            "mid_responses": list(_MID_RESPONSES),
	        },
	    }


	# NOTE: a 'ShenTeng' voice hosted in 'MoYoYoTech/gpt-sovits-models' is not
	# registered here; its draft configuration used a different model_files layout
	# (keys such as 'prompt_semantic_path' and 'bert_base_path') that
	# _build_voice_config() does not produce.
	VOICE_MODEL_CONFIGS = (
	    _build_voice_config(
	        'luoxiang', 'Luo Xiang', '240M',
	        "复杂的问题背后也许没有统一的答案，选择站在正方还是反方，其实取决于你对一系列价值判断的回答。",
	    ),
	    _build_voice_config(
	        'mabaoguo', 'Ma Baoguo', '241M',
	        "当他弄清为什么我能打出这个五连鞭，他们打不出来的时候。",
	    ),
	    _build_voice_config(
	        'shenyi', 'Shen Yi', '241M',
	        "这事情本身在我看来其实挺莫名的, 啊我不太可能后面有机会还去寻求一下这个解释说。",
	    ),
	    _build_voice_config(
	        'yangmi', 'Yang Mi', '241M',
	        "你谁知道, 人生只有一次啊. 你怎么知道那样选, 你当下来说, 应该那样选. 为什么没那样选呢? 但你今天这样选了呀.",
	    ),
	    _build_voice_config(
	        'zhoujielun', 'Zhou Jielun', '241M',
	        "其实我我现在讲的这些奥，都是我未来成功的一些关键。",
	    ),
	    _build_voice_config(
	        'mayun', 'Ma Yun', '241M',
	        "这是我们最大的希望能招聘的到人。所以今天阿里巴巴公司内部，我自己这么觉得，人才梯队的建设非常之好。",
	    ),
	)


	class VoiceModelStatus(enum.Enum):
	    """Download state of a voice model's files."""
	    # Initial state of a VoiceModel; also set again by VoiceModel.delete_model().
	    NOT_DOWNLOADED = 'not_downloaded'
	    # Set by VoiceModel.download_model() before the download starts.
	    DOWNLOADING = 'downloading'
	    # Set after a download finished; also reported whenever all files exist on disk.
	    DOWNLOADED = 'downloaded'
	    # Set by VoiceModel.download_model() when the download raised an exception.
	    FAILED = 'failed'


	class ConversationTemplates(BaseModel):
	    """Canned conversation phrases attached to a voice model.

	    Populated from the 'conversation_templates' entry of a voice configuration.
	    How the phrases are consumed is not visible in this module; presumably
	    they are spoken as filler lines during a dialogue (confirm against callers).
	    """
	    # Phrases from the "opening_remarks" list of the configuration.
	    opening_remarks: list[str]
	    # Phrases from the "mid_responses" list of the configuration.
	    mid_responses: list[str]


	class VoiceModel(BaseModel):
	    """Configuration and on-disk state of one downloadable voice model.

	    The fields mirror the keys of a voice configuration dict. ``model_files``
	    maps a lookup key (for example ``'gpt-weights'``) to a file path relative
	    to ``model_storage_path``.
	    """
	    repository: str
	    character_name: str
	    cover_image: str
	    description: str
	    file_size: str
	    is_chinese_voice: bool
	    model_files: dict[str, str]
	    inference_parameters: dict[str, typing.Any]
	    conversation_templates: ConversationTemplates

	    # Last status recorded by download_model()/delete_model(); not a model field.
	    _download_status: VoiceModelStatus = VoiceModelStatus.NOT_DOWNLOADED

	    @property
	    def download_status(self) -> VoiceModelStatus:
	        """Return the download status.

	        Files found on disk take precedence: when every configured file
	        exists the result is ``DOWNLOADED`` regardless of the recorded status.
	        """
	        if self.is_model_complete:
	            return VoiceModelStatus.DOWNLOADED
	        return self._download_status

	    @download_status.setter
	    def download_status(self, status: VoiceModelStatus):
	        """Record ``status`` as the last known download status."""
	        self._download_status = status

	    @property
	    def model_storage_path(self) -> Path:
	        """Return the storage directory of this model's repository.

	        The directory (``settings.paths.AUDIO_MODELS_DIR / repository``) is
	        created on access if it does not exist yet.
	        """
	        storage_path = settings.paths.AUDIO_MODELS_DIR / self.repository
	        storage_path.mkdir(parents=True, exist_ok=True)
	        return storage_path

	    @property
	    def is_model_complete(self) -> bool:
	        """Return True when every file listed in ``model_files`` exists on disk."""
	        # Resolve the directory once: the property runs mkdir() on every access.
	        storage_path = self.model_storage_path
	        return all(
	            (storage_path / model_file).exists()
	            for model_file in self.model_files.values()
	        )

	    def download_model(self, progress_callback: typing.Optional[typing.Callable] = None):
	        """Download all model files and keep the recorded status up to date.

	        Args:
	            progress_callback: Optional zero-argument callable invoked once
	                after all files were downloaded successfully.

	        Raises:
	            Exception: Whatever the download raised; the status is set to
	                ``FAILED`` before the exception is re-raised.
	        """
	        self.download_status = VoiceModelStatus.DOWNLOADING

	        try:
	            self._download_model_files(progress_callback)
	            self.download_status = VoiceModelStatus.DOWNLOADED
	        except Exception:
	            self.download_status = VoiceModelStatus.FAILED
	            raise

	    def _download_model_files(self, progress_callback: typing.Optional[typing.Callable] = None):
	        """Download every file in ``model_files`` concurrently.

	        Each file is handed to ``download_file_from_huggingface(storage_path,
	        repository, model_file)`` on a thread pool.

	        Args:
	            progress_callback: Optional zero-argument callable invoked once
	                after all downloads succeeded.

	        Raises:
	            Exception: The first exception raised by any download task.
	        """
	        storage_path = self.model_storage_path
	        with ThreadPoolExecutor() as executor:
	            futures = [
	                executor.submit(
	                    download_file_from_huggingface,
	                    storage_path,
	                    self.repository,
	                    model_file,
	                )
	                for model_file in self.model_files.values()
	            ]

	        # Leaving the "with" block waited for every task. Exceptions are stored
	        # on the futures and would be lost silently unless result() is called.
	        for future in futures:
	            future.result()

	        if progress_callback:
	            progress_callback()

	    def delete_model(self):
	        """Delete the voice-specific weight files and reset the recorded status.

	        Only the ``'gpt-weights'`` and ``'sovits-weights'`` entries are removed;
	        all other files stay on disk.
	        """
	        storage_path = self.model_storage_path
	        for file_key in ('gpt-weights', 'sovits-weights'):
	            relative_path = self.model_files.get(file_key)
	            if not relative_path:
	                # An empty path would resolve to the storage directory itself
	                # and make rmdir() target the whole repository folder.
	                continue
	            file_path = storage_path / relative_path
	            if file_path.is_file():
	                file_path.unlink()
	            elif file_path.is_dir():
	                file_path.rmdir()
	        self.download_status = VoiceModelStatus.NOT_DOWNLOADED

	    def _resolve_model_file(self, file_key: str) -> Path:
	        """Return the absolute path for ``file_key``.

	        A key missing from ``model_files`` resolves to the storage directory
	        itself (joining an empty relative path).
	        """
	        return self.model_storage_path / self.model_files.get(file_key, '')

	    # Model file path properties
	    @property
	    def gpt_weights_path(self) -> Path:
	        """Path of the GPT weights file."""
	        return self._resolve_model_file('gpt-weights')

	    @property
	    def sovits_weights_path(self) -> Path:
	        """Path of the SoVITS weights file."""
	        return self._resolve_model_file('sovits-weights')

	    @property
	    def hubert_model_path(self) -> Path:
	        """Path of the Chinese HuBERT model directory."""
	        return self.model_storage_path / 'chinese-hubert-base'

	    @property
	    def bert_model_path(self) -> Path:
	        """Path of the Chinese BERT (RoBERTa) model directory."""
	        return self.model_storage_path / 'chinese-roberta-wwm-ext-large'

	    @property
	    def reference_audio_path(self) -> Path:
	        """Path of the reference audio file."""
	        return self._resolve_model_file('reference_audio')

	    @property
	    def prompt_semantic_path(self) -> Path:
	        """Path of the prompt semantic file."""
	        return self._resolve_model_file('prompt_semantic')

	    @property
	    def reference_spec_path(self) -> Path:
	        """Path of the reference spectrogram file."""
	        return self._resolve_model_file('reference_spec')


	class VoiceModelRegistry:
	    """Class-level registry of voice models keyed by repository and character name."""
	    # Shared by the whole process: all access goes through the classmethods below.
	    _registered_models: typing.ClassVar[dict[str, 'VoiceModel']] = {}

	    @staticmethod
	    def _model_key(repository: str, character_name: str) -> str:
	        """Return the registry key for a repository / character name pair."""
	        return f'{repository}:{character_name}'

	    @classmethod
	    def register_models(cls, model_configs: typing.Iterable[dict]) -> list['VoiceModel']:
	        """Build a VoiceModel from each configuration dict and register it.

	        A configuration whose key is already registered replaces the earlier
	        model.

	        Args:
	            model_configs: Iterable (list, tuple, ...) of configuration dicts
	                accepted by ``VoiceModel(**config)``.

	        Returns:
	            The newly created models, in input order.
	        """
	        registered_models = []

	        for config in model_configs:
	            model_key = cls._model_key(
	                config.get('repository', ''),
	                config.get('character_name', ''),
	            )
	            voice_model = VoiceModel(**config)
	            cls._registered_models[model_key] = voice_model
	            registered_models.append(voice_model)

	        return registered_models

	    @classmethod
	    def get_model(cls, repository: str, character_name: str) -> typing.Optional['VoiceModel']:
	        """Return the registered model, or None when no such model is registered."""
	        return cls._registered_models.get(cls._model_key(repository, character_name))

	    @classmethod
	    def get_all_models(cls) -> list['VoiceModel']:
	        """Return all registered models in registration order."""
	        return list(cls._registered_models.values())

	    @classmethod
	    def get_version(cls) -> str:
	        """Return the model version string."""
	        return 'v2'


	# Registers every entry of VOICE_MODEL_CONFIGS at import time.
	# NOTE: despite its name, this holds the list of VoiceModel objects returned
	# by register_models(), not a VoiceModelRegistry instance; use the
	# VoiceModelRegistry classmethods for lookups by repository/character name.
	voice_model_registry = VoiceModelRegistry.register_models(VOICE_MODEL_CONFIGS)