VoiceDialogue / src /VoiceDialogue /models /voice_model.py
liumaolin
First commit.
7b64dcd
raw
history blame
23.2 kB
import enum
import typing
from concurrent.futures.thread import ThreadPoolExecutor
from pathlib import Path
from pydantic import BaseModel
from config.settings import settings
from utils.download_utils import download_file_from_huggingface
# Base pretrained model files shared by every voice. Each key is identical to
# its repository-relative path value.
BASE_PRETRAINED_FILES = {
    relative_path: relative_path
    for relative_path in (
        'chinese-hubert-base/config.json',
        'chinese-hubert-base/preprocessor_config.json',
        'chinese-hubert-base/pytorch_model.bin',
        'chinese-roberta-wwm-ext-large/config.json',
        'chinese-roberta-wwm-ext-large/pytorch_model.bin',
        'chinese-roberta-wwm-ext-large/tokenizer.json',
    )
}


def _build_voice_config(character_name, slug, file_size, prompt_text):
    """Build the configuration dict for one voice in 'MoYoYoTech/tone-models'.

    All voices share the same inference parameters (apart from the prompt
    text) and the same conversation templates; only the values passed in
    here differ between them.

    Args:
        character_name: Display name of the voice, e.g. 'Luo Xiang'.
        slug: Lower-case identifier used in every per-voice file name and in
            the cover image URL, e.g. 'luoxiang'.
        file_size: Human-readable size string, e.g. '240M'.
        prompt_text: Value stored under inference_parameters['prompt_text'].

    Returns:
        A new dict with freshly created nested dicts/lists, so entries never
        share mutable state with each other.
    """
    repository = 'MoYoYoTech/tone-models'
    return {
        'repository': repository,
        'character_name': character_name,
        'cover_image': f'https://huggingface.co/{repository}/resolve/main/cover/{slug}.png',
        'description': '',
        'file_size': file_size,
        'is_chinese_voice': True,
        'model_files': {
            **BASE_PRETRAINED_FILES,
            'gpt-weights': f'GPT_weights/{slug}_best_gpt.ckpt',
            'sovits-weights': f'SoVITS_weights/{slug}_best_sovits.pth',
            'reference_audio': f'ref_audios/{slug}_ref.wav',
            'prompt_semantic': f'prompt_semantic/{slug}_prompt_semantic.pt',
            'reference_spec': f'refer_spec/{slug}_spec.pt',
        },
        'inference_parameters': {
            'text_lang': "zh",
            'prompt_text': prompt_text,
            'prompt_lang': "zh",
            'top_k': 5,
            'top_p': 1,
            'temperature': 1,
            'text_split_method': "cut3",
            'batch_size': 100,
            'speed_factor': 1.1,
            'split_bucket': True,
            'return_fragment': False,
            'fragment_interval': 0.07,
            'seed': 233333,
        },
        'conversation_templates': {
            "opening_remarks": [
                "To start off, I just want to say that it’s nice to be talking to you here today.",
                "Before we begin here today, I should say that it’s nice to meet you.",
                "First off, I just wanted to thank you for coming out and contributing a question.",
                "Great to be here with you. I’m looking forward to a fantastic discussion.",
                "Hey, how’s it going? We’ve got some important things to cover today.",
                "Good to be here. We’ve got a lot of important topics to discuss.",
            ],
            "mid_responses": [
                "Okay, you've got something on your mind, and that's why we're here, isn't it?",
                "More and more people are asking about this, and I’ve got somthing on my mind.",
                "Everybody's talking about this, and frankly, they're right to talk about it.",
                "Well, you've brought something to the table, and that's what dialogue is all about.",
            ],
        },
    }


# Voice model configurations, in registration order.
# NOTE: a further voice, 'ShenTeng' (repository 'MoYoYoTech/gpt-sovits-models'),
# used to be listed here as a commented-out entry. It used a different
# model_files layout ('prompt_semantic_path', 'refer_spepc_path',
# 'text_features_path', 'bert_base_path'), an empty prompt_text and a
# speed_factor of 1.0, and is not registered.
VOICE_MODEL_CONFIGS = (
    _build_voice_config(
        character_name='Luo Xiang',
        slug='luoxiang',
        file_size='240M',
        prompt_text="复杂的问题背后也许没有统一的答案,选择站在正方还是反方,其实取决于你对一系列价值判断的回答。",
    ),
    _build_voice_config(
        character_name='Ma Baoguo',
        slug='mabaoguo',
        file_size='241M',
        prompt_text="当他弄清为什么我能打出这个五连鞭,他们打不出来的时候。",
    ),
    _build_voice_config(
        character_name='Shen Yi',
        slug='shenyi',
        file_size='241M',
        prompt_text="这事情本身在我看来其实挺莫名的, 啊我不太可能后面有机会还去寻求一下这个解释说。",
    ),
    _build_voice_config(
        character_name='Yang Mi',
        slug='yangmi',
        file_size='241M',
        prompt_text="你谁知道, 人生只有一次啊. 你怎么知道那样选, 你当下来说, 应该那样选. 为什么没那样选呢? 但你今天这样选了呀.",
    ),
    _build_voice_config(
        character_name='Zhou Jielun',
        slug='zhoujielun',
        file_size='241M',
        prompt_text="其实我我现在讲的这些奥,都是我未来成功的一些关键。",
    ),
    _build_voice_config(
        character_name='Ma Yun',
        slug='mayun',
        file_size='241M',
        prompt_text="这是我们最大的希望能招聘的到人。所以今天阿里巴巴公司内部,我自己这么觉得,人才梯队的建设非常之好。",
    ),
)
class VoiceModelStatus(enum.Enum):
    """Download state of a voice model."""

    # No download has been recorded for the model.
    NOT_DOWNLOADED = 'not_downloaded'
    # A download is currently in progress.
    DOWNLOADING = 'downloading'
    # The download finished.
    DOWNLOADED = 'downloaded'
    # The last download attempt raised an error.
    FAILED = 'failed'
class ConversationTemplates(BaseModel):
    """Canned conversation lines attached to a voice model."""

    # Lines listed under the "opening_remarks" key of a voice configuration.
    opening_remarks: list[str]
    # Lines listed under the "mid_responses" key of a voice configuration.
    mid_responses: list[str]
class VoiceModel(BaseModel):
    """Configuration and download state of a single voice model.

    The declared fields mirror the keys of one entry of VOICE_MODEL_CONFIGS.
    ``model_files`` maps a logical file key (e.g. 'gpt-weights') to a path
    relative to the repository; the same relative path is used beneath
    ``model_storage_path`` on disk.
    """

    repository: str
    character_name: str
    cover_image: str
    description: str
    file_size: str
    is_chinese_voice: bool
    model_files: dict[str, str]
    inference_parameters: dict[str, typing.Any]
    conversation_templates: ConversationTemplates
    # Last status recorded by download_model() / delete_model().
    _download_status: VoiceModelStatus = VoiceModelStatus.NOT_DOWNLOADED

    @property
    def download_status(self) -> VoiceModelStatus:
        """Return the download status.

        If every file listed in ``model_files`` exists on disk the result is
        always DOWNLOADED, regardless of the recorded status; otherwise the
        recorded status is returned.
        """
        if self.is_model_complete:
            return VoiceModelStatus.DOWNLOADED
        return self._download_status

    @download_status.setter
    def download_status(self, status: VoiceModelStatus):
        """Record a new download status."""
        self._download_status = status

    @property
    def model_storage_path(self) -> Path:
        """Return the local directory for this model's repository.

        The directory (``settings.paths.AUDIO_MODELS_DIR / repository``) is
        created, including parents, if it does not exist yet.
        """
        storage_path = settings.paths.AUDIO_MODELS_DIR / self.repository
        storage_path.mkdir(parents=True, exist_ok=True)
        return storage_path

    @property
    def is_model_complete(self) -> bool:
        """Return True when every file in ``model_files`` exists on disk."""
        # Resolve the storage directory once; the property runs mkdir on
        # every access, so it should not be re-evaluated per file.
        storage_path = self.model_storage_path
        return all(
            (storage_path / model_file).exists()
            for model_file in self.model_files.values()
        )

    def download_model(self, progress_callback: typing.Optional[typing.Callable] = None):
        """Download all model files and keep the recorded status up to date.

        Args:
            progress_callback: Optional zero-argument callable, invoked once
                for each file that finished downloading.

        Raises:
            Exception: Whatever the download raised; the status is set to
                FAILED before the exception is re-raised.
        """
        self.download_status = VoiceModelStatus.DOWNLOADING
        try:
            self._download_model_files(progress_callback)
            self.download_status = VoiceModelStatus.DOWNLOADED
        except Exception:
            self.download_status = VoiceModelStatus.FAILED
            raise

    def _download_model_files(self, progress_callback: typing.Optional[typing.Callable] = None):
        """Download every file in ``model_files`` using a thread pool.

        Each file is fetched by ``download_file_from_huggingface``, called
        with (storage path, repository, repository-relative file path).

        Args:
            progress_callback: Optional zero-argument callable, invoked after
                each file has finished downloading.

        Raises:
            Exception: The first error raised by any download task.
        """
        storage_path = self.model_storage_path
        with ThreadPoolExecutor() as executor:
            pending_downloads = [
                executor.submit(
                    download_file_from_huggingface,
                    storage_path,
                    self.repository,
                    model_file,
                )
                for model_file in self.model_files.values()
            ]
            for pending in pending_downloads:
                # result() re-raises any exception from the worker thread.
                # Without it a failed download would go unnoticed and the
                # model would be reported as DOWNLOADED.
                pending.result()
                if progress_callback:
                    progress_callback()

    def delete_model(self):
        """Delete the core weight files and reset the recorded status.

        Only the 'gpt-weights' and 'sovits-weights' entries are removed; all
        other files are left in place.
        """
        storage_path = self.model_storage_path
        for file_key in ('gpt-weights', 'sovits-weights'):
            relative_path = self.model_files.get(file_key)
            if not relative_path:
                # A missing/empty entry would resolve to the storage
                # directory itself; never try to remove that.
                continue
            file_path = storage_path / relative_path
            if file_path.is_file():
                file_path.unlink()
            elif file_path.is_dir():
                file_path.rmdir()
        self.download_status = VoiceModelStatus.NOT_DOWNLOADED

    # Model file path properties

    def _resolve_model_file(self, file_key: str) -> Path:
        """Return the on-disk path of the ``model_files`` entry ``file_key``.

        If the key is absent the storage directory itself is returned.
        """
        return self.model_storage_path / self.model_files.get(file_key, '')

    @property
    def gpt_weights_path(self) -> Path:
        """Path of the GPT weights file."""
        return self._resolve_model_file('gpt-weights')

    @property
    def sovits_weights_path(self) -> Path:
        """Path of the SoVITS weights file."""
        return self._resolve_model_file('sovits-weights')

    @property
    def hubert_model_path(self) -> Path:
        """Path of the 'chinese-hubert-base' model directory."""
        return self.model_storage_path / 'chinese-hubert-base'

    @property
    def bert_model_path(self) -> Path:
        """Path of the 'chinese-roberta-wwm-ext-large' model directory."""
        return self.model_storage_path / 'chinese-roberta-wwm-ext-large'

    @property
    def reference_audio_path(self) -> Path:
        """Path of the reference audio file."""
        return self._resolve_model_file('reference_audio')

    @property
    def prompt_semantic_path(self) -> Path:
        """Path of the prompt semantic file."""
        return self._resolve_model_file('prompt_semantic')

    @property
    def reference_spec_path(self) -> Path:
        """Path of the reference spectrogram file."""
        return self._resolve_model_file('reference_spec')
class VoiceModelRegistry:
    """Class-level registry of voice models keyed by repository and character name."""

    # Shared by the whole class: '<repository>:<character_name>' -> model.
    _registered_models: "dict[str, VoiceModel]" = {}

    @staticmethod
    def _model_key(repository: str, character_name: str) -> str:
        """Return the registry key for a repository / character name pair."""
        return f'{repository}:{character_name}'

    @classmethod
    def register_models(cls, model_configs: typing.Iterable[dict]) -> "list[VoiceModel]":
        """Build a VoiceModel from each config dict and register it.

        A config whose key is already registered replaces the earlier model.

        Args:
            model_configs: Iterable of configuration dicts (a list or a
                tuple such as VOICE_MODEL_CONFIGS).

        Returns:
            The newly created models, in the order of ``model_configs``.
        """
        registered_models = []
        for config in model_configs:
            model_key = cls._model_key(
                config.get('repository', ''),
                config.get('character_name', ''),
            )
            voice_model = VoiceModel(**config)
            cls._registered_models[model_key] = voice_model
            registered_models.append(voice_model)
        return registered_models

    @classmethod
    def get_model(cls, repository: str, character_name: str) -> "typing.Optional[VoiceModel]":
        """Return the registered model, or None if no such model is registered."""
        return cls._registered_models.get(cls._model_key(repository, character_name))

    @classmethod
    def get_all_models(cls) -> "list[VoiceModel]":
        """Return all registered models as a new list."""
        return list(cls._registered_models.values())

    @classmethod
    def get_version(cls) -> str:
        """Return the model version string."""
        return 'v2'
# Register every entry of VOICE_MODEL_CONFIGS at import time.
# NOTE: despite its name, this variable holds the list of VoiceModel objects
# returned by register_models(), not a VoiceModelRegistry instance.
voice_model_registry = VoiceModelRegistry.register_models(VOICE_MODEL_CONFIGS)