| import enum |
| import typing |
| from concurrent.futures.thread import ThreadPoolExecutor |
| from pathlib import Path |
|
|
| from pydantic import BaseModel |
|
|
| from config.settings import settings |
| from utils.download_utils import download_file_from_huggingface |
|
|
| |
# Pretrained HuBERT / RoBERTa files that every voice model needs in addition to
# its own weights. Keys are logical names, values are the file paths that get
# joined onto the model storage directory and handed to the downloader (the two
# are identical for these shared files).
BASE_PRETRAINED_FILES = {
    'chinese-hubert-base/config.json': 'chinese-hubert-base/config.json',
    'chinese-hubert-base/preprocessor_config.json': 'chinese-hubert-base/preprocessor_config.json',
    'chinese-hubert-base/pytorch_model.bin': 'chinese-hubert-base/pytorch_model.bin',
    'chinese-roberta-wwm-ext-large/config.json': 'chinese-roberta-wwm-ext-large/config.json',
    'chinese-roberta-wwm-ext-large/pytorch_model.bin': 'chinese-roberta-wwm-ext-large/pytorch_model.bin',
    'chinese-roberta-wwm-ext-large/tokenizer.json': 'chinese-roberta-wwm-ext-large/tokenizer.json',
}

# Repository that hosts every voice listed below.
_VOICE_REPOSITORY = 'MoYoYoTech/tone-models'

# Cover images live next to the weights, named after the voice's file slug.
_COVER_IMAGE_URL = 'https://huggingface.co/MoYoYoTech/tone-models/resolve/main/cover/{slug}.png'

# Canned sentences shared by all voices. Kept as tuples here and copied into a
# fresh list per config so no two configs share a mutable object.
_OPENING_REMARKS = (
    "To start off, I just want to say that it’s nice to be talking to you here today.",
    "Before we begin here today, I should say that it’s nice to meet you.",
    "First off, I just wanted to thank you for coming out and contributing a question.",
    "Great to be here with you. I’m looking forward to a fantastic discussion.",
    "Hey, how’s it going? We’ve got some important things to cover today.",
    "Good to be here. We’ve got a lot of important topics to discuss.",
)
_MID_RESPONSES = (
    "Okay, you've got something on your mind, and that's why we're here, isn't it?",
    "More and more people are asking about this, and I’ve got something on my mind.",
    "Everybody's talking about this, and frankly, they're right to talk about it.",
    "Well, you've brought something to the table, and that's what dialogue is all about.",
)


def _build_voice_config(slug, character_name, file_size, prompt_text):
    """Return the configuration dict for one voice.

    All voices share the same repository, inference settings and conversation
    templates; only the four arguments vary.

    Args:
        slug: Lower-case identifier used in every per-voice file name
            (e.g. ``'luoxiang'``).
        character_name: Display name of the voice.
        file_size: Human-readable size string (e.g. ``'241M'``).
        prompt_text: Text stored as ``inference_parameters['prompt_text']``.

    Returns:
        A new dict with freshly built nested dicts and lists, so callers may
        mutate one config without affecting the others.
    """
    return {
        'repository': _VOICE_REPOSITORY,
        'character_name': character_name,
        'cover_image': _COVER_IMAGE_URL.format(slug=slug),
        'description': '',
        'file_size': file_size,
        'is_chinese_voice': True,
        'model_files': {
            **BASE_PRETRAINED_FILES,
            'gpt-weights': f'GPT_weights/{slug}_best_gpt.ckpt',
            'sovits-weights': f'SoVITS_weights/{slug}_best_sovits.pth',
            'reference_audio': f'ref_audios/{slug}_ref.wav',
            'prompt_semantic': f'prompt_semantic/{slug}_prompt_semantic.pt',
            'reference_spec': f'refer_spec/{slug}_spec.pt',
        },
        'inference_parameters': {
            'text_lang': "zh",
            'prompt_text': prompt_text,
            'prompt_lang': "zh",
            'top_k': 5,
            'top_p': 1,
            'temperature': 1,
            'text_split_method': "cut3",
            'batch_size': 100,
            'speed_factor': 1.1,
            'split_bucket': True,
            'return_fragment': False,
            'fragment_interval': 0.07,
            'seed': 233333,
        },
        'conversation_templates': {
            "opening_remarks": list(_OPENING_REMARKS),
            "mid_responses": list(_MID_RESPONSES),
        },
    }


# One entry per available voice, in display order.
VOICE_MODEL_CONFIGS = (
    _build_voice_config(
        'luoxiang', 'Luo Xiang', '240M',
        "复杂的问题背后也许没有统一的答案,选择站在正方还是反方,其实取决于你对一系列价值判断的回答。",
    ),
    _build_voice_config(
        'mabaoguo', 'Ma Baoguo', '241M',
        "当他弄清为什么我能打出这个五连鞭,他们打不出来的时候。",
    ),
    _build_voice_config(
        'shenyi', 'Shen Yi', '241M',
        "这事情本身在我看来其实挺莫名的, 啊我不太可能后面有机会还去寻求一下这个解释说。",
    ),
    _build_voice_config(
        'yangmi', 'Yang Mi', '241M',
        "你谁知道, 人生只有一次啊. 你怎么知道那样选, 你当下来说, 应该那样选. 为什么没那样选呢? 但你今天这样选了呀.",
    ),
    _build_voice_config(
        'zhoujielun', 'Zhou Jielun', '241M',
        "其实我我现在讲的这些奥,都是我未来成功的一些关键。",
    ),
    _build_voice_config(
        'mayun', 'Ma Yun', '241M',
        "这是我们最大的希望能招聘的到人。所以今天阿里巴巴公司内部,我自己这么觉得,人才梯队的建设非常之好。",
    ),
)
|
|
|
|
class VoiceModelStatus(enum.Enum):
    """Download state of a voice model."""

    # No download has been started (also the state after deletion).
    NOT_DOWNLOADED = 'not_downloaded'
    # A download is currently running.
    DOWNLOADING = 'downloading'
    # The download finished.
    DOWNLOADED = 'downloaded'
    # The last download attempt raised an error.
    FAILED = 'failed'
|
|
|
|
class ConversationTemplates(BaseModel):
    """Canned sentences attached to a voice model.

    Validated from the ``conversation_templates`` entry of a voice config.
    """

    # Sentences from the config's "opening_remarks" list
    # (presumably spoken at the start of a conversation — confirm with callers).
    opening_remarks: list[str]
    # Sentences from the config's "mid_responses" list
    # (presumably spoken between turns — confirm with callers).
    mid_responses: list[str]
|
|
|
|
class VoiceModel(BaseModel):
    """Configuration and on-disk state of one downloadable voice model.

    The fields mirror one entry of ``VOICE_MODEL_CONFIGS``. Files are stored
    under ``settings.paths.AUDIO_MODELS_DIR / repository``.
    """

    repository: str
    character_name: str
    cover_image: str
    description: str
    file_size: str
    is_chinese_voice: bool
    # Logical file name -> path relative to the model storage directory.
    model_files: dict[str, str]
    inference_parameters: dict[str, typing.Any]
    conversation_templates: ConversationTemplates

    # Last status set explicitly through the ``download_status`` setter.
    # NOTE(review): relies on pydantic treating an annotated, underscore-prefixed
    # attribute as a private attribute — confirm against the pinned version.
    _download_status: VoiceModelStatus = VoiceModelStatus.NOT_DOWNLOADED

    @property
    def download_status(self) -> VoiceModelStatus:
        """Return the current download status.

        Files on disk take precedence: when every model file exists the status
        is DOWNLOADED regardless of what was last set.
        """
        if self.is_model_complete:
            return VoiceModelStatus.DOWNLOADED
        return self._download_status

    @download_status.setter
    def download_status(self, status: VoiceModelStatus):
        """Record the download status."""
        self._download_status = status

    @property
    def model_storage_path(self) -> Path:
        """Return the model storage directory, creating it if necessary."""
        storage_path = settings.paths.AUDIO_MODELS_DIR / self.repository
        storage_path.mkdir(parents=True, exist_ok=True)
        return storage_path

    def _model_file_path(self, file_key: str) -> Path:
        """Return the local path for the file registered under ``file_key``.

        Falls back to the storage directory itself when the key is not present
        in ``model_files``.
        """
        return self.model_storage_path / self.model_files.get(file_key, '')

    @property
    def is_model_complete(self) -> bool:
        """Return True when every file in ``model_files`` exists on disk."""
        # Resolve the directory once; the property runs mkdir on every access.
        storage_path = self.model_storage_path
        return all(
            (storage_path / model_file).exists()
            for model_file in self.model_files.values()
        )

    def download_model(
        self,
        progress_callback: typing.Optional[typing.Callable[[], typing.Any]] = None,
    ):
        """Download all model files and keep the status up to date.

        Args:
            progress_callback: Optional zero-argument callable, invoked once
                after all files have been downloaded successfully.

        Raises:
            Exception: Whatever the download raised; the status is set to
                FAILED before the exception is re-raised.
        """
        self.download_status = VoiceModelStatus.DOWNLOADING
        try:
            self._download_model_files(progress_callback)
        except Exception:
            self.download_status = VoiceModelStatus.FAILED
            raise
        self.download_status = VoiceModelStatus.DOWNLOADED

    def _download_model_files(
        self,
        progress_callback: typing.Optional[typing.Callable[[], typing.Any]] = None,
    ):
        """Download every model file from HuggingFace using a thread pool.

        Raises:
            Exception: The first error raised by any individual download.
        """
        storage_path = self.model_storage_path
        with ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(
                    download_file_from_huggingface,
                    storage_path,
                    self.repository,
                    model_file,
                )
                for model_file in self.model_files.values()
            ]

        # Leaving the ``with`` block waits for all workers. Calling result()
        # re-raises a worker's exception; without it failures would be lost
        # and the model would be reported as downloaded.
        for future in futures:
            future.result()

        if progress_callback:
            progress_callback()

    def delete_model(self):
        """Delete the voice-specific weight files and reset the status.

        Only the GPT and SoVITS weights are removed; other files stay on disk.
        """
        storage_path = self.model_storage_path
        for file_key in ('gpt-weights', 'sovits-weights'):
            relative_path = self.model_files.get(file_key)
            if not relative_path:
                # A missing entry would resolve to the storage directory
                # itself; never try to remove that.
                continue
            file_path = storage_path / relative_path
            if file_path.is_file():
                file_path.unlink()
            elif file_path.is_dir():
                file_path.rmdir()
        self.download_status = VoiceModelStatus.NOT_DOWNLOADED

    @property
    def gpt_weights_path(self) -> Path:
        """Path of the GPT weights file."""
        return self._model_file_path('gpt-weights')

    @property
    def sovits_weights_path(self) -> Path:
        """Path of the SoVITS weights file."""
        return self._model_file_path('sovits-weights')

    @property
    def hubert_model_path(self) -> Path:
        """Path of the Chinese HuBERT model directory."""
        return self.model_storage_path / 'chinese-hubert-base'

    @property
    def bert_model_path(self) -> Path:
        """Path of the Chinese BERT (RoBERTa) model directory."""
        return self.model_storage_path / 'chinese-roberta-wwm-ext-large'

    @property
    def reference_audio_path(self) -> Path:
        """Path of the reference audio file."""
        return self._model_file_path('reference_audio')

    @property
    def prompt_semantic_path(self) -> Path:
        """Path of the prompt-semantic file."""
        return self._model_file_path('prompt_semantic')

    @property
    def reference_spec_path(self) -> Path:
        """Path of the reference spectrogram file."""
        return self._model_file_path('reference_spec')
|
|
|
|
class VoiceModelRegistry:
    """Class-level registry of voice models.

    Models are stored in a dict shared by the whole class and keyed by
    ``"<repository>:<character_name>"``.
    """

    _registered_models: dict[str, VoiceModel] = {}

    @staticmethod
    def _model_key(repository: str, character_name: str) -> str:
        """Build the registry key; the single place that defines its format."""
        return f'{repository}:{character_name}'

    @classmethod
    def register_models(cls, model_configs: typing.Iterable[dict]) -> list[VoiceModel]:
        """Create a VoiceModel for each config and add it to the registry.

        A config whose key is already registered replaces the earlier model.

        Args:
            model_configs: Config dicts accepted by ``VoiceModel(**config)``.

        Returns:
            The newly created models, in input order.
        """
        registered_models = []
        for config in model_configs:
            model_key = cls._model_key(
                config.get('repository', ''),
                config.get('character_name', ''),
            )
            voice_model = VoiceModel(**config)
            cls._registered_models[model_key] = voice_model
            registered_models.append(voice_model)
        return registered_models

    @classmethod
    def get_model(cls, repository: str, character_name: str) -> typing.Optional[VoiceModel]:
        """Return the model registered for the given pair, or None if absent."""
        return cls._registered_models.get(cls._model_key(repository, character_name))

    @classmethod
    def get_all_models(cls) -> list[VoiceModel]:
        """Return all registered models as a new list."""
        return list(cls._registered_models.values())

    @classmethod
    def get_version(cls) -> str:
        """Return the model version string."""
        return 'v2'
|
|
|
|
| |
# Register every entry of VOICE_MODEL_CONFIGS when this module is imported.
# NOTE: despite its name, this is the list of VoiceModel instances returned by
# register_models(), not a VoiceModelRegistry object.
voice_model_registry = VoiceModelRegistry.register_models(VOICE_MODEL_CONFIGS)
|
|