Spaces:
Sleeping
Sleeping
"""
Genie TTS Hugging Face Spaces Deployment

Based on the configuration of the official High-Logic/Genie project.
GitHub: https://github.com/High-Logic/Genie

Configuration notes:
- Dependencies are aligned with the official Docker/requirements.txt
- API calls follow the official documentation
- Environment variable settings follow the official examples
"""
| import gradio as gr | |
| import os | |
| import tempfile | |
| import logging | |
| import warnings | |
| import subprocess | |
| import sys | |
| from pathlib import Path | |
# Module-level logger; the root logger is configured at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Suppress FutureWarning and UserWarning messages for the whole process.
for _ignored_category in (FutureWarning, UserWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)
def install_genie_tts():
    """Make sure the ``genie_tts`` package can be imported, installing it if needed.

    If the import fails, ``genie-tts`` is installed with ``pip --no-deps`` and a
    short list of core dependencies is installed one by one (a failure of a
    single dependency is logged and skipped). The import is then retried.

    Returns:
        tuple: ``(True, None)`` when ``genie_tts`` is importable, otherwise
        ``(False, error_message)`` where ``error_message`` is a string that
        describes the timeout or the installation/import failure.
    """
    try:
        import genie_tts  # noqa: F401 - imported only to probe availability
        logger.info("genie-tts已安装")
        return True, None
    except ImportError:
        logger.info("正在尝试安装genie-tts...")

    try:
        # Install genie-tts itself; --no-deps skips its declared dependencies
        # to avoid the PyAudio build problem.
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            "genie-tts", "--no-deps"
        ], timeout=300)

        # Install the core dependencies by hand.
        core_deps = [
            "soundfile>=0.12.0",
            "scipy>=1.9.0",
            "rich>=12.0.0",
            "pyopenjtalk"
        ]
        for dep in core_deps:
            try:
                subprocess.check_call([
                    sys.executable, "-m", "pip", "install", dep
                ], timeout=120)
            except Exception as e:
                # Best effort: one missing dependency must not abort the rest.
                logger.warning(f"安装依赖 {dep} 失败: {e}")

        # Packages installed while the interpreter is running may be invisible
        # to cached import finders until the caches are invalidated.
        import importlib
        importlib.invalidate_caches()

        import genie_tts  # noqa: F401
        logger.info("genie-tts安装成功")
        return True, None
    except subprocess.TimeoutExpired:
        error_msg = "安装超时:Hugging Face Spaces 环境可能不支持某些依赖"
        logger.error(error_msg)
        return False, error_msg
    except Exception as e:
        error_msg = str(e)
        if "portaudio" in error_msg.lower():
            # Replace the raw error with a friendlier explanation.
            error_msg = ("PyAudio编译失败:Hugging Face Spaces环境缺少系统级音频依赖。"
                         "这是已知的限制,请在本地环境运行或使用替代方案。")
        logger.error(f"安装genie-tts失败: {error_msg}")
        return False, error_msg
# Install Genie TTS (if necessary) and import it. ``genie`` stays ``None``
# whenever the package is unavailable.
install_success, install_error = install_genie_tts()
genie = None
if install_success:
    try:
        import genie_tts as genie
    except ImportError as import_exc:
        logger.error(f"导入Genie TTS失败: {import_exc}")
        install_error = f"导入失败: {str(import_exc)}"
    else:
        logger.info("Genie TTS导入成功")
class GenieTTSInterface:
    """Wrapper around the module-level ``genie`` package used by the UI callbacks.

    The methods report failures through their return values (``None`` plus a
    message string) instead of raising, so callers can show the message as-is.
    """

    # Files that must all exist in a character's cache folder for the model
    # to be considered cached.
    _MODEL_FILES = (
        'prompt.wav', 'prompt_wav.json',
        't2s_encoder_fp32.onnx', 't2s_first_stage_decoder_fp32.onnx',
        't2s_stage_decoder_fp32.onnx', 'vits_fp32.onnx',
    )

    # Typographic characters normalised before synthesis. Escapes are used so
    # that look-alike characters cannot be silently collapsed by an editor.
    _TEXT_REPLACEMENTS = {
        '\u201c': '"',   # left double quotation mark
        '\u201d': '"',   # right double quotation mark
        '\u2018': "'",   # left single quotation mark
        '\u2019': "'",   # right single quotation mark
        # NOTE(review): the em dash is mapped to U+4E00 (the CJK ideograph
        # "one"), which looks like a dash but is a word. Confirm whether
        # U+30FC or '-' was intended.
        '\u2014': '\u4e00',
        '\u2013': '-',   # en dash
    }

    # Sentence terminators, in CJK/full-width and ASCII forms.
    _SENTENCE_ENDINGS = ('\u3002', '\uff01', '\uff1f', '.', '!', '?')

    # Maximum number of characters accepted by synthesize_speech().
    _MAX_TEXT_LENGTH = 500

    def __init__(self):
        self.available_characters = ['misono_mika']  # predefined characters
        self.current_character = None
        self.model_cache_dir = self.setup_cache_directory()
        self.is_initialized = False
        # Keep the failure reason whenever the engine is unusable. This also
        # covers the case where pip succeeded but the import still failed.
        self.install_error = install_error if genie is None else None

    def setup_cache_directory(self):
        """Create (if missing) and return the model cache directory path."""
        cache_dir = os.path.join(tempfile.gettempdir(), "genie_tts_cache")
        os.makedirs(cache_dir, exist_ok=True)
        return cache_dir

    def check_model_availability(self, character_name):
        """Return True when every expected model file exists in the cache.

        The files are looked up in ``<model_cache_dir>/<character_name>``.
        """
        character_cache_dir = os.path.join(self.model_cache_dir, character_name)
        if not os.path.exists(character_cache_dir):
            return False
        return all(
            os.path.exists(os.path.join(character_cache_dir, file_name))
            for file_name in self._MODEL_FILES
        )

    def initialize_genie(self):
        """Set the environment variables used by this app; return True on success.

        Initialization only touches ``os.environ``. It is therefore marked as
        done as soon as the variables are set and does not depend on any
        private attribute of the ``genie`` package. Returns False (after
        logging) if setting the environment fails.
        """
        if self.is_initialized:
            return True
        try:
            os.environ["HF_HUB_ENABLE_PROGRESS_BAR"] = "1"
            os.environ["TOKENIZERS_PARALLELISM"] = "false"  # avoid a warning
            # Optional: model paths (see the official configuration)
            # os.environ['HUBERT_MODEL_PATH'] = r"path/to/chinese-hubert-base.onnx"
            # os.environ['OPEN_JTALK_DICT_DIR'] = r"path/to/open_jtalk_dic_utf_8-1.11"
            # Optional: cache sizes (see the official configuration)
            # os.environ['Max_Cached_Character_Models'] = '3'
            # os.environ['Max_Cached_Reference_Audio'] = '10'
            logger.info("Genie TTS环境初始化成功")
            self.is_initialized = True
            return True
        except Exception as e:
            logger.error(f"初始化Genie TTS失败: {e}")
            return False

    def load_character(self, character_name):
        """Load a predefined character model.

        Returns:
            tuple: ``(status_message, "")`` on success, ``(None, error_message)``
            on failure.
        """
        if not genie:
            return None, "Genie TTS未正确安装"
        if not self.initialize_genie():
            return None, "Genie TTS初始化失败"
        try:
            logger.info(f"正在加载角色: {character_name}")
            # Only used to pick the log message; loading happens either way.
            if self.check_model_availability(character_name):
                logger.info(f"使用缓存的模型: {character_name}")
            else:
                logger.info(f"首次下载模型: {character_name},请稍候...")
            genie.load_predefined_character(character_name)
            self.current_character = character_name
            return f"角色 {character_name} 加载成功!", ""
        except Exception as e:
            error_msg = str(e)
            logger.error(f"加载角色失败: {error_msg}")
            # Map well-known failure keywords to friendlier messages.
            lowered = error_msg.lower()
            if "network" in lowered or "connection" in lowered:
                return None, "网络连接错误,请检查网络连接后重试"
            elif "disk space" in lowered:
                return None, "磁盘空间不足,请清理空间后重试"
            elif "timeout" in lowered:
                return None, "下载超时,请重试"
            else:
                return None, f"加载角色失败: {error_msg}"

    def estimate_download_size(self, character_name):
        """Return the estimated download size in MB (200 for unknown names)."""
        model_sizes = {
            'misono_mika': 180  # MB
        }
        return model_sizes.get(character_name, 200)

    def cleanup_cache(self):
        """Delete and recreate the cache directory; return True on success."""
        try:
            import shutil
            if os.path.exists(self.model_cache_dir):
                shutil.rmtree(self.model_cache_dir)
            self.setup_cache_directory()
            logger.info("缓存清理完成")
            return True
        except Exception as e:
            logger.error(f"清理缓存失败: {e}")
            return False

    def _install_failure_message(self):
        """Build the user-facing message shown when ``genie`` is unavailable."""
        if not self.install_error:
            return "Genie TTS未正确安装,原因未知"
        error_msg = f"Genie TTS 安装失败: {self.install_error}"
        if "portaudio" in self.install_error.lower():
            error_msg += "\n\n💡 解决方案:\n"
            error_msg += "1. 在本地环境运行此应用(支持完整依赖)\n"
            error_msg += "2. 或等待我们提供不依赖PyAudio的替代方案\n"
            error_msg += "3. 查看项目README了解更多信息"
        return error_msg

    @staticmethod
    def _discard_file(path):
        """Remove *path* if it exists; a missing or locked file is ignored."""
        if not path:
            return
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup of a temporary file.
            pass

    def synthesize_speech(self, text, character_name, play_audio=False):
        """Synthesize *text* with *character_name* into a temporary WAV file.

        Args:
            text: Text to synthesize; must be non-blank and at most 500 characters.
            character_name: Character to use; loaded first if it is not current.
            play_audio: Accepted for compatibility and ignored; ``genie.tts`` is
                always called with ``play=False``.

        Returns:
            tuple: ``(wav_path, "")`` on success, ``(None, error_message)`` on
            failure. On failure the temporary output file is removed.
        """
        if not genie:
            return None, self._install_failure_message()
        if not text or not text.strip():
            return None, "请输入要合成的文本"
        if len(text) > self._MAX_TEXT_LENGTH:
            return None, "文本过长(超过500字符),请缩短文本长度"
        if character_name != self.current_character:
            _status, error = self.load_character(character_name)
            if error:
                return None, error

        env_key = 'PYTORCH_JIT_USE_NNC_NOT_NVFUSER'
        output_path = None
        try:
            processed_text = self.preprocess_text(text)
            # Reserve a file name; the file is closed before genie writes to it.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                output_path = tmp_file.name
            logger.info(f"正在合成语音: {processed_text[:50]}...")

            # Set the variable for the duration of the call only.
            original_env = os.environ.get(env_key, None)
            os.environ[env_key] = '1'
            try:
                genie.tts(
                    character_name=character_name,
                    text=processed_text,
                    play=False,  # never play audio on the server
                    split_sentence=True,
                    save_path=output_path
                )
            finally:
                if original_env is None:
                    os.environ.pop(env_key, None)
                else:
                    os.environ[env_key] = original_env

            # Validate the output file.
            problem = None
            if not os.path.exists(output_path):
                problem = "语音合成失败:输出文件未生成"
            else:
                file_size = os.path.getsize(output_path)
                if file_size == 0:
                    problem = "语音合成失败:输出文件为空"
                elif file_size < 1000:  # under 1KB is treated as a failure
                    problem = "语音合成失败:输出文件异常小"
            if problem:
                self._discard_file(output_path)
                return None, problem

            logger.info(f"语音合成成功,文件大小: {file_size/1024:.1f}KB")
            return output_path, ""
        except Exception as e:
            # Do not leave the delete=False temp file behind on failure.
            self._discard_file(output_path)
            error_msg = str(e)
            logger.error(f"语音合成失败: {error_msg}")
            lowered = error_msg.lower()
            if "memory" in lowered:  # also matches "out of memory"
                return None, "内存不足,请尝试缩短文本或重启应用"
            elif "cuda" in lowered:
                return None, "GPU相关错误,正在使用CPU模式重试"
            elif "model" in lowered:
                return None, "模型加载错误,请重新选择角色"
            elif "timeout" in lowered:
                return None, "处理超时,请尝试缩短文本"
            else:
                return None, f"语音合成失败: {error_msg}"

    def preprocess_text(self, text):
        """Normalise *text* before synthesis.

        Strips surrounding whitespace, replaces typographic quotes and dashes
        according to ``_TEXT_REPLACEMENTS``, and appends ``。`` when a non-empty
        text does not already end with a sentence terminator.
        """
        text = text.strip()
        for old, new in self._TEXT_REPLACEMENTS.items():
            text = text.replace(old, new)
        if text and not text.endswith(self._SENTENCE_ENDINGS):
            text += '。'
        return text

    def get_system_info(self):
        """Return a dict of memory/disk figures for debugging.

        Uses ``psutil`` when it can be imported, otherwise falls back to
        ``shutil.disk_usage``. Any other failure yields a dict with a single
        ``status`` entry describing the error.
        """
        gib = 1024 ** 3
        try:
            try:
                import psutil
            except ImportError:
                # Basic information without psutil.
                import shutil
                total, used, free = shutil.disk_usage('/')
                return {
                    'disk_free': f"{free / gib:.1f}GB",
                    'disk_total': f"{total / gib:.1f}GB",
                    'status': "基础系统信息 (psutil 未安装)"
                }
            memory = psutil.virtual_memory()
            disk = psutil.disk_usage('/')
            return {
                'memory_total': f"{memory.total / gib:.1f}GB",
                'memory_available': f"{memory.available / gib:.1f}GB",
                'memory_percent': f"{memory.percent}%",
                'disk_free': f"{disk.free / gib:.1f}GB"
            }
        except Exception as e:
            return {"status": f"无法获取系统信息: {str(e)}"}
# Create the interface instance used by the Gradio callbacks below.
tts_interface = GenieTTSInterface()
def create_interface():
    """Build and return the Gradio ``Blocks`` application.

    The UI has three tabs (synthesis, examples, about) plus a status banner
    that reflects whether the ``genie`` package was loaded. All callbacks use
    the module-level ``tts_interface`` instance.

    Returns:
        The ``gr.Blocks`` object; the caller is responsible for launching it.
    """

    def get_audio_duration(audio_path):
        """Return the duration of *audio_path* in seconds, or 0 if unreadable."""
        # Try the standard library first: it handles PCM WAV files and needs
        # no extra dependency.
        try:
            import wave
            with wave.open(audio_path, "rb") as wav_file:
                frame_rate = wav_file.getframerate()
                if frame_rate:
                    return wav_file.getnframes() / frame_rate
        except Exception as e:
            logger.debug(f"wave could not read {audio_path}: {e}")
        # Fall back to librosa for formats the wave module rejects.
        try:
            import librosa
            y, sr = librosa.load(audio_path, sr=None)
            return len(y) / sr
        except Exception:
            return 0

    def tts_wrapper(text, character, progress=gr.Progress()):
        """Run the full load-and-synthesize flow; return (audio_path, status)."""
        if not text or not text.strip():
            return None, "❌ 请输入要合成的文本"
        progress(0.1, desc="准备模型...")
        # Load the character model when it differs from the current one.
        if character != tts_interface.current_character:
            progress(0.3, desc=f"加载角色模型: {character}")
            _status, error = tts_interface.load_character(character)
            if error:
                return None, f"❌ {error}"
        progress(0.5, desc="正在合成语音...")
        audio_path, error = tts_interface.synthesize_speech(text, character)
        progress(0.9, desc="完成处理...")
        if error:
            return None, f"❌ {error}"
        progress(1.0, desc="✅ 合成成功!")
        return audio_path, f"✅ 合成成功!音频长度: {get_audio_duration(audio_path):.1f}秒"

    def clear_all():
        """Reset the text input, the audio output and the status box."""
        return "", None, "🔄 已清空所有内容"

    def load_example(text, character):
        """Copy an example into the inputs and report it in the status box."""
        return text, character, f"📝 已加载示例: {text[:20]}..."

    # Define the interface.
    with gr.Blocks(
        title="🔮 Genie TTS - 语音合成",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .status-success {
            color: #28a745 !important;
        }
        .status-error {
            color: #dc3545 !important;
        }
        """
    ) as demo:
        gr.Markdown("""
        # 🔮 Genie TTS - AI 语音合成系统

        基于 [High-Logic/Genie](https://github.com/High-Logic/Genie) 的轻量级 TTS 推理引擎,支持高质量日语语音合成。

        <div style="background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); padding: 1rem; border-radius: 10px; color: white; margin: 1rem 0;">
        <strong>🌟 功能特点</strong><br>
        ✅ CPU 优化推理,无需 GPU<br>
        ✅ 基于 GPT-SoVITS V2 技术<br>
        ✅ 支持长文本自动分句<br>
        ✅ 实时音频流输出
        </div>

        **📖 使用说明:** 选择角色模型 → 输入日语文本 → 点击合成按钮 → 获得高质量语音
        """)

        # System status banner.
        if not genie or not install_success:
            status_color = "#ff4444"
            status_icon = "❌"
            status_text = "服务不可用"
            if tts_interface.install_error and "portaudio" in tts_interface.install_error.lower():
                details = ("Hugging Face Spaces环境限制导致PyAudio依赖安装失败。<br>"
                           "💡 <strong>解决方案:</strong> 请在本地环境运行此应用以获得完整功能。")
            else:
                details = f"安装错误: {tts_interface.install_error or '未知错误'}"
        else:
            status_color = "#44ff44"
            status_icon = "✅"
            status_text = "服务正常"
            details = "Genie TTS引擎已成功加载,可以正常使用。"
        gr.Markdown(f"""
        <div style="background: {status_color}20; border-left: 4px solid {status_color}; padding: 1rem; margin: 1rem 0; border-radius: 0 8px 8px 0;">
        <strong>{status_icon} 系统状态: {status_text}</strong><br>
        <small>{details}</small>
        </div>
        """)

        with gr.Tab("🎵 语音合成"):
            with gr.Row():
                with gr.Column(scale=1):
                    # Character selection.
                    with gr.Group():
                        gr.Markdown("### 👤 角色设置")
                        character_dropdown = gr.Dropdown(
                            choices=tts_interface.available_characters,
                            value="misono_mika",
                            label="🎭 选择角色",
                            info="当前可用的预训练角色模型",
                            interactive=True
                        )
                    # Text input.
                    with gr.Group():
                        gr.Markdown("### 📝 文本输入")
                        text_input = gr.Textbox(
                            lines=5,
                            label="📄 输入文本",
                            placeholder="请输入要合成的日语文本...\n例如:どうしようかな……やっぱりやりたいかも……!",
                            info="💡 支持日语文本,建议输入完整的句子以获得更好的效果",
                            show_copy_button=True
                        )
                    # Control buttons.
                    with gr.Row():
                        submit_btn = gr.Button(
                            "🎵 开始合成",
                            variant="primary",
                            size="lg",
                            scale=2
                        )
                        clear_btn = gr.Button(
                            "🔄 清空",
                            variant="secondary",
                            scale=1
                        )
                with gr.Column(scale=1):
                    # Audio output.
                    with gr.Group():
                        gr.Markdown("### 🔊 音频输出")
                        audio_output = gr.Audio(
                            label="🎶 生成的音频",
                            type="filepath",
                            interactive=False,
                            show_download_button=True
                        )
                    # Status display.
                    status_output = gr.Textbox(
                        label="📊 合成状态",
                        interactive=False,
                        show_copy_button=False
                    )

        # Examples and tutorial tab.
        with gr.Tab("📚 示例与教程"):
            gr.Markdown("### 🎯 快速示例")
            gr.Markdown("点击下面的示例可以快速体验不同类型的文本合成效果:")
            # Example grid.
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**🌅 问候语**")
                    gr.Examples(
                        examples=[
                            ["おはようございます!", "misono_mika"],
                            ["こんにちは、元気ですか?", "misono_mika"],
                            ["お疲れさまでした", "misono_mika"]
                        ],
                        inputs=[text_input, character_dropdown],
                        outputs=[text_input, character_dropdown, status_output],
                        fn=load_example,
                        run_on_click=True
                    )
                with gr.Column():
                    gr.Markdown("**💭 情感表达**")
                    gr.Examples(
                        examples=[
                            ["どうしようかな……やっぱりやりたいかも……!", "misono_mika"],
                            ["うーん、これは難しいですね", "misono_mika"],
                            ["わあ、すごいですね!", "misono_mika"]
                        ],
                        inputs=[text_input, character_dropdown],
                        outputs=[text_input, character_dropdown, status_output],
                        fn=load_example,
                        run_on_click=True
                    )
                with gr.Column():
                    gr.Markdown("**🎭 日常对话**")
                    gr.Examples(
                        examples=[
                            ["ありがとうございます", "misono_mika"],
                            ["さようなら、また明日", "misono_mika"],
                            ["お先に失礼します", "misono_mika"]
                        ],
                        inputs=[text_input, character_dropdown],
                        outputs=[text_input, character_dropdown, status_output],
                        fn=load_example,
                        run_on_click=True
                    )
            gr.Markdown("""
            ### 📋 使用技巧

            1. **文本长度**: 建议单次输入文本长度在 100 字以内,过长的文本会自动分句处理
            2. **标点符号**: 适当使用标点符号(。!?)可以改善语音的自然度
            3. **特殊符号**: 支持省略号(……)和感叹号(!)等情感表达
            4. **处理时间**: 首次加载角色需要下载模型(约30秒),后续合成较快(5-10秒)

            ### 🔧 技术说明

            - **模型架构**: 基于 Transformer 的端到端语音合成
            - **采样率**: 32kHz,支持高质量音频输出
            - **推理方式**: CPU 优化的 ONNX 模型,适合云端部署
            - **内存占用**: 约 500MB RAM,支持并发处理
            """)

        # About tab.
        with gr.Tab("ℹ️ 关于项目"):
            gr.Markdown("""
            ### 🔍 项目信息

            **Genie TTS** 是基于 GPT-SoVITS V2 架构的轻量级语音合成引擎,专门为 CPU 推理优化。

            #### 📊 技术规格

            | 项目 | 规格 |
            |------|------|
            | **基础模型** | GPT-SoVITS V2 |
            | **推理框架** | ONNX Runtime |
            | **支持语言** | 日语 (Japanese) |
            | **音频格式** | WAV, 32kHz |
            | **推理设备** | CPU (无需 GPU) |
            | **模型大小** | ~200MB |
            | **内存需求** | ~500MB RAM |

            #### 🔗 相关链接

            - 🏠 [项目主页](https://github.com/High-Logic/Genie)
            - 🤗 [Hugging Face 模型](https://huggingface.co/High-Logic/Genie)
            - 📖 [GPT-SoVITS 官方](https://github.com/RVC-Boss/GPT-SoVITS)
            - 💬 [问题反馈](https://github.com/High-Logic/Genie/issues)

            #### 🙏 致谢

            感谢以下项目和开发者:

            - [High-Logic](https://github.com/High-Logic) 团队开发的 Genie TTS
            - [RVC-Boss](https://github.com/RVC-Boss) 团队的 GPT-SoVITS 项目
            - Hugging Face 提供的模型托管和 Spaces 平台

            #### ⚖️ 免责声明

            本应用仅用于演示和研究目的。请合理使用,生成的语音内容责任由使用者承担。
            """)

        # Bind events.
        submit_btn.click(
            fn=tts_wrapper,
            inputs=[text_input, character_dropdown],
            outputs=[audio_output, status_output],
            show_progress="full",
            queue=True
        )
        clear_btn.click(
            fn=clear_all,
            outputs=[text_input, audio_output, status_output]
        )
    return demo
# Start the application when the file is run as a script.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
        "show_error": True,
        "quiet": False,
    }
    demo = create_interface()
    demo.launch(**launch_options)