# Spaces: Build error
| import os | |
| import torch | |
| import gradio as gr | |
| from openvoice import se_extractor | |
| from openvoice.api import BaseSpeakerTTS, ToneColorConverter | |
# First-run bootstrap: when no local "checkpoints" directory exists, fetch
# the model files through the helper imported from openvoice.utils.
# NOTE(review): the message below says "V2", while the loaders further down
# are labelled "V1 style" and read checkpoints/base_speakers/{EN,ZH} and
# checkpoints/converter -- confirm the download produces that layout.
if not os.path.isdir("checkpoints"):
    # Imported lazily so the helper is only needed when a download happens.
    from openvoice.utils import download_checkpoints_v2

    print("Downloading OpenVoice V2 checkpoints (~1.5GB)...")
    download_checkpoints_v2()
# ------------------- Setup -------------------
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Directory that receives the intermediate and final wav files.
output_dir = "outputs"
os.makedirs(output_dir, exist_ok=True)

# English base speaker (V1 style): build from its config, then load weights.
ckpt_base_en = "checkpoints/base_speakers/EN"
base_speaker_tts_en = BaseSpeakerTTS(f"{ckpt_base_en}/config.json", device=device)
base_speaker_tts_en.load_ckpt(f"{ckpt_base_en}/checkpoint.pth")

# Chinese base speaker, loaded the same way from its own directory.
ckpt_base_zh = "checkpoints/base_speakers/ZH"
base_speaker_tts_zh = BaseSpeakerTTS(f"{ckpt_base_zh}/config.json", device=device)
base_speaker_tts_zh.load_ckpt(f"{ckpt_base_zh}/checkpoint.pth")

# Tone color converter, shared by both languages.
ckpt_converter = "checkpoints/converter"
tone_color_converter = ToneColorConverter(f"{ckpt_converter}/config.json", device=device)
tone_color_converter.load_ckpt(f"{ckpt_converter}/checkpoint.pth")

# Source speaker embeddings, loaded once at start-up onto the chosen device.
# voice_clone() picks one of them as the conversion source; the "style"
# embedding is the one it uses for the English "whispering" style.
source_se_en_default = torch.load(f"{ckpt_base_en}/en_default_se.pth", map_location=device)
source_se_en_whisper = torch.load(f"{ckpt_base_en}/en_style_se.pth", map_location=device)
source_se_zh_default = torch.load(f"{ckpt_base_zh}/zh_default_se.pth", map_location=device)
| # ------------------- Main Function ------------------- | |
def voice_clone(
    reference_audio,
    text,
    language="English",
    style="default",
    speed=1.0
):
    """Speak ``text`` with a base speaker, then convert it to the reference voice.

    Args:
        reference_audio: Reference recording handed to ``se_extractor.get_se``
            (the UI supplies a file path), or ``None`` if nothing was uploaded.
        text: Text to synthesize. ``None``, empty and whitespace-only values
            are rejected with a status message instead of raising.
        language: ``"English"`` selects the English base speaker; any other
            value selects the Chinese base speaker. The value is forwarded
            unchanged to the base speaker's ``tts`` call.
        style: Base speaker style. ``"whispering"`` is handled specially for
            English; for the Chinese branch the style is forced to
            ``"default"``.
        speed: Speed forwarded to the base speaker. It is overridden with
            ``0.9`` when English ``"whispering"`` is selected.

    Returns:
        A ``(path, status)`` tuple. ``path`` is ``None`` when the input is
        rejected, otherwise the path of the converted wav file.

    Note:
        The intermediate and final files use fixed names inside
        ``output_dir``, so every call overwrites the previous result.
    """
    if reference_audio is None:
        return None, "Please upload a reference voice (5-30 seconds)"
    # ``text`` may be None (e.g. a null payload); calling .strip() on it would
    # raise AttributeError, so treat it the same as empty text.
    if text is None or not text.strip():
        return None, "Please enter some text"

    # Extract the target speaker embedding from the reference recording.
    target_se, _ = se_extractor.get_se(
        reference_audio, tone_color_converter, target_dir='processed', vad=True
    )

    # Choose the base TTS model and the matching source embedding.
    if language == "English":
        tts = base_speaker_tts_en
        if style == "whispering":
            source_se = source_se_en_whisper
            speed = 0.9  # whispering always runs at this fixed speed
        else:
            source_se = source_se_en_default
    else:  # Chinese
        tts = base_speaker_tts_zh
        source_se = source_se_zh_default
        style = "default"  # the Chinese branch only ever uses the default style

    # Generate the base speech that the converter will re-colour.
    src_path = f"{output_dir}/tmp.wav"
    tts.tts(text, src_path, speaker=style, language=language, speed=speed)

    # Convert the base speech to the reference voice. "@MyShell" is passed as
    # the converter's ``message`` argument.
    save_path = f"{output_dir}/output_cloned.wav"
    tone_color_converter.convert(
        audio_src_path=src_path,
        src_se=source_se,
        tgt_se=target_se,
        output_path=save_path,
        message="@MyShell"
    )
    return save_path, f"Success! Cloned in {language} ({style})"
# ------------------- Gradio Interface -------------------
# NOTE(review): the row nesting below was reconstructed from a listing that
# had lost its indentation -- confirm the layout against the running app.
with gr.Blocks(title="OpenVoice Voice Style Control Demo") as demo:
    gr.Markdown("# OpenVoice Voice Style Control Demo")
    gr.Markdown("Upload any voice → Choose language & style → Generate cloned speech instantly!")

    # Reference recording; delivered to the callback as a file path.
    with gr.Row():
        ref_audio = gr.Audio(label="Reference Voice (5-30s, clear speech)", type="filepath", sources=["upload"])

    # Text that will be spoken in the cloned voice.
    with gr.Row():
        text_input = gr.Textbox(label="Text to Speak", value="This audio is generated by OpenVoice.", lines=3)

    # Language and style selectors.
    with gr.Row():
        language = gr.Dropdown(choices=["English", "Chinese"], value="English", label="Language")
        style = gr.Dropdown(choices=["default", "whispering"], value="default", label="Style (English only)")

    generate_btn = gr.Button(value="Generate Cloned Voice", variant="primary")

    # Result audio plus the status text returned by voice_clone.
    with gr.Row():
        output_audio = gr.Audio(label="Cloned Output")
        status = gr.Textbox(label="Status")

    # Only four inputs are wired, so voice_clone runs with its default speed.
    generate_btn.click(
        voice_clone,
        [ref_audio, text_input, language, style],
        [output_audio, status],
    )

    gr.Markdown("""
    **Tech for good**: All outputs contain @MyShell watermark.
    Made with [OpenVoice by MyShell.ai](https://github.com/myshell-ai/OpenVoice)
    """)

demo.launch()