# Spaces status: Runtime error
import os
import uuid
import time

# Set temporary cache locations for Hugging Face Spaces.
#
# This MUST happen before torch / gradio / melo / openvoice are imported:
# libraries such as huggingface_hub resolve their cache directories from the
# environment when they are first imported, so assigning these variables after
# the imports (as this file previously did) leaves the caches pointing at the
# default, possibly read-only, home directory.
os.environ["TORCH_HOME"] = "/tmp/torch"
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HF_HUB_CACHE"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.environ["MPLCONFIGDIR"] = "/tmp"
os.environ["XDG_CACHE_HOME"] = "/tmp"
os.environ["XDG_CONFIG_HOME"] = "/tmp"
os.environ["NUMBA_DISABLE_CACHE"] = "1"

# Create the cache / flagging directories referenced above and by the UI.
os.makedirs("/tmp/torch", exist_ok=True)
os.makedirs("/tmp/huggingface", exist_ok=True)
os.makedirs("/tmp/flagged", exist_ok=True)

# Heavy third-party imports come after the environment is configured.
import torch  # noqa: E402
import gradio as gr  # noqa: E402
from melo.api import TTS  # noqa: E402
from openvoice.api import ToneColorConverter  # noqa: E402

# Output folder
output_dir = "/tmp/outputs"
os.makedirs(output_dir, exist_ok=True)

# Device setting. Defined before the converter so both the converter and the
# TTS model run on the same device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize tone converter.
# `ckpt_converter` is the path to the converter's config file; the device is
# passed explicitly instead of relying on the library default, so the app can
# also start on CPU-only hardware.
ckpt_converter = "checkpoints/converter/config.json"
tone_color_converter = ToneColorConverter(ckpt_converter, device=device)
# Load the trained weights; without this the converter runs uninitialized.
# NOTE(review): assumes checkpoint.pth sits next to config.json, as in the
# published OpenVoice checkpoints -- confirm against the deployed files.
tone_color_converter.load_ckpt(
    os.path.join(os.path.dirname(ckpt_converter), "checkpoint.pth")
)
# Cache of Melo TTS models keyed by language, so the model is built once
# instead of on every request.
_TTS_MODELS = {}


def _get_tts_model(language="EN"):
    """Return the Melo TTS model for *language*, creating it on first use."""
    if language not in _TTS_MODELS:
        _TTS_MODELS[language] = TTS(language=language, device=device)
    return _TTS_MODELS[language]


def clone_and_speak(text, speaker_wav):
    """Synthesize *text* with Melo TTS and re-colour it with a reference voice.

    Args:
        text: Text to speak.
        speaker_wav: File path of the reference recording whose tone colour
            should be applied (the Gradio audio input uses type="filepath").

    Returns:
        Path of the converted wav file inside ``output_dir``.

    Raises:
        gr.Error: If no reference audio or no text was supplied. The output
            component is an audio player, so a plain message string cannot be
            returned through it; raising lets the UI show the message.
    """
    if not speaker_wav:
        raise gr.Error("Please upload a reference .wav file.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    base_name = f"output_{int(time.time())}_{uuid.uuid4().hex[:6]}"
    tmp_melo_path = f"{output_dir}/{base_name}_tmp.wav"
    final_output_path = f"{output_dir}/{base_name}_converted.wav"

    # Use English speaker model (built once and reused across requests).
    model = _get_tts_model("EN")
    speaker_ids = model.hps.data.spk2id
    default_speaker_id = next(iter(speaker_ids.values()))

    # Imported here because the rest of the file does not import it.
    from openvoice import se_extractor

    try:
        # Generate base TTS voice
        model.tts_to_file(text, default_speaker_id, tmp_melo_path)

        # Source embedding: the tone colour of the audio being converted.
        # Previously the reference embedding was passed as both source and
        # target, so the converter was never told what voice it starts from.
        # NOTE(review): relies on ToneColorConverter.extract_se accepting a
        # single wav path -- confirm against the installed OpenVoice version.
        src_se = tone_color_converter.extract_se(tmp_melo_path)

        # Target embedding: extracted from the uploaded reference recording.
        # Working files are kept under the writable output directory.
        ref_se, _ = se_extractor.get_se(
            speaker_wav,
            tone_color_converter,
            target_dir=os.path.join(output_dir, "processed"),
            vad=False,
        )

        # Run the tone conversion
        tone_color_converter.convert(
            audio_src_path=tmp_melo_path,
            src_se=src_se,
            tgt_se=ref_se,
            output_path=final_output_path,
            message="@HuggingFace",
        )
    finally:
        # Best-effort removal of the intermediate file; it may not exist if
        # synthesis failed, and a cleanup failure must not mask that error.
        try:
            os.remove(tmp_melo_path)
        except OSError:
            pass

    return final_output_path
# Gradio interface: a text box plus a reference recording in, one audio clip out.
text_input = gr.Textbox(label="Enter Text")
reference_input = gr.Audio(
    type="filepath",
    label="Upload a Reference Voice (.wav)",
)
synthesized_output = gr.Audio(label="Synthesized Output")

demo = gr.Interface(
    fn=clone_and_speak,
    inputs=[text_input, reference_input],
    outputs=synthesized_output,
    flagging_dir="/tmp/flagged",
    title="Text to Voice using Melo TTS + OpenVoice",
    description="Use Melo TTS for base synthesis and OpenVoice to apply a reference speaker's tone.",
)
demo.launch()