Spaces:
Runtime error
Runtime error
| import os | |
# Point every cache/config location at /tmp before the heavy libraries are
# imported below, so they pick these values up when they initialise.
# NOTE(review): presumably /tmp is the only writable location in the
# deployment container -- confirm.
os.environ.update(
    {
        # Fixes for HF Hub: hub and transformers caches share one directory.
        "HF_HOME": "/tmp/huggingface",
        "HF_HUB_CACHE": "/tmp/huggingface",
        "TRANSFORMERS_CACHE": "/tmp/huggingface",
        # Fixes for matplotlib and fontconfig: config and cache directories.
        "MPLCONFIGDIR": "/tmp",
        "XDG_CACHE_HOME": "/tmp",
        "XDG_CONFIG_HOME": "/tmp",
        # NOTE(review): intended to switch off numba's cache -- confirm the
        # installed numba version honours this variable.
        "NUMBA_DISABLE_CACHE": "1",
    }
)

# Create the cache directory and the Gradio flagging directory up front.
os.makedirs("/tmp/huggingface", exist_ok=True)
os.makedirs("/tmp/flagged", exist_ok=True)
| import gradio as gr | |
| from openvoice.api import ToneColorConverter | |
| from openvoice import se_extractor | |
| import torch | |
| import time | |
| import uuid | |
# Set model paths
ckpt_converter = "checkpoints/converter/config.json"
output_dir = "/tmp/outputs"
os.makedirs(output_dir, exist_ok=True)

# Choose the device explicitly instead of relying on the library default.
# NOTE(review): upstream OpenVoice defaults ``device`` to "cuda:0" and asserts
# that CUDA is available, which aborts start-up on a CPU-only host -- confirm
# against the installed OpenVoice version.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Initialize converter
tone_color_converter = ToneColorConverter(ckpt_converter, device=device)

# Load the trained weights; constructing the converter only reads the config.
# NOTE(review): assumes the checkpoint is stored next to config.json as
# "checkpoint.pth" (the upstream layout) -- confirm for this deployment.
tone_color_converter.load_ckpt(
    os.path.join(os.path.dirname(ckpt_converter), "checkpoint.pth")
)

# Most recently extracted reference-speaker embedding; written by
# clone_and_speak on every request.
ref_speaker_embed = None
def clone_and_speak(text, speaker_wav):
    """Generate speech for ``text`` using the uploaded reference voice.

    Args:
        text: Text to synthesize, as entered in the UI.
        speaker_wav: Filesystem path of the uploaded reference recording;
            falsy when nothing was uploaded.

    Returns:
        Path of the generated ``.wav`` file inside ``output_dir``.

    Raises:
        gr.Error: If no reference recording was provided.
    """
    if not speaker_wav:
        # The function's result feeds an audio output component, so a returned
        # message string would be handed to that component instead of being
        # shown to the user. Raising gr.Error surfaces the message in the UI.
        raise gr.Error("Please upload a reference .wav file.")

    # Generate a unique filename: a timestamp for rough ordering plus a short
    # random suffix so requests arriving in the same second do not collide.
    timestamp = str(int(time.time()))
    base_name = f"output_{timestamp}_{uuid.uuid4().hex[:6]}"
    output_wav = os.path.join(output_dir, f"{base_name}.wav")

    # Extract style from uploaded speaker voice. Keep the result in a local so
    # a concurrent request cannot swap the embedding between extraction and
    # inference; the module-level variable is still updated for any code that
    # reads the most recent embedding.
    global ref_speaker_embed
    speaker_embed = se_extractor.get_se(speaker_wav, tone_color_converter)
    ref_speaker_embed = speaker_embed

    # Generate speech using base model (internal prompt and sampling).
    # NOTE(review): upstream OpenVoice exposes ToneColorConverter.convert(...)
    # and returns an (embedding, name) pair from se_extractor.get_se -- confirm
    # that ``infer`` and these keyword arguments exist in the installed
    # version before relying on this call.
    tone_color_converter.infer(
        text=text,
        speaker_id="openvoice",
        language="en",
        ref_speaker=speaker_wav,
        ref_embed=speaker_embed,
        output_path=output_wav,
        top_k=10,
        temperature=0.3,
    )
    return output_wav
# Web UI: a text box and a reference-voice upload go in, one audio clip comes
# out. The upload is delivered to clone_and_speak as a file path.
demo = gr.Interface(
    clone_and_speak,
    [
        gr.Textbox(label="Enter Text"),
        gr.Audio(type="filepath", label="Upload a Reference Voice (.wav)"),
    ],
    gr.Audio(label="Synthesized Output"),
    title="Text to Voice using OpenVoice",
    description="Clone any voice (English) and generate speech using OpenVoice on CPU.",
    # Flagged samples are written to the directory created at start-up.
    flagging_dir="/tmp/flagged",
)


# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(share=True)