Spaces:
Running
Running
| import gradio as gr | |
| from core.cloner import KokoClone | |
| # 1. Initialize the cloner globally so models load only once when the server starts | |
| print("Loading KokoClone models for the Web UI...") | |
| cloner = KokoClone() | |
| def clone_voice(text, lang, ref_audio_path): | |
| """Gradio prediction function.""" | |
| if not text or not text.strip(): | |
| raise gr.Error("Please enter some text.") | |
| if not ref_audio_path: | |
| raise gr.Error("Please upload or record a reference audio file.") | |
| output_file = "gradio_output.wav" | |
| try: | |
| # Call the core engine | |
| cloner.generate( | |
| text=text, | |
| lang=lang, | |
| reference_audio=ref_audio_path, | |
| output_path=output_file | |
| ) | |
| return output_file | |
| except Exception as e: | |
| raise gr.Error(f"An error occurred during generation: {str(e)}") | |
| # 2. Build the Gradio UI using Blocks | |
| with gr.Blocks() as demo: | |
| # Using gr.HTML for the header ensures CSS styles like text-align are respected | |
| gr.HTML(""" | |
| <div style="text-align: center; margin-bottom: 20px;"> | |
| <h1 style="margin: 0;">🎧 KokoClone</h1> | |
| <p style="margin: 10px 0; color: #666;"> | |
| Voice Cloning, Now Inside Kokoro.<br> | |
| Generate natural multilingual speech and clone any target voice with ease.<br> | |
| <i>Built on Kokoro TTS.</i> | |
| </p> | |
| </div> | |
| """) | |
| with gr.Row(): | |
| # LEFT COLUMN: Inputs | |
| with gr.Column(scale=1): | |
| text_input = gr.Textbox( | |
| label="1. Text to Synthesize", | |
| lines=4, | |
| placeholder="Enter the text you want spoken..." | |
| ) | |
| lang_input = gr.Dropdown( | |
| label="2. Language", | |
| choices=[ | |
| ("English", "en"), | |
| ("Hindi", "hi"), | |
| ("French", "fr"), | |
| ("Japanese", "ja"), | |
| ("Chinese", "zh"), | |
| ("Italian", "it"), | |
| ("Spanish", "es"), | |
| ("Portuguese", "pt") | |
| ], | |
| value="en" | |
| ) | |
| # Using type="filepath" passes the temp file path directly to our cloner | |
| ref_audio_input = gr.Audio( | |
| label="3. Reference Voice (Upload or Record)", | |
| type="filepath" | |
| ) | |
| submit_btn = gr.Button("🚀 Generate Clone", variant="primary") | |
| # RIGHT COLUMN: Outputs and Instructions | |
| with gr.Column(scale=1): | |
| output_audio = gr.Audio( | |
| label="Generated Cloned Audio", | |
| interactive=False, | |
| autoplay=False | |
| ) | |
| gr.Markdown( | |
| """ | |
| <br> | |
| ### 💡 Tips for Best Results: | |
| * **Clean Audio:** Use a reference audio clip without background noise or music. | |
| * **Length:** A reference clip of 3 to 10 seconds is usually the sweet spot. | |
| * **Language Match:** Make sure the selected language matches the text you typed! | |
| * **First Run:** The very first generation might take a few extra seconds while the models allocate memory. | |
| """ | |
| ) | |
| # 3. Wire the button to the function | |
| submit_btn.click( | |
| fn=clone_voice, | |
| inputs=[text_input, lang_input, ref_audio_input], | |
| outputs=output_audio | |
| ) | |
| # 4. Launch the app | |
| if __name__ == "__main__": | |
| demo.launch() |