# Spaces: Running
# File size: 3,636 Bytes
# Revisions: fc138bd, 1e2b6b3
import gradio as gr
from core.cloner import KokoClone
# 1. Initialize the cloner globally so models load only once when the server starts.
#    This runs at import time; the single `cloner` instance is the one used by
#    the prediction function below for every request.
print("Loading KokoClone models for the Web UI...")
cloner = KokoClone()
def clone_voice(text, lang, ref_audio_path):
    """Gradio prediction function: synthesize ``text`` using a reference voice.

    Args:
        text: Text to synthesize. Must contain at least one non-whitespace
            character.
        lang: Language value selected in the UI; forwarded unchanged to
            ``cloner.generate``.
        ref_audio_path: Path of the reference audio file; forwarded unchanged
            to ``cloner.generate``.

    Returns:
        Path of the WAV file that ``cloner.generate`` was asked to write.

    Raises:
        gr.Error: If ``text`` or ``ref_audio_path`` is missing, or if
            generation raises any exception (the original exception is
            chained as the cause).
    """
    # Local imports keep this block self-contained.
    import os
    import tempfile

    if not text or not text.strip():
        raise gr.Error("Please enter some text.")
    if not ref_audio_path:
        raise gr.Error("Please upload or record a reference audio file.")

    # Give every request its own output file. A single fixed filename is
    # shared by all sessions, so overlapping requests would overwrite each
    # other's audio before it is delivered.
    fd, output_file = tempfile.mkstemp(prefix="kokoclone_", suffix=".wav")
    os.close(fd)  # Only the path is needed; the cloner writes the file itself.

    try:
        # Call the core engine
        cloner.generate(
            text=text,
            lang=lang,
            reference_audio=ref_audio_path,
            output_path=output_file,
        )
    except Exception as e:
        # Do not leave an empty or partial file behind on failure.
        try:
            os.remove(output_file)
        except OSError:
            pass
        raise gr.Error(f"An error occurred during generation: {str(e)}") from e
    return output_file
# 2. Assemble the interface with gr.Blocks: a header, an input column,
#    an output column, and the button wiring.
with gr.Blocks() as demo:
    # The header is raw HTML so that inline CSS (e.g. text-align) is honoured.
    gr.HTML("""
<div style="text-align: center; margin-bottom: 20px;">
<h1 style="margin: 0;">🎧 KokoClone</h1>
<p style="margin: 10px 0; color: #666;">
Voice Cloning, Now Inside Kokoro.<br>
Generate natural multilingual speech and clone any target voice with ease.<br>
<i>Built on Kokoro TTS.</i>
</p>
</div>
""")

    with gr.Row():
        # Left-hand side: everything the user provides.
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                placeholder="Enter the text you want spoken...",
                lines=4,
                label="1. Text to Synthesize",
            )

            # Each choice is a (display label, value) pair; the value is what
            # the prediction function receives.
            lang_input = gr.Dropdown(
                value="en",
                choices=[
                    ("English", "en"),
                    ("Hindi", "hi"),
                    ("French", "fr"),
                    ("Japanese", "ja"),
                    ("Chinese", "zh"),
                    ("Italian", "it"),
                    ("Spanish", "es"),
                    ("Portuguese", "pt"),
                ],
                label="2. Language",
            )

            # type="filepath" hands the prediction function a path to the
            # uploaded/recorded file, which is passed straight to the cloner.
            ref_audio_input = gr.Audio(
                type="filepath",
                label="3. Reference Voice (Upload or Record)",
            )

            submit_btn = gr.Button("🚀 Generate Clone", variant="primary")

        # Right-hand side: the result player and usage tips.
        with gr.Column(scale=1):
            output_audio = gr.Audio(
                autoplay=False,
                interactive=False,
                label="Generated Cloned Audio",
            )

            gr.Markdown(
                """
<br>
### 💡 Tips for Best Results:
* **Clean Audio:** Use a reference audio clip without background noise or music.
* **Length:** A reference clip of 3 to 10 seconds is usually the sweet spot.
* **Language Match:** Make sure the selected language matches the text you typed!
* **First Run:** The very first generation might take a few extra seconds while the models allocate memory.
"""
            )

    # 3. Clicking the button runs clone_voice on the three inputs and sends
    #    its return value to the output player.
    submit_btn.click(
        outputs=output_audio,
        inputs=[text_input, lang_input, ref_audio_input],
        fn=clone_voice,
    )
# 4. Launch the app only when this file is executed as a script, so importing
#    the module does not start the server.
if __name__ == "__main__":
    demo.launch()