FLENclone / app.py
PatnaikAshish's picture
Update app.py
1e2b6b3 verified
import os
import tempfile

import gradio as gr

from core.cloner import KokoClone
# 1. Initialize the cloner globally so models load only once when the server starts
# NOTE: this runs at import time, so merely importing this module constructs the
# cloner. `clone_voice` below reads this module-level instance on every request.
print("Loading KokoClone models for the Web UI...")
cloner = KokoClone()
def clone_voice(text, lang, ref_audio_path):
    """Gradio prediction function: synthesize ``text`` in the reference voice.

    Args:
        text: Text to synthesize; must contain at least one non-whitespace
            character.
        lang: Language code selected in the UI (e.g. ``"en"``); passed through
            to the cloner unchanged.
        ref_audio_path: Filesystem path of the uploaded or recorded reference
            clip.

    Returns:
        Path of the generated audio file (``.wav`` suffix), created in the
        system temporary directory.

    Raises:
        gr.Error: If ``text`` is blank, no reference audio was supplied, or
            the cloner raises during generation.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text.")
    if not ref_audio_path:
        raise gr.Error("Please upload or record a reference audio file.")

    # Reserve a unique output path per request. A single fixed filename would
    # let concurrent requests overwrite each other's result before it is served.
    # delete=False keeps the file on disk after the handle is closed so the
    # cloner can write to it and Gradio can read it afterwards.
    with tempfile.NamedTemporaryFile(
        prefix="kokoclone_", suffix=".wav", delete=False
    ) as tmp_file:
        output_file = tmp_file.name

    try:
        # Call the core engine
        cloner.generate(
            text=text,
            lang=lang,
            reference_audio=ref_audio_path,
            output_path=output_file,
        )
    except Exception as e:
        # Best-effort cleanup: do not leave an empty or partial file behind.
        try:
            os.remove(output_file)
        except OSError:
            pass
        # Chain the cause so the server log keeps the original traceback.
        raise gr.Error(f"An error occurred during generation: {str(e)}") from e
    return output_file
# 2. Build the Gradio UI using Blocks
# Components are laid out in the order they are created inside the nested
# `with` contexts: one row containing an input column and an output column.
with gr.Blocks() as demo:
    # Using gr.HTML for the header ensures CSS styles like text-align are respected
    # (the markup is kept flush-left so no source indentation ends up in the string)
    gr.HTML("""
<div style="text-align: center; margin-bottom: 20px;">
<h1 style="margin: 0;">🎧 KokoClone</h1>
<p style="margin: 10px 0; color: #666;">
Voice Cloning, Now Inside Kokoro.<br>
Generate natural multilingual speech and clone any target voice with ease.<br>
<i>Built on Kokoro TTS.</i>
</p>
</div>
""")
    with gr.Row():
        # LEFT COLUMN: Inputs
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                label="1. Text to Synthesize",
                lines=4,
                placeholder="Enter the text you want spoken..."
            )
            # Each choice is a (display label, value) pair; the value (e.g. "en")
            # is what reaches clone_voice as `lang`.
            lang_input = gr.Dropdown(
                label="2. Language",
                choices=[
                    ("English", "en"),
                    ("Hindi", "hi"),
                    ("French", "fr"),
                    ("Japanese", "ja"),
                    ("Chinese", "zh"),
                    ("Italian", "it"),
                    ("Spanish", "es"),
                    ("Portuguese", "pt")
                ],
                value="en"
            )
            # Using type="filepath" passes the temp file path directly to our cloner
            ref_audio_input = gr.Audio(
                label="3. Reference Voice (Upload or Record)",
                type="filepath"
            )
            submit_btn = gr.Button("🚀 Generate Clone", variant="primary")
        # RIGHT COLUMN: Outputs and Instructions
        with gr.Column(scale=1):
            # Read-only player for the file path returned by clone_voice
            output_audio = gr.Audio(
                label="Generated Cloned Audio",
                interactive=False,
                autoplay=False
            )
            # Static usage tips (string kept flush-left, as with the header above)
            gr.Markdown(
                """
<br>
### 💡 Tips for Best Results:
* **Clean Audio:** Use a reference audio clip without background noise or music.
* **Length:** A reference clip of 3 to 10 seconds is usually the sweet spot.
* **Language Match:** Make sure the selected language matches the text you typed!
* **First Run:** The very first generation might take a few extra seconds while the models allocate memory.
"""
            )
    # 3. Wire the button to the function
    # The inputs are passed positionally, in this order, to
    # clone_voice(text, lang, ref_audio_path); its return value feeds output_audio.
    submit_btn.click(
        fn=clone_voice,
        inputs=[text_input, lang_input, ref_audio_input],
        outputs=output_audio
    )
# 4. Launch the app
# Guarded so the server starts only when this file is executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    demo.launch()