Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import requests
|
| 3 |
+
import uuid # For generating unique temporary file names
|
| 4 |
+
|
| 5 |
+
# Define available speakers (update with real speaker options from the Kokoro-TTS model)
|
| 6 |
+
# Placeholder voice labels shown in the speaker dropdown and forwarded
# verbatim to the TTS endpoint as the second request field.
AVAILABLE_SPEAKERS = [
    "Speaker 1",
    "Speaker 2",
    "Speaker 3",
]
|
| 7 |
+
|
| 8 |
+
# Seconds to wait for the remote Space before giving up; without a timeout
# requests.post() can block the Gradio worker indefinitely.
_TTS_REQUEST_TIMEOUT = 120


def _audio_bytes_from_payload(audio_data):
    """Convert the audio field of the API response into raw bytes.

    JSON cannot carry raw binary, so the value is expected to be text.
    NOTE(review): the exact response shape of the Kokoro-TTS Space is not
    visible from this file -- confirm against a real response.

    Args:
        audio_data: First element of the response's "data" list. Either a
            string, or a dict whose "data" entry holds the string.

    Returns:
        The decoded audio as bytes.

    Raises:
        ValueError: If no string payload can be found, or it fails to decode.
    """
    import base64  # local import: the file's import header does not include it

    if isinstance(audio_data, dict):
        # presumably an inlined payload lives under "data"; verify with the API
        audio_data = audio_data.get("data")
    if not isinstance(audio_data, str):
        raise ValueError("audio payload is not an inline string")

    if audio_data.startswith("data:"):
        # Data URI ("data:audio/wav;base64,<payload>"): decode what follows
        # the first comma instead of writing the URI text into the file.
        _, _, encoded = audio_data.partition(",")
        return base64.b64decode(encoded)

    # Fallback keeps the original behaviour for any other string payload.
    return audio_data.encode("latin1")


# Function to interact with Kokoro-TTS API and generate speech
def generate_tts(text, speaker):
    """Request speech for ``text`` from the Kokoro-TTS Space and save it.

    Args:
        text: The text to synthesise.
        speaker: The speaker label, sent to the API unchanged.

    Returns:
        A ``(path, path)`` tuple: the same WAV file path twice, one for the
        audio player output and one for the download output.

    Raises:
        gr.Error: If the text is blank, the request fails, the server
            responds with a non-200 status, or the response cannot be
            decoded. Gradio shows the message to the user; returning an
            error string in the file slot would be treated as a file path.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Kokoro-TTS Space endpoint
    url = "https://hexgrad-kokoro-tts.hf.space/api/predict"
    payload = {
        "data": [text, speaker]  # Send text and speaker selection to API
    }

    try:
        response = requests.post(url, json=payload, timeout=_TTS_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        raise gr.Error("Error: Unable to generate TTS") from exc

    if response.status_code != 200:
        raise gr.Error("Error: Unable to generate TTS")

    try:
        audio_data = response.json()["data"][0]
        audio_bytes = _audio_bytes_from_payload(audio_data)
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # ValueError also covers JSON, base64 and latin-1 encoding failures.
        raise gr.Error("Error: Unexpected response from the TTS service") from exc

    # Unique name so concurrent requests never overwrite each other's output.
    temp_filename = f"output_{uuid.uuid4().hex}.wav"
    with open(temp_filename, "wb") as f:
        f.write(audio_bytes)

    return temp_filename, temp_filename
|
| 26 |
+
|
| 27 |
+
# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("## Long Text-to-Speech Generator with Kokoro-TTS")

    # Inputs: the text to synthesise and the speaker to use.
    with gr.Row():
        input_text = gr.Textbox(
            label="Enter your text",
            placeholder="Type or paste your text here...",
            lines=10,
        )
        speaker_dropdown = gr.Dropdown(
            choices=AVAILABLE_SPEAKERS,
            # Preselect the first speaker so the callback never receives None
            # when the user clicks without choosing one.
            value=AVAILABLE_SPEAKERS[0] if AVAILABLE_SPEAKERS else None,
            label="Select Speaker",
        )

    with gr.Row():
        generate_button = gr.Button("Generate Speech")

    # Outputs: an audio player and a file component for downloading.
    with gr.Row():
        # generate_tts returns a path on disk, so the component type is
        # "filepath"; "file" is not one of the documented values.
        audio_output = gr.Audio(label="Generated Speech", type="filepath", interactive=False)
        download_button = gr.File(label="Download Audio", file_types=[".wav", ".mp3"])

    # Link the generate button to the TTS generation function
    generate_button.click(
        fn=generate_tts,
        inputs=[input_text, speaker_dropdown],
        outputs=[audio_output, download_button],
    )

# Launch the app
app.launch()
|