# TTS / app.py
# Aranwer's picture
# Update app.py
# cc74df0 verified
import gradio as gr
from TTS.api import TTS
import tempfile
import os
# Load the TTS model once at import time; every request reuses this instance.
model_name = "tts_models/en/vctk/vits"
tts = TTS(model_name)

# Display names shown in the UI, keyed by the model's speaker id.
# NOTE(review): these descriptions are hand-written labels; confirm they
# match the actual voices before relying on them.
speaker_labels = dict(
    p225="Male, Young Adult",
    p226="Female, Middle-Aged",
    p227="Male, Mature Storyteller",
    p228="Female, Young Adult",
    p229="Male, Elderly Narrator",
    p230="Female, Warm Storyteller",
    p231="Male, Deep Voice",
    p232="Female, Clear Articulation",
    p233="Male, Authoritative",
    p234="Female, Gentle Storyteller",
)

# Keep only the model speakers that have a label, in the model's own order.
available_speakers = [
    voice_id for voice_id in tts.speakers if voice_id in speaker_labels
]
def text_to_speech(text, speaker_name, speed, pitch):
    """Synthesize *text* to a temporary WAV file and return its path.

    Args:
        text: Text to narrate; must contain at least one non-whitespace
            character.
        speaker_name: Speaker id forwarded to ``tts.tts_to_file``.
        speed: Speaking-rate value forwarded to ``tts.tts_to_file``.
        pitch: Pitch shift in semitones applied with sox; a falsy value
            (``0``, ``0.0`` or ``None``) skips the pitch pass.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False``, so the caller owns its lifetime.

    Raises:
        gr.Error: If the text is empty or synthesis / pitch shifting fails.
            Temporary files created by the failed call are removed first.
    """
    output_path = None
    adjusted_file = None
    try:
        if not text or not text.strip():
            raise ValueError("Please enter some text")

        # Reserve a unique path, then close the handle before anything else
        # writes to it (an open handle blocks re-opening on some platforms).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            output_path = f.name

        tts.tts_to_file(
            text=text,
            speaker=speaker_name,
            file_path=output_path,
            speed=speed,
        )

        # Adjust pitch using sox if needed
        if pitch:
            try:
                import sox
            except ImportError:
                # Pitch shifting is optional: fall back to the unshifted audio.
                print("Sox not installed; skipping pitch adjustment.")
            else:
                adjusted_file = output_path + "_adjusted.wav"
                tfm = sox.Transformer()
                tfm.pitch(pitch)
                tfm.build_file(output_path, adjusted_file)
                # Swap the shifted audio into the path we hand back.
                os.replace(adjusted_file, output_path)
                adjusted_file = None

        return output_path
    except Exception as e:
        # Do not leave half-written temp files behind on failure.
        for leftover in (adjusted_file, output_path):
            if leftover:
                try:
                    os.remove(leftover)
                except OSError:
                    # Best-effort cleanup; the original error matters more.
                    pass
        raise gr.Error(f"Error generating speech: {str(e)}") from e
def create_download_link(audio_file):
    """Return a component update for the download button.

    The button is shown and pointed at *audio_file* when that path is
    non-empty and exists on disk; otherwise it is hidden.
    """
    file_ready = bool(audio_file) and os.path.exists(audio_file)
    if not file_ready:
        return gr.update(visible=False)
    return gr.update(visible=True, value=audio_file)
# Build the Gradio interface; `app` is launched from the __main__ guard.
with gr.Blocks(title="Storytelling TTS App") as app:
    gr.Markdown("# 🎙️ Professional Storytelling Text-to-Speech")
    gr.Markdown("Convert your text into narrated audio using expressive voices. Ideal for audiobooks, storytelling, and podcast narration.")

    with gr.Row():
        # Left column: text, voice selection and voice adjustments.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your story text",
                lines=8,
                placeholder="Once upon a time..."
            )
            speaker = gr.Dropdown(
                # gr.Dropdown has no `format_func`; friendly names are given
                # as (label, value) pairs so the callback still receives the
                # raw speaker id.
                choices=[
                    (speaker_labels[spk], spk) for spk in available_speakers
                ],
                label="Narrator Voice",
                # Fall back to the first labelled speaker (or no selection)
                # if the preferred default is missing from the model.
                value=(
                    "p227"
                    if "p227" in available_speakers
                    else next(iter(available_speakers), None)
                ),
            )
            with gr.Accordion("🎛️ Voice Adjustment", open=True):
                speed = gr.Slider(
                    minimum=0.5, maximum=2.0,
                    value=1.0, step=0.1,
                    label="Speaking Rate",
                    info="1.0 = normal speed"
                )
                pitch = gr.Slider(
                    minimum=-5.0, maximum=5.0,
                    value=0.0, step=0.5,
                    label="Pitch Shift (in semitones)",
                    info="0 = normal, positive = higher pitch"
                )
            generate_btn = gr.Button("🎧 Generate Narration", variant="primary")

        # Right column: generated audio, download button, voice previews.
        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Narration",
                type="filepath",
                elem_classes=["output-audio"]
            )
            download_button = gr.DownloadButton(
                label="Download Audio", visible=False
            )
            with gr.Accordion("🎀 Preview Narrator Voices (Samples Coming Soon)", open=False):
                gr.Markdown("Previews will be available here once sample audios are added.")
                # Hidden placeholders for the first three labelled speakers.
                for speaker_id in available_speakers[:3]:
                    gr.Audio(
                        value=None,
                        label=speaker_labels[speaker_id],
                        visible=False  # Set to True and provide file path or URL to enable
                    )

    # Synthesize first, then reveal the download button for the new file.
    generate_btn.click(
        fn=text_to_speech,
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output
    ).then(
        fn=create_download_link,
        inputs=audio_output,
        outputs=download_button
    )

    # Each example row is [text, speaker id, speed, pitch].
    gr.Examples(
        examples=[
            ["The old man sat by the fireplace, his eyes twinkling with memories of adventures past.", "p227", 0.9, 0.0],
            ["In a quiet village nestled between the mountains, a young girl discovered a secret that would change everything.", "p234", 1.0, 0.5],
            ["The detective examined the clue carefully, knowing this small piece of evidence could crack the entire case wide open.", "p231", 1.1, -1.0]
        ],
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output,
        fn=text_to_speech,
        cache_examples=False
    )
if __name__ == "__main__":
    # Probe for the optional pitch-shifting dependency and print a hint when
    # it is missing; the app is launched either way.
    try:
        import sox  # noqa: F401 -- imported only to check availability
    except ImportError:
        print("Consider installing sox for pitch adjustment: pip install sox")

    app.launch()