File size: 5,037 Bytes
cc74df0 3915525 3681b2d 3915525 cc74df0 3915525 cc74df0 20c432f 3915525 cc74df0 20c432f 3915525 3681b2d cc74df0 3915525 20c432f cc74df0 20c432f cc74df0 3915525 3681b2d 3915525 3681b2d cc74df0 3681b2d 20c432f cc74df0 3681b2d cc74df0 20c432f cc74df0 20c432f cc74df0 20c432f cc74df0 20c432f cc74df0 20c432f cc74df0 20c432f cc74df0 20c432f cc74df0 3681b2d 20c432f cc74df0 20c432f cc74df0 3681b2d 20c432f 3681b2d cc74df0 3681b2d cc74df0 3681b2d 20c432f 3681b2d 20c432f 3681b2d 20c432f 3681b2d 3915525 20c432f 3681b2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import gradio as gr
from TTS.api import TTS
import tempfile
import os
# Load the multi-speaker VCTK VITS model once, at import time, so every
# request served by the app reuses the same instance.
model_name = "tts_models/en/vctk/vits"
tts = TTS(model_name)

# Display names shown in the UI, keyed by the model's speaker ids.
# NOTE(review): these descriptions are hand-assigned here; confirm they match
# the actual voices before relying on them.
speaker_labels = dict(
    p225="Male, Young Adult",
    p226="Female, Middle-Aged",
    p227="Male, Mature Storyteller",
    p228="Female, Young Adult",
    p229="Male, Elderly Narrator",
    p230="Female, Warm Storyteller",
    p231="Male, Deep Voice",
    p232="Female, Clear Articulation",
    p233="Male, Authoritative",
    p234="Female, Gentle Storyteller",
)

# Offer only the speakers the model reports that also have a label above;
# the order follows tts.speakers.
available_speakers = list(
    filter(lambda speaker_id: speaker_id in speaker_labels, tts.speakers)
)
def text_to_speech(text, speaker_name, speed, pitch):
    """Synthesize *text* with the chosen speaker and return the WAV file path.

    Args:
        text: Text to narrate; must contain at least one non-whitespace
            character.
        speaker_name: Speaker id handed to the TTS model (e.g. ``"p227"``).
        speed: Speaking-rate value forwarded to ``tts.tts_to_file``.
        pitch: Pitch shift in semitones. A zero (or missing) value skips the
            sox post-processing step entirely.

    Returns:
        Path to a temporary ``.wav`` file containing the narration. The file
        is created with ``delete=False`` so it outlives this call.

    Raises:
        gr.Error: If the text is empty or synthesis / pitch shifting fails.
    """
    output_path = None
    try:
        if not text or not text.strip():
            raise ValueError("Please enter some text")

        # Reserve a unique path, then close the handle *before* the model
        # writes to it: writing to a file that is still open elsewhere fails
        # on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            output_path = f.name

        tts.tts_to_file(
            text=text,
            speaker=speaker_name,
            file_path=output_path,
            speed=speed,
        )

        # Pitch shifting is optional and depends on the sox binding.
        if pitch:
            try:
                import sox
            except ImportError:
                print("Sox not installed; skipping pitch adjustment.")
            else:
                tfm = sox.Transformer()
                tfm.pitch(pitch)
                adjusted_file = output_path + "_adjusted.wav"
                tfm.build_file(output_path, adjusted_file)
                # Swap the shifted audio into place so callers see one path.
                os.replace(adjusted_file, output_path)

        return output_path
    except Exception as e:
        # Do not leave orphaned temp files behind when generation fails.
        if output_path is not None:
            for leftover in (output_path, output_path + "_adjusted.wav"):
                try:
                    os.remove(leftover)
                except OSError:
                    pass
        raise gr.Error(f"Error generating speech: {e}") from e
def create_download_link(audio_file):
    """Return a component update that shows or hides the download button.

    The button is made visible and pointed at *audio_file* only when that
    path is non-empty and exists on disk; otherwise it is hidden.
    """
    if not audio_file or not os.path.exists(audio_file):
        return gr.update(visible=False)
    return gr.update(visible=True, value=audio_file)
# Build the Gradio UI: text / voice / rate / pitch inputs on the left, the
# generated audio and a download button on the right, plus canned examples.
with gr.Blocks(title="Storytelling TTS App") as app:
    gr.Markdown("# 🎙️ Professional Storytelling Text-to-Speech")
    gr.Markdown("Convert your text into narrated audio using expressive voices. Ideal for audiobooks, storytelling, and podcast narration.")

    # gr.Dropdown has no ``format_func``; friendly names are supplied as
    # (label, value) pairs so the callback still receives the speaker id.
    _speaker_choices = [
        (speaker_labels[spk], spk) for spk in available_speakers
    ]
    # Prefer the storyteller voice, but fall back gracefully if the model
    # does not expose it.
    if "p227" in available_speakers:
        _default_speaker = "p227"
    elif available_speakers:
        _default_speaker = available_speakers[0]
    else:
        _default_speaker = None

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your story text",
                lines=8,
                placeholder="Once upon a time...",
            )
            speaker = gr.Dropdown(
                choices=_speaker_choices,
                label="Narrator Voice",
                value=_default_speaker,
            )
            with gr.Accordion("🎛️ Voice Adjustment", open=True):
                speed = gr.Slider(
                    minimum=0.5, maximum=2.0,
                    value=1.0, step=0.1,
                    label="Speaking Rate",
                    info="1.0 = normal speed",
                )
                pitch = gr.Slider(
                    minimum=-5.0, maximum=5.0,
                    value=0.0, step=0.5,
                    label="Pitch Shift (in semitones)",
                    info="0 = normal, positive = higher pitch",
                )
            generate_btn = gr.Button("🎧 Generate Narration", variant="primary")
        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Narration",
                type="filepath",
                elem_classes=["output-audio"],
            )
            download_button = gr.DownloadButton(
                label="Download Audio", visible=False
            )

    with gr.Accordion("🎤 Preview Narrator Voices (Samples Coming Soon)", open=False):
        gr.Markdown("Previews will be available here once sample audios are added.")
        # Hidden placeholders for the first three voices until samples exist.
        for speaker_id in available_speakers[:3]:
            gr.Audio(
                value=None,
                label=speaker_labels[speaker_id],
                visible=False,  # Set to True and provide file path or URL to enable
            )

    # Generate the narration first, then reveal the download button for it.
    generate_btn.click(
        fn=text_to_speech,
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output,
    ).then(
        fn=create_download_link,
        inputs=audio_output,
        outputs=download_button,
    )

    gr.Examples(
        examples=[
            ["The old man sat by the fireplace, his eyes twinkling with memories of adventures past.", "p227", 0.9, 0.0],
            ["In a quiet village nestled between the mountains, a young girl discovered a secret that would change everything.", "p234", 1.0, 0.5],
            ["The detective examined the clue carefully, knowing this small piece of evidence could crack the entire case wide open.", "p231", 1.1, -1.0],
        ],
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output,
        fn=text_to_speech,
        cache_examples=False,
    )
if __name__ == "__main__":
    # Probe for the optional sox binding up front so the operator gets a hint
    # at startup rather than only when a pitch shift is first requested.
    try:
        import sox  # noqa: F401  (imported only to test availability)
    except ImportError:
        print("Consider installing sox for pitch adjustment: pip install sox")
    app.launch()