|
|
import re |
|
|
import os |
|
|
import io |
|
|
import tempfile |
|
|
import gradio as gr |
|
|
from elevenlabs import ElevenLabs |
|
|
|
|
|
|
|
|
# Shared ElevenLabs client used by every request. The API key is read from
# the ELEVEN_API_KEY environment variable; if the variable is unset, None is
# passed through to the SDK.
client = ElevenLabs(api_key=os.environ.get("ELEVEN_API_KEY"))
|
|
|
|
|
def filename_from_text(text, max_words=4, max_stem_chars=60):
    """Build a short, dash-joined MP3 filename from free text.

    Punctuation is removed, the first ``max_words`` words are lowercased and
    joined with dashes, and ``.mp3`` is appended, e.g.
    ``"Hi, how are you today?"`` -> ``"hi-how-are-you.mp3"``.

    Args:
        text: Source text to derive the name from.
        max_words: Maximum number of leading words to keep.
        max_stem_chars: Upper bound on the number of characters before the
            ``.mp3`` suffix. Text without spaces (e.g. a pasted URL once its
            punctuation is stripped) becomes a single very long "word";
            capping the stem keeps the name within the per-component limit
            that filesystems commonly impose (typically 255 bytes), even for
            multi-byte characters.

    Returns:
        The generated filename, or ``"output.mp3"`` when no usable words
        remain.
    """
    # Keep only word characters and whitespace.
    cleaned_text = re.sub(r"[^\w\s]", "", text)
    words = cleaned_text.split()
    if not words:
        return "output.mp3"

    stem = "-".join(words[:max_words]).lower()
    # Truncate after lowercasing (lowercasing can change the length) and
    # drop a dash left dangling at the cut point.
    stem = stem[:max_stem_chars].rstrip("-")

    # An empty stem (e.g. max_words=0) falls back to the default name rather
    # than producing a bare ".mp3".
    return (stem or "output") + ".mp3"
|
|
|
|
|
def generate_tts(my_text):
    """Convert text to speech with ElevenLabs and save it as an MP3 file.

    The audio is requested from the module-level ``client`` and written to a
    file inside a freshly created temporary directory. The file is named
    after the first words of the text via ``filename_from_text``.

    Args:
        my_text: The text to synthesize.

    Returns:
        A ``(file_path, file_path)`` tuple: the same path twice, once for the
        audio preview output and once for the file download output.

    Raises:
        gr.Error: If ``my_text`` is empty or contains only whitespace.
    """
    # Reject blank input up front instead of sending an empty request to the
    # TTS API; gr.Error is shown to the user as a message in the UI.
    if not my_text or not my_text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    audio_chunks = client.text_to_speech.convert(
        voice_id="XKey9vV4E16dmR6ojpyy",
        output_format="mp3_44100_128",
        text=my_text,
        model_id="eleven_multilingual_v2",
    )

    # A new directory per call keeps requests that produce the same filename
    # from overwriting each other. The directory is not removed here because
    # the returned path must still exist after this function returns.
    tmp_dir = tempfile.mkdtemp()
    file_path = os.path.join(tmp_dir, filename_from_text(my_text))

    # Write the chunks straight to disk rather than buffering the whole clip
    # in memory first; empty chunks are skipped.
    with open(file_path, "wb") as f:
        for chunk in audio_chunks:
            if chunk:
                f.write(chunk)

    return file_path, file_path
|
|
|
|
|
|
|
|
# Web UI: one text input wired to generate_tts, with an in-browser audio
# preview and a downloadable copy of the same MP3 file.
iface = gr.Interface(
    fn=generate_tts,
    inputs="text",
    outputs=[
        gr.Audio(type="filepath", label="Audio Preview"),
        gr.File(label="Download MP3"),
    ],
    title="Ava Frigg Text-to-Speech",
    # The original description ended with the dangling fragment "If speech";
    # it is dropped so users see complete sentences only.
    description=(
        "Enter text, generate speech, preview in the browser, and download "
        "the MP3 with a custom filename."
    ),
)

# Start the Gradio server when the script is run.
iface.launch()