TTS-LipSync / app.py
Torchem's picture
Update app.py
11ffdd7 verified
import gradio as gr
from transformers import pipeline
# Maya1 text-to-speech pipeline. Built once when the module is imported and
# reused by every call to generate_tts; the model is placed on the CUDA device.
pipe = pipeline(
    task="text-to-speech",
    model="maya-research/maya1",
    device="cuda",
)
def generate_tts(text, voice_description):
    """Synthesize speech for ``text`` using the given voice style prompt.

    The voice description is placed in front of the text, separated by a
    single space, and the combined prompt is sent to the module-level
    ``pipe`` text-to-speech pipeline.

    Args:
        text: The text to be spoken.
        voice_description: Style prompt (a run of ``<tag>`` markers) that is
            prepended to ``text``.

    Returns:
        A ``(sample_rate, audio)`` tuple, which the button click handler
        forwards to the ``gr.Audio`` output component.
    """
    # Combine the voice style prompt and the text into one prompt.
    full_prompt = f"{voice_description} {text}"
    out = pipe(full_prompt)

    # Trust the rate reported alongside the audio instead of assuming one:
    # labelling the samples with the wrong rate changes playback pitch and
    # speed. 44100 is kept only as a fallback when no rate is reported.
    sample_rate = out.get("sampling_rate", 44100)
    return (sample_rate, out["audio"])
# Default Torch Em voice description, pre-filled in the "Voice Description" box.
# Adjacent literals are concatenated into a single tag string with no separators.
DEFAULT_VOICE = (
    "<male><40yo>"           # speaker tags
    "<deep><baritone>"       # voice tags
    "<stern><aggressive>"    # delivery tags
)
# Build the Gradio UI: two text inputs, a "Generate" button and an audio
# output that receives whatever generate_tts returns.
with gr.Blocks() as demo:
    # Heading and default sentence use the real emoji / em dash; the previous
    # text contained mis-decoded UTF-8 byte sequences in their place.
    gr.Markdown("## 🔥 Maya1 TTS Demo for Torch Em (GPU Required)")

    text = gr.Textbox(
        label="Text Input",
        value="This is Torch Em — and I'm ready to light this up.",
        lines=3,
    )
    voice_desc = gr.Textbox(
        label="Voice Description",
        value=DEFAULT_VOICE,
        lines=2,
        info="Use emotion tags: <angry> <calm> <shout> <laugh> <sad> <sarcastic> etc.",
    )
    audio_out = gr.Audio(label="Generated Audio")
    btn = gr.Button("Generate")

    # Clicking the button passes both textbox values to generate_tts and
    # routes its return value to the audio component.
    btn.click(fn=generate_tts, inputs=[text, voice_desc], outputs=audio_out)

# Start the web app when the module is executed.
demo.launch()