# tta / app.py
# Last commit: "add app" (b79c492), uploaded by aishitdharwal
import gradio as gr
from transformers import pipeline
import numpy as np
# Build the Hugging Face pipeline for the "suno/bark-small" checkpoint once, at
# import time, so every call to text_to_speech reuses the same `pipe` object.
# No task name is passed here; only the model identifier is given.
pipe = pipeline(model="suno/bark-small")
def text_to_speech(text):
    """Synthesize speech for ``text`` and return it as a Gradio audio tuple.

    Args:
        text: The text to speak.

    Returns:
        A ``(sampling_rate, samples)`` tuple. ``samples`` is a NumPy ``int16``
        array, peak-normalized so the loudest sample uses the full 16-bit
        range. Silent or empty output is returned as-is (all zeros / empty)
        instead of being divided by a zero peak.
    """
    # NOTE(review): confirm that the installed transformers version accepts
    # ``return_attention_mask`` as a call-time keyword for this pipeline.
    output = pipe(
        text,
        return_attention_mask=True
    )

    audio = np.asarray(output["audio"], dtype=np.float32)

    # Gradio reads 2-D audio as (samples, channels). If the pipeline hands back
    # a leading batch axis of length 1, drop it so mono speech is not
    # interpreted as a single sample with thousands of channels.
    if audio.ndim == 2 and audio.shape[0] == 1:
        audio = audio[0]

    # Peak-normalize to [-1, 1]. np.max raises on an empty array and a zero
    # peak would turn every sample into NaN, so both cases skip the division.
    peak = float(np.max(np.abs(audio))) if audio.size else 0.0
    if peak > 0.0:
        audio = audio / peak

    # Scale to the signed 16-bit PCM range.
    audio = (audio * 32767).astype(np.int16)
    return (output["sampling_rate"], audio)
# Assemble the web UI: a single text box feeds text_to_speech, and the
# (sampling_rate, samples) result is shown in an audio component.
demo = gr.Interface(
    title="Text to Speech with Bark-small",
    description="Convert text to speech using the Suno Bark-small model",
    fn=text_to_speech,
    inputs=gr.Textbox(
        placeholder="Enter the text you want to convert to speech...",
        label="Text to speak",
    ),
    outputs=gr.Audio(label="Generated Speech"),
    # One row per example; each row holds the value for the single text input.
    examples=[
        ["Hey, it's HuggingFace on the phone!"],
        ["Welcome to my text to speech demo."],
    ],
)

# Launch the interface only when this file is run directly as a script.
if __name__ == "__main__":
    demo.launch()