Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| import numpy as np | |
# Build the Bark-small pipeline once at import time; text_to_speech() reuses
# this single module-level instance for every request.
pipe = pipeline(
    model="suno/bark-small",
)
def text_to_speech(text):
    """Synthesize speech for ``text`` using the module-level ``pipe``.

    The waveform returned by the pipeline is peak-normalized and converted
    to 16-bit PCM before being handed back.

    Args:
        text: The text to convert to speech.

    Returns:
        A ``(sampling_rate, samples)`` tuple, where ``samples`` is an
        ``int16`` NumPy array scaled so its loudest sample reaches
        +/-32767. Silent or empty audio is returned as zeros / empty
        instead of NaN-derived garbage.
    """
    # NOTE(review): confirm the installed transformers text-to-audio pipeline
    # accepts ``return_attention_mask`` as a call-time keyword.
    output = pipe(
        text,
        return_attention_mask=True,
    )

    samples = np.asarray(output["audio"], dtype=np.float32)

    # Presumably the pipeline may return a leading batch axis, i.e. shape
    # (1, n_samples) -- verify against the installed version. Drop it so the
    # result is a plain mono (n_samples,) array.
    if samples.ndim == 2 and samples.shape[0] == 1:
        samples = samples[0]

    # Normalize to [-1, 1]. Guard the peak: np.max raises on an empty array,
    # and dividing by a zero peak (pure silence) would yield NaN, which has
    # no defined int16 representation.
    peak = float(np.max(np.abs(samples))) if samples.size else 0.0
    if peak > 0.0:
        samples = samples / peak

    # Scale to the int16 range.
    pcm = (samples * 32767).astype(np.int16)
    return (output["sampling_rate"], pcm)
# Assemble the web UI: one textbox feeds text_to_speech and the returned
# audio is rendered by an audio component.
_text_input = gr.Textbox(
    label="Text to speak",
    placeholder="Enter the text you want to convert to speech...",
)
_speech_output = gr.Audio(label="Generated Speech")

# Example prompts offered in the interface (one row per example).
_example_prompts = [
    ["Hey, it's HuggingFace on the phone!"],
    ["Welcome to my text to speech demo."],
]

demo = gr.Interface(
    fn=text_to_speech,
    inputs=_text_input,
    outputs=_speech_output,
    title="Text to Speech with Bark-small",
    description="Convert text to speech using the Suno Bark-small model",
    examples=_example_prompts,
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()