# bark / app.py
# drixo's picture
# Update app.py
# 78b083f verified
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForTextToWaveform
import scipy.io.wavfile as wavfile
import numpy as np
import tempfile
# -----------------------------
# Load Bark Model + Processor
# -----------------------------
# Run on the GPU when torch can see one; otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Half-precision weights on the GPU, full precision on the CPU.
if device == "cuda":
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

# Processor and model are both loaded from the same checkpoint.
_BARK_CHECKPOINT = "suno/bark"
processor = AutoProcessor.from_pretrained(_BARK_CHECKPOINT)
model = AutoModelForTextToWaveform.from_pretrained(
    _BARK_CHECKPOINT,
    torch_dtype=_weights_dtype,
)
model = model.to(device)
# -----------------------------
# Voice Presets (Bark v2 English speakers 0-9)
# -----------------------------
# Preset identifiers offered in the UI dropdown, kept in sorted order.
voices = [f"v2/en_speaker_{index}" for index in range(10)]
voices.sort()
# -----------------------------
# Audio Generation Function
# -----------------------------
def _move_to_device(value, target):
    """Move every tensor in *value* to *target*, descending into mappings.

    Mappings are updated in place, so the container object returned by the
    processor is preserved and can still be unpacked with ``**``.
    """
    if torch.is_tensor(value):
        return value.to(target)
    if hasattr(value, "items"):
        # Snapshot the items so reassigning keys never disturbs iteration.
        for key, item in list(value.items()):
            value[key] = _move_to_device(item, target)
    return value


def generate_audio(text, voice):
    """Synthesize *text* with Bark and return the path of the resulting WAV.

    Args:
        text: Prompt to speak or sing. ``None``, empty, or whitespace-only
            prompts produce no audio.
        voice: Voice preset identifier, forwarded to the processor as
            ``voice_preset``.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio, or
        ``None`` when there is nothing to synthesize. The file is created
        with ``delete=False``, so it is not removed automatically.
    """
    # Guard against a missing prompt as well as a blank one.
    if not text or not text.strip():
        return None

    # Prepare inputs and move all tensors to the model's device.
    # NOTE(review): the voice-preset tensors appear to be nested under a
    # sub-mapping of the processor output - confirm; the recursive move
    # covers both the flat and the nested layout.
    inputs = _move_to_device(processor(text, voice_preset=voice), device)

    # Generate waveform
    with torch.no_grad():
        audio = model.generate(**inputs)

    # Upcast before handing off to scipy: on the CUDA path the model runs in
    # float16, which is not a sample format scipy's WAV writer supports.
    waveform = audio.detach().float().cpu().numpy().squeeze()

    # Prefer the rate advertised by the model; 24000 is the previous
    # hard-coded value and remains the fallback.
    generation_config = getattr(model, "generation_config", None)
    sample_rate = getattr(generation_config, "sample_rate", None) or 24000

    # Reserve a temp path, close the handle, then write the WAV by name so
    # the file is never opened twice at once.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name
    wavfile.write(wav_path, sample_rate, waveform)
    return wav_path
# -----------------------------
# Gradio Interface
# -----------------------------
# Input components: free-form prompt plus a voice preset picker.
_prompt_input = gr.Textbox(
    lines=3,
    placeholder="Type what you want Bark to say or sing...",
    label="Prompt",
)
_voice_input = gr.Dropdown(
    voices,
    value="v2/en_speaker_6",
    label="Voice Preset",
)

# Output component: plays the audio returned by generate_audio.
_audio_output = gr.Audio(label="Generated Audio")

# Wire the components to the generation function.
demo = gr.Interface(
    title="🎀 Bark Text-to-Audio (Suno, via HuggingFace Transformers)",
    description="Generates speech, singing, music, and sound effects using the open-source Bark model.",
    fn=generate_audio,
    inputs=[_prompt_input, _voice_input],
    outputs=_audio_output,
)
# -----------------------------
# Launch
# -----------------------------
# Start the Gradio server for the interface defined above.
demo.launch()