"""Gradio demo that turns a text prompt into audio with Suno's Bark model.

The Bark processor and model are loaded once at import time and shared by
every request handled through ``generate_audio``.
"""

import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile as wavfile
import torch
from transformers import AutoProcessor, AutoModelForTextToWaveform

# -----------------------------
# Load Bark Model + Processor
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoProcessor.from_pretrained("suno/bark")
model = AutoModelForTextToWaveform.from_pretrained(
    "suno/bark",
    # Half precision only on GPU; CPU inference stays in float32.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# -----------------------------
# Voice Presets (Bark v2 manual list)
# -----------------------------
# "v2/en_speaker_0" ... "v2/en_speaker_9", already in sorted order.
voices = [f"v2/en_speaker_{index}" for index in range(10)]

# Used only when the loaded generation config does not expose a sample rate.
_FALLBACK_SAMPLE_RATE = 24000


# -----------------------------
# Audio Generation Function
# -----------------------------
def generate_audio(text, voice):
    """Synthesize ``text`` with Bark and return the path of a WAV file.

    Args:
        text: Prompt to render. ``None``, empty, or whitespace-only prompts
            produce no audio.
        voice: Voice preset name (one of ``voices``), forwarded to the
            processor as ``voice_preset``.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio, or
        ``None`` when there is no prompt to render.
    """
    # Guard against None as well as blank prompts.
    if not text or not text.strip():
        return None

    # Prepare inputs and move every top-level tensor to the model's device.
    inputs = processor(text, voice_preset=voice)
    for key, value in list(inputs.items()):
        if torch.is_tensor(value):
            inputs[key] = value.to(device)

    # Generate waveform (inference only, no gradients needed).
    with torch.no_grad():
        waveform = model.generate(**inputs)

    # On CUDA the model runs in float16; cast to float32, which is a sample
    # format scipy.io.wavfile.write documents support for.
    samples = np.asarray(waveform.cpu().numpy().squeeze(), dtype=np.float32)

    # Prefer the rate advertised by the model over a hard-coded constant.
    generation_config = getattr(model, "generation_config", None)
    sample_rate = (
        getattr(generation_config, "sample_rate", None) or _FALLBACK_SAMPLE_RATE
    )

    # Reserve a named file, close our handle, then let scipy write to it.
    # delete=False because the path is handed back to the caller and must
    # still exist after this function returns; nothing here removes it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name
    wavfile.write(wav_path, sample_rate, samples)

    return wav_path


# -----------------------------
# Gradio Interface
# -----------------------------
demo = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(
            label="Prompt",
            placeholder="Type what you want Bark to say or sing...",
            lines=3,
        ),
        gr.Dropdown(
            voices,
            label="Voice Preset",
            value="v2/en_speaker_6",
        ),
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="🎤 Bark Text-to-Audio (Suno, via HuggingFace Transformers)",
    description=(
        "Generates speech, singing, music, and sound effects using the "
        "open-source Bark model."
    ),
)

# -----------------------------
# Launch
# -----------------------------
# Only start the server when executed as a script, so importing this module
# (e.g. to reuse `demo` or `generate_audio`) does not block on launch().
if __name__ == "__main__":
    demo.launch()