|
|
import gradio as gr |
|
|
from transformers import BarkModel, AutoProcessor |
|
|
import torch |
|
|
import scipy.io.wavfile as wav |
|
|
|
|
|
|
|
|
# The processor and the model are both loaded from the same checkpoint id,
# so the id is written once and shared.
_CHECKPOINT = "suno/bark"

processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = BarkModel.from_pretrained(_CHECKPOINT)
|
|
|
|
|
def generate(text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: Prompt to speak. ``None``, an empty string, or a string that
            contains only whitespace produces no audio.

    Returns:
        Path to a freshly written WAV file, or ``None`` when there is
        nothing to synthesize.
    """
    # Function-scope import so this block does not depend on the file header.
    import tempfile

    if not text or not text.strip():
        return None

    inputs = processor(text, return_tensors="pt")
    audio = model.generate(**inputs)
    audio = audio.cpu().numpy().squeeze()

    # Use the rate the checkpoint declares instead of a hard-coded 22050 Hz;
    # per the Bark documentation this is 24 kHz for "suno/bark", and writing
    # with a different rate plays the audio back at the wrong speed and pitch.
    sample_rate = model.generation_config.sample_rate

    # One unique file per call: a single shared "output.wav" would be
    # overwritten when two requests are handled at the same time.
    # The handle is closed before writing so the path can be reopened.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    wav.write(output_path, sample_rate, audio)
    return output_path
|
|
|
|
|
# Input widget: a text box for the sentence to be spoken.
_text_input = gr.Textbox(
    label="Text",
    placeholder="Type something to speak...",
)

# Output widget: an audio player fed with a file path, which is what
# `generate` returns.
_audio_output = gr.Audio(
    label="AI Voice",
    type="filepath",
)

# Wire the callback and both widgets into a single-page interface.
demo = gr.Interface(
    fn=generate,
    inputs=_text_input,
    outputs=_audio_output,
    title="AI Voice Generator (HF Stable)",
    description="Hugging Face compatible AI voice generator",
)
|
|
|
|
|
# Server settings passed to Gradio: host "0.0.0.0", port 7860, and no
# share link requested.
_launch_options = {
    "server_name": "0.0.0.0",
    "server_port": 7860,
    "share": False,
}

demo.launch(**_launch_options)