Update app.py
Browse files
app.py
CHANGED
|
@@ -1,13 +1,29 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import pipeline
|
|
|
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
-
#
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
def generate_speech(text):
    """Synthesise speech for ``text`` using the module-level ``synthesiser``.

    Args:
        text: The text to turn into speech.

    Returns:
        A ``(sampling_rate, audio)`` tuple read from the synthesiser output,
        in the order used for the Gradio audio output.
    """
    # Options handed to the synthesiser as ``forward_params``.
    generation_options = {"do_sample": True}
    result = synthesiser(text, forward_params=generation_options)

    # Sampling rate first, audio data second.
    sampling_rate = result["sampling_rate"]
    audio = result["audio"]
    return sampling_rate, audio
|
| 13 |
|
|
@@ -16,8 +32,8 @@ iface = gr.Interface(
|
|
| 16 |
fn=generate_speech,
|
| 17 |
inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
|
| 18 |
outputs=gr.Audio(type="numpy"),
|
| 19 |
-
title="Text-to-Speech with Suno/Bark",
|
| 20 |
-
description="Enter text to generate speech using the Suno/Bark model."
|
| 21 |
)
|
| 22 |
|
| 23 |
# Launch the app
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import pipeline, BarkModel, AutoProcessor
|
| 3 |
+
import torch
|
| 4 |
+
from optimum.bettertransformer import BetterTransformer
|
| 5 |
import numpy as np
|
| 6 |
|
| 7 |
+
# Check for GPU availability
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Initialize the text-to-speech pipeline with the smaller model.
# Half precision is only requested when a GPU is available: on the CPU
# fallback fp16 is slow or unsupported for some operations, so float32 is
# used there instead.
synthesiser = pipeline(
    "text-to-speech",
    model="suno/bark-small",  # Use smaller model for faster inference
    device=device,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)

# Convert model to BetterTransformer for kernel fusion
synthesiser.model = BetterTransformer.transform(synthesiser.model, keep_original_model=False)
|
| 20 |
+
|
| 21 |
+
# Optional: Enable CPU offloading for low VRAM (uncomment if needed)
|
| 22 |
+
# synthesiser.model.enable_cpu_offload()
|
| 23 |
|
| 24 |
def generate_speech(text):
    """Synthesise speech for ``text`` using the module-level ``synthesiser``.

    Args:
        text: The text to turn into speech.

    Returns:
        A ``(sampling_rate, audio)`` tuple read from the synthesiser output,
        in the order used for the Gradio audio output.
    """
    # Options handed to the synthesiser as ``forward_params``.
    generation_options = {
        "do_sample": True,
        "fine_temperature": 0.4,
        "coarse_temperature": 0.8,
    }
    result = synthesiser(text, forward_params=generation_options)

    # Sampling rate first, audio data second.
    sampling_rate = result["sampling_rate"]
    audio = result["audio"]
    return sampling_rate, audio
|
| 29 |
|
|
|
|
| 32 |
fn=generate_speech,
|
| 33 |
inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
|
| 34 |
outputs=gr.Audio(type="numpy"),
|
| 35 |
+
title="Text-to-Speech with Suno/Bark-Small",
|
| 36 |
+
description="Enter text to generate speech using the optimized Suno/Bark-Small model."
|
| 37 |
)
|
| 38 |
|
| 39 |
# Launch the app
|