# app.py — Stable Audio Open text-to-audio demo (Hugging Face Space)
import os
import torch
import soundfile as sf
from huggingface_hub import login
from diffusers import StableAudioPipeline
import gradio as gr
import spaces
# --- Authentication -------------------------------------------------------
# The gated model requires a Hugging Face token, injected via Spaces Secrets.
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
if HUGGINGFACE_TOKEN is None:
    raise ValueError("Missing Hugging Face token. Please set it in Spaces Secrets.")
login(HUGGINGFACE_TOKEN)

# --- Device selection -----------------------------------------------------
# Prefer CUDA with half precision (saves memory); otherwise fall back to
# CPU with full float32 precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# --- Model loading --------------------------------------------------------
# Download/load the Stable Audio Open pipeline and move it onto the chosen
# device. NOTE: weights are fetched from the Hub on first run.
pipe = StableAudioPipeline.from_pretrained(
    "stabilityai/stable-audio-open-1.0",
    torch_dtype=torch_dtype,
).to(device)
# Function to generate audio
@spaces.GPU
def generate_audio(prompt, negative_prompt, duration, diffusion_steps, seed):
    """Generate a WAV file from a text prompt using Stable Audio Open.

    Args:
        prompt: Text description of the desired sound.
        negative_prompt: Text describing qualities to avoid.
        duration: Target audio length in seconds.
        diffusion_steps: Number of denoising steps (more = slower, higher quality).
        seed: Random seed for reproducible generation. May arrive as a float
            from the Gradio Number component.

    Returns:
        Path to the written WAV file ("output.wav").
    """
    # gr.Number delivers floats; manual_seed requires an int, so cast here.
    generator = torch.Generator(device).manual_seed(int(seed))
    audio_output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(diffusion_steps),  # number of diffusion steps
        audio_end_in_s=duration,
        num_waveforms_per_prompt=1,
        generator=generator,
    ).audios
    # Pipeline returns (channels, samples); soundfile expects (samples, channels),
    # hence the transpose. Cast to float32 on CPU before converting to numpy.
    output_audio = audio_output[0].T.float().cpu().numpy()
    output_file = "output.wav"
    sf.write(output_file, output_audio, pipe.vae.sampling_rate)
    return output_file
# Gradio UI
# --- User interface -------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🎧 Stable Audio Open - Audio Generation 🎼")
    gr.Markdown("### Adjust prompts, duration, and diffusion steps to control the generation!")

    # Text controls: what to generate, and what to steer away from.
    with gr.Row():
        prompt_box = gr.Textbox(label="Prompt", value="The sound of a hammer hitting a wooden surface.")
        negative_box = gr.Textbox(label="Negative Prompt", value="Low quality.")

    # Numeric controls for length and denoising quality.
    with gr.Row():
        duration_slider = gr.Slider(minimum=1, maximum=10, step=0.5, value=1, label="Duration (seconds)")
        steps_slider = gr.Slider(minimum=1, maximum=500, step=10, value=10, label="Diffusion Steps")

    with gr.Row():
        seed_number = gr.Number(label="Random Seed", value=42)

    # Trigger button and audio player for the result.
    run_button = gr.Button("Generate Audio")
    result_audio = gr.Audio(label="Generated Audio", type="filepath")

    # Wire the button to the generation function.
    run_button.click(
        generate_audio,
        inputs=[prompt_box, negative_box, duration_slider, steps_slider, seed_number],
        outputs=result_audio,
    )

# Start the Gradio server.
demo.launch()