import os
import torch
import soundfile as sf
from huggingface_hub import login
from diffusers import StableAudioPipeline
import gradio as gr
import spaces

# Load the Hugging Face token securely from the environment
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
if HUGGINGFACE_TOKEN is None:
    raise ValueError("Missing Hugging Face token. Please set it in Spaces Secrets.")
login(HUGGINGFACE_TOKEN)

# Select the device: default to CPU, switch to GPU if one is available
device = "cpu"
torch_dtype = torch.float32  # float32 on CPU
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16  # float16 on GPU to reduce memory usage

# Load the Stable Audio Open pipeline
pipe = StableAudioPipeline.from_pretrained(
    "stabilityai/stable-audio-open-1.0",
    torch_dtype=torch_dtype,
)
pipe = pipe.to(device)


# Generate audio from a text prompt
@spaces.GPU
def generate_audio(prompt, negative_prompt, duration, diffusion_steps, seed):
    # gr.Number returns a float, so cast the seed to int before seeding the generator
    generator = torch.Generator(device).manual_seed(int(seed))
    audio_output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(diffusion_steps),  # number of diffusion steps
        audio_end_in_s=duration,                   # clip length in seconds
        num_waveforms_per_prompt=1,
        generator=generator,
    ).audios
    # Convert the (channels, samples) tensor to a (samples, channels) NumPy array
    output_audio = audio_output[0].T.float().cpu().numpy()
    output_file = "output.wav"
    sf.write(output_file, output_audio, pipe.vae.sampling_rate)
    return output_file


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🎧 Stable Audio Open - Audio Generation 🎼")
    gr.Markdown("### Adjust prompts, duration, and diffusion steps to control the generation!")

    # Input section
    with gr.Row():
        prompt_input = gr.Textbox(label="Prompt", value="The sound of a hammer hitting a wooden surface.")
        negative_input = gr.Textbox(label="Negative Prompt", value="Low quality.")
    with gr.Row():
        duration_input = gr.Slider(minimum=1, maximum=10, step=0.5, value=1, label="Duration (seconds)")
        diffusion_steps_input = gr.Slider(minimum=1, maximum=500, step=10, value=10, label="Diffusion Steps")
    with gr.Row():
        seed_input = gr.Number(label="Random Seed", value=42)

    # Output section
    generate_button = gr.Button("Generate Audio")
    output_audio = gr.Audio(label="Generated Audio", type="filepath")

    # Run generation when the button is clicked
    generate_button.click(
        generate_audio,
        inputs=[prompt_input, negative_input, duration_input, diffusion_steps_input, seed_input],
        outputs=output_audio,
    )

# Launch the app
demo.launch()