"""Gradio front end that turns a text prompt into a WAV file.

The audio is generated with the ``stabilityai/stable-audio-open-1.0``
checkpoint through the diffusers ``StableAudioPipeline``.
"""

import gc
import tempfile

import gradio as gr
import scipy.io.wavfile as wavfile
import torch
from diffusers import StableAudioPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested when running on CUDA; CPU uses float32.
pipe = StableAudioPipeline.from_pretrained(
    "stabilityai/stable-audio-open-1.0",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
pipe = pipe.to(device)
pipe.enable_attention_slicing()


def generate_audio(prompt, duration):
    """Generate audio for *prompt* and return the path of the WAV file.

    Args:
        prompt: Text description of the sound to generate.
        duration: Requested clip length in seconds; forwarded to the
            pipeline as ``audio_end_in_s``.

    Returns:
        Path to a newly created ``.wav`` file holding the generated audio.
        Each call writes to its own temporary file, so overlapping requests
        do not overwrite each other's output. The file is not deleted here;
        the caller (or the OS temp cleanup) owns it.
    """
    try:
        with torch.no_grad():
            output = pipe(
                prompt=prompt,
                num_inference_steps=100,  # reduced for stability
                audio_end_in_s=duration,
            )
        # scipy cannot write torch tensors (nor float16 data), so move the
        # first clip to the CPU as a float32 NumPy array. The transpose
        # follows the diffusers usage example, which hands scipy/soundfile a
        # (samples, channels) array.
        audio = output.audios[0].T.float().cpu().numpy()
        del output
    finally:
        # Release memory even when generation fails. Collect Python garbage
        # first so the CUDA allocator can actually return the freed blocks.
        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()

    # Take the rate from the model config instead of hard-coding 44100 so the
    # WAV header always matches what the VAE produced.
    sample_rate = int(pipe.vae.config.sampling_rate)

    # Reserve a unique file name, close the handle, then let scipy write by
    # path (re-opening by name while the handle is open fails on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        file_path = tmp.name
    wavfile.write(file_path, sample_rate, audio)

    return file_path


iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(label="Sound Prompt"),
        gr.Slider(5, 20, value=10, label="Duration (seconds)"),
    ],
    outputs=gr.Audio(type="filepath"),
    title="Stable Audio Generator",
)

if __name__ == "__main__":
    iface.launch()