Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from diffusers import AudioLDMPipeline | |
# Load the AudioLDM text-to-audio pipeline once at startup so that every
# request handled by the app reuses the same weights.
print("Loading AudioLDM Model...")
repo_id = "cvssp/audioldm-s-full-v2"
pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float32)
# Use the GPU when one is present. The weights stay in float32, which works on
# both devices, so CPU-only hosts behave exactly as before.
if torch.cuda.is_available():
    pipe = pipe.to("cuda")
def generate_audio(text_prompt, duration, guidance):
    """Generate a sound effect from a text description.

    Args:
        text_prompt: Description of the sound to synthesize.
        duration: Requested clip length in seconds; forwarded to the
            pipeline as ``audio_length_in_s``.
        guidance: Forwarded to the pipeline as ``guidance_scale``.

    Returns:
        A ``(16000, audio)`` tuple, where ``audio`` is the first clip
        produced by the pipeline. The tuple is handed to the ``gr.Audio``
        output as (sample rate, waveform).
        NOTE(review): 16000 is assumed to match the checkpoint's output
        sample rate -- confirm against the model card.

    Raises:
        gr.Error: If ``text_prompt`` is empty or only whitespace.
    """
    # Reject blank prompts up front: running the model without any text
    # costs a full inference pass and yields nothing useful.
    if not text_prompt or not text_prompt.strip():
        raise gr.Error("Please describe the sound you want to generate.")

    print(f"Generating sound for: {text_prompt}")
    result = pipe(
        prompt=text_prompt,
        # Steers generation away from common audio artifacts.
        negative_prompt="low quality, static, noise, distorted, background noise, messy",
        # Number of denoising steps per generation.
        num_inference_steps=25,
        audio_length_in_s=duration,
        guidance_scale=guidance,
    )
    audio = result.audios[0]
    return (16000, audio)
# Build the web UI: prompt and generation controls in one column, the
# generated audio player in the other.
with gr.Blocks() as app:
    gr.Markdown("# 🎬 The Foley Artist AI")
    gr.Markdown("Generate high-fidelity sound effects from text descriptions.")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Describe the Sound",
                lines=2,
                placeholder="e.g., A dog barking loudly in an empty room",
            )
            # With step=2.5 the slider offers exactly two lengths: 2.5 s and 5.0 s.
            duration = gr.Slider(
                minimum=2.5,
                maximum=5.0,
                value=2.5,
                step=2.5,
                label="Duration (Seconds)",
            )
            # Default guidance scale is 3.0, which often works better.
            guidance = gr.Slider(
                minimum=1.0,
                maximum=5.0,
                value=3.0,
                step=0.5,
                label="Guidance Scale (How strictly to follow text)",
            )
            generate_btn = gr.Button("Generate Audio", variant="primary")
        with gr.Column():
            audio_output = gr.Audio(label="Generated Sound Effect")

    # Event listeners must be registered while the Blocks context is open.
    generate_btn.click(
        generate_audio,
        inputs=[prompt, duration, guidance],
        outputs=[audio_output],
    )

# Start the server only when this file is executed as a script, so importing
# the module (e.g. from tooling or tests) does not launch a web server.
if __name__ == "__main__":
    app.launch(ssr_mode=False)