# Hugging Face Space (status: Sleeping) - File size: 1,750 Bytes
import gradio as gr
import torch
from diffusers import AudioLDMPipeline
# Hub id of the AudioLDM checkpoint that is loaded once at import time.
repo_id = "cvssp/audioldm-s-full-v2"

print("Loading AudioLDM Model...")
# The pipeline is kept at module level so every request reuses the same
# loaded weights (full-precision float32).
pipe = AudioLDMPipeline.from_pretrained(
    repo_id,
    torch_dtype=torch.float32,
)
def generate_audio(text_prompt: str, duration: float, guidance: float):
    """Generate a sound effect from a text description using the AudioLDM pipeline.

    Args:
        text_prompt: Description of the sound to synthesize.
        duration: Requested clip length in seconds; forwarded to the pipeline
            as ``audio_length_in_s``.
        guidance: Forwarded to the pipeline as ``guidance_scale``.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is the first
        entry of the pipeline result's ``audios``.

    Raises:
        gr.Error: If ``text_prompt`` is empty or contains only whitespace.
    """
    # A blank prompt would still run a full (slow) inference pass conditioned
    # only on the negative prompt, so reject it with a user-visible error.
    if not text_prompt or not text_prompt.strip():
        raise gr.Error("Please describe the sound you want to generate.")

    print(f"Generating sound for: {text_prompt}")

    result = pipe(
        prompt=text_prompt,
        # Negative prompt steers generation away from common audio artifacts.
        negative_prompt="low quality, static, noise, distorted, background noise, messy",
        # 25 denoising steps (raised from 15) for better quality.
        num_inference_steps=25,
        audio_length_in_s=duration,
        guidance_scale=guidance,
    )

    # NOTE(review): the 16000 Hz sample rate is hard-coded; presumably it
    # matches the rate of the model's output -- confirm against the checkpoint.
    return (16000, result.audios[0])
# Build the interface: input controls in the first column, the generated clip
# in the second.
with gr.Blocks() as app:
    gr.Markdown("# 🎬 The Foley Artist AI")
    gr.Markdown("Generate high-fidelity sound effects from text descriptions.")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Describe the Sound",
                lines=2,
                placeholder="e.g., A dog barking loudly in an empty room",
            )
            # With step == minimum == 2.5 and maximum == 5.0, the slider
            # offers exactly two durations: 2.5 s and 5.0 s.
            duration = gr.Slider(
                minimum=2.5,
                maximum=5.0,
                value=2.5,
                step=2.5,
                label="Duration (Seconds)",
            )
            # Default guidance scale is 3.0, which often works better.
            guidance = gr.Slider(
                minimum=1.0,
                maximum=5.0,
                value=3.0,
                step=0.5,
                label="Guidance Scale (How strictly to follow text)",
            )
            generate_btn = gr.Button("Generate Audio", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Sound Effect")

    # Event listeners are registered while the Blocks context is still open.
    generate_btn.click(
        fn=generate_audio,
        inputs=[prompt, duration, guidance],
        outputs=[audio_output],
    )

app.launch(ssr_mode=False)