File size: 1,750 Bytes
3e25238
 
 
 
9a53a39
3e25238
 
 
 
 
9a53a39
 
3e25238
9a53a39
 
 
3e25238
 
 
 
 
 
962c699
3e25238
 
 
 
 
9a53a39
3e25238
9a53a39
 
3e25238
 
 
 
 
 
 
 
 
 
 
9a53a39
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
import torch
from diffusers import AudioLDMPipeline

# Build the text-to-audio pipeline once at module import; the checkpoint is
# identified by its Hugging Face Hub repository id and loaded in float32.
repo_id = "cvssp/audioldm-s-full-v2"
print("Loading AudioLDM Model...")
pipe = AudioLDMPipeline.from_pretrained(
    repo_id,
    torch_dtype=torch.float32,
)

def generate_audio(text_prompt, duration, guidance):
    """Generate a sound effect from a text description using the AudioLDM pipeline.

    Args:
        text_prompt: Description of the sound to synthesize.
        duration: Requested clip length in seconds; forwarded to the pipeline
            as ``audio_length_in_s``.
        guidance: Guidance scale; forwarded to the pipeline as
            ``guidance_scale``.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is the first
        clip in the pipeline's ``audios`` output and ``sample_rate`` is read
        from the pipeline's vocoder configuration.

    Raises:
        gr.Error: If ``text_prompt`` is empty or contains only whitespace.
    """
    if not text_prompt or not text_prompt.strip():
        # Report the problem in the UI instead of spending a full diffusion
        # run on a prompt that describes nothing.
        raise gr.Error("Please describe the sound you want to generate.")

    print(f"Generating sound for: {text_prompt}")

    result = pipe(
        prompt=text_prompt,
        # Steers generation away from common artifacts to get cleaner audio.
        negative_prompt="low quality, static, noise, distorted, background noise, messy",
        # 25 steps (previously 15): slower, but noticeably better quality.
        num_inference_steps=25,
        audio_length_in_s=duration,
        guidance_scale=guidance,
    )
    audio = result.audios[0]

    # Take the rate from the vocoder that produced the waveform rather than
    # hard-coding 16000, so playback stays correct if the checkpoint changes.
    sample_rate = pipe.vocoder.config.sampling_rate
    return (sample_rate, audio)

# Two-column interface: generation controls on the left, audio player on the right.
with gr.Blocks() as app:
    gr.Markdown("# 🎬 The Foley Artist AI")
    gr.Markdown("Generate high-fidelity sound effects from text descriptions.")

    with gr.Row():
        # Left column: prompt text and generation settings.
        with gr.Column():
            prompt = gr.Textbox(
                label="Describe the Sound",
                lines=2,
                placeholder="e.g., A dog barking loudly in an empty room",
            )
            # The step equals the width of the range, so only 2.5 and 5.0
            # can be selected.
            duration = gr.Slider(
                minimum=2.5,
                maximum=5.0,
                value=2.5,
                step=2.5,
                label="Duration (Seconds)",
            )
            # Default guidance scale of 3.0, which often works better.
            guidance = gr.Slider(
                minimum=1.0,
                maximum=5.0,
                value=3.0,
                step=0.5,
                label="Guidance Scale (How strictly to follow text)",
            )
            generate_btn = gr.Button("Generate Audio", variant="primary")

        # Right column: player for the generated clip.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Sound Effect")

    # Clicking the button runs generate_audio on the three inputs and sends
    # its return value to the audio player.
    generate_btn.click(
        fn=generate_audio,
        inputs=[prompt, duration, guidance],
        outputs=[audio_output],
    )

# Start the web server with server-side rendering switched off.
app.launch(ssr_mode=False)