Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,34 +2,34 @@ import gradio as gr
|
|
| 2 |
import torch
|
| 3 |
from diffusers import AudioLDMPipeline
|
| 4 |
|
| 5 |
-
print("Loading AudioLDM Model
|
| 6 |
-
# Using the small model to fit in the free tier RAM
|
| 7 |
repo_id = "cvssp/audioldm-s-full-v2"
|
| 8 |
pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float32)
|
| 9 |
|
| 10 |
def generate_audio(text_prompt, duration, guidance):
|
| 11 |
print(f"Generating sound for: {text_prompt}")
|
| 12 |
-
|
|
|
|
| 13 |
audio = pipe(
|
| 14 |
-
text_prompt,
|
| 15 |
-
|
|
|
|
| 16 |
audio_length_in_s=duration,
|
| 17 |
guidance_scale=guidance
|
| 18 |
).audios[0]
|
| 19 |
|
| 20 |
-
# Diffusers outputs audio at 16000Hz sampling rate
|
| 21 |
return (16000, audio)
|
| 22 |
|
| 23 |
-
# FIX 1: Removed theme from Blocks()
|
| 24 |
with gr.Blocks() as app:
|
| 25 |
gr.Markdown("# 🎬 The Foley Artist AI")
|
| 26 |
gr.Markdown("Generate high-fidelity sound effects from text descriptions.")
|
| 27 |
|
| 28 |
with gr.Row():
|
| 29 |
with gr.Column():
|
| 30 |
-
prompt = gr.Textbox(label="Describe the Sound
|
| 31 |
duration = gr.Slider(minimum=2.5, maximum=5.0, value=2.5, step=2.5, label="Duration (Seconds)")
|
| 32 |
-
|
|
|
|
| 33 |
generate_btn = gr.Button("Generate Audio", variant="primary")
|
| 34 |
|
| 35 |
with gr.Column():
|
|
@@ -41,5 +41,4 @@ with gr.Blocks() as app:
|
|
| 41 |
outputs=[audio_output]
|
| 42 |
)
|
| 43 |
|
| 44 |
-
|
| 45 |
-
app.launch(theme=gr.themes.Monochrome(), ssr_mode=False)
|
|
|
|
| 2 |
import torch
|
| 3 |
from diffusers import AudioLDMPipeline
|
| 4 |
|
| 5 |
+
# Startup log line, emitted before the pipeline is constructed.
print("Loading AudioLDM Model...")

# Checkpoint identifier handed to ``from_pretrained``; weights are requested
# in float32.
repo_id = "cvssp/audioldm-s-full-v2"
pipe = AudioLDMPipeline.from_pretrained(
    repo_id,
    torch_dtype=torch.float32,
)
|
| 8 |
|
| 9 |
def generate_audio(text_prompt, duration, guidance):
    """Synthesize a sound clip for ``text_prompt`` with the module-level ``pipe``.

    Args:
        text_prompt: Text description of the sound to generate.
        duration: Clip length, forwarded to the pipeline as
            ``audio_length_in_s``.
        guidance: Value forwarded to the pipeline as ``guidance_scale``.

    Returns:
        A ``(sample_rate, audio)`` tuple, where ``sample_rate`` is the fixed
        value 16000 and ``audio`` is the first entry of the pipeline
        output's ``audios``.
    """
    print(f"Generating sound for: {text_prompt}")

    sample_rate = 16000
    # Terms passed as the negative prompt, i.e. what the output should avoid.
    unwanted = "low quality, static, noise, distorted, background noise, messy"

    result = pipe(
        prompt=text_prompt,
        negative_prompt=unwanted,
        num_inference_steps=25,
        audio_length_in_s=duration,
        guidance_scale=guidance,
    )
    return (sample_rate, result.audios[0])
|
| 22 |
|
|
|
|
| 23 |
with gr.Blocks() as app:
|
| 24 |
gr.Markdown("# 🎬 The Foley Artist AI")
|
| 25 |
gr.Markdown("Generate high-fidelity sound effects from text descriptions.")
|
| 26 |
|
| 27 |
with gr.Row():
|
| 28 |
with gr.Column():
|
| 29 |
+
prompt = gr.Textbox(label="Describe the Sound", lines=2, placeholder="e.g., A dog barking loudly in an empty room")
|
| 30 |
duration = gr.Slider(minimum=2.5, maximum=5.0, value=2.5, step=2.5, label="Duration (Seconds)")
|
| 31 |
+
# Tweaked the default guidance scale to 3.0 which often works better
|
| 32 |
+
guidance = gr.Slider(minimum=1.0, maximum=5.0, value=3.0, step=0.5, label="Guidance Scale (How strictly to follow text)")
|
| 33 |
generate_btn = gr.Button("Generate Audio", variant="primary")
|
| 34 |
|
| 35 |
with gr.Column():
|
|
|
|
| 41 |
outputs=[audio_output]
|
| 42 |
)
|
| 43 |
|
| 44 |
+
app.launch(ssr_mode=False)
|
|
|