Spaces:

Mohit0708
/

Foley-Artist

Sleeping

App Files Community

Mohit0708 committed 23 days ago

Commit

9a53a39

verified ·

1 Parent(s): 962c699

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -11

app.py CHANGED Viewed

@@ -2,34 +2,34 @@ import gradio as gr
 import torch
 from diffusers import AudioLDMPipeline
-print("Loading AudioLDM Model (This takes a few minutes on boot)...")
-# Using the small model to fit in the free tier RAM
 repo_id = "cvssp/audioldm-s-full-v2"
 pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float32)
 def generate_audio(text_prompt, duration, guidance):
     print(f"Generating sound for: {text_prompt}")
-    # Number of inference steps reduced slightly for faster CPU generation
     audio = pipe(
-        text_prompt,
-        num_inference_steps=15,
         audio_length_in_s=duration,
         guidance_scale=guidance
     ).audios[0]
-    # Diffusers outputs audio at 16000Hz sampling rate
     return (16000, audio)
-# FIX 1: Removed theme from Blocks()
 with gr.Blocks() as app:
     gr.Markdown("# 🎬 The Foley Artist AI")
     gr.Markdown("Generate high-fidelity sound effects from text descriptions.")
     with gr.Row():
         with gr.Column():
-            prompt = gr.Textbox(label="Describe the Sound (e.g., 'Footsteps on gravel')", lines=2)
             duration = gr.Slider(minimum=2.5, maximum=5.0, value=2.5, step=2.5, label="Duration (Seconds)")
-            guidance = gr.Slider(minimum=1.0, maximum=5.0, value=2.5, step=0.5, label="Guidance Scale (Higher = closer to text)")
             generate_btn = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
@@ -41,5 +41,4 @@ with gr.Blocks() as app:
         outputs=[audio_output]
     )
-# FIX 2: Added theme here and turned off SSR mode to prevent the asyncio crash
-app.launch(theme=gr.themes.Monochrome(), ssr_mode=False)

 import torch
 from diffusers import AudioLDMPipeline
+print("Loading AudioLDM Model...")
 repo_id = "cvssp/audioldm-s-full-v2"
 pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float32)
 def generate_audio(text_prompt, duration, guidance):
     print(f"Generating sound for: {text_prompt}")
+    # --- THE FIX: Increased steps & added a Negative Prompt ---
     audio = pipe(
+        prompt=text_prompt,
+        negative_prompt="low quality, static, noise, distorted, background noise, messy", # Cleans the audio
+        num_inference_steps=25, # Increased from 15 to 25 for better quality
         audio_length_in_s=duration,
         guidance_scale=guidance
     ).audios[0]
     return (16000, audio)
 with gr.Blocks() as app:
     gr.Markdown("# 🎬 The Foley Artist AI")
     gr.Markdown("Generate high-fidelity sound effects from text descriptions.")
     with gr.Row():
         with gr.Column():
+            prompt = gr.Textbox(label="Describe the Sound", lines=2, placeholder="e.g., A dog barking loudly in an empty room")
             duration = gr.Slider(minimum=2.5, maximum=5.0, value=2.5, step=2.5, label="Duration (Seconds)")
+            # Tweaked the default guidance scale to 3.0 which often works better
+            guidance = gr.Slider(minimum=1.0, maximum=5.0, value=3.0, step=0.5, label="Guidance Scale (How strictly to follow text)")
             generate_btn = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
         outputs=[audio_output]
     )
+app.launch(ssr_mode=False)