Mohit0708 committed on
Commit
9a53a39
·
verified ·
1 Parent(s): 962c699

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -11
app.py CHANGED
@@ -2,34 +2,34 @@ import gradio as gr
2
  import torch
3
  from diffusers import AudioLDMPipeline
4
 
5
- print("Loading AudioLDM Model (This takes a few minutes on boot)...")
6
- # Using the small model to fit in the free tier RAM
7
  repo_id = "cvssp/audioldm-s-full-v2"
8
  pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float32)
9
 
10
  def generate_audio(text_prompt, duration, guidance):
11
  print(f"Generating sound for: {text_prompt}")
12
- # Number of inference steps reduced slightly for faster CPU generation
 
13
  audio = pipe(
14
- text_prompt,
15
- num_inference_steps=15,
 
16
  audio_length_in_s=duration,
17
  guidance_scale=guidance
18
  ).audios[0]
19
 
20
- # Diffusers outputs audio at 16000Hz sampling rate
21
  return (16000, audio)
22
 
23
- # FIX 1: Removed theme from Blocks()
24
  with gr.Blocks() as app:
25
  gr.Markdown("# 🎬 The Foley Artist AI")
26
  gr.Markdown("Generate high-fidelity sound effects from text descriptions.")
27
 
28
  with gr.Row():
29
  with gr.Column():
30
- prompt = gr.Textbox(label="Describe the Sound (e.g., 'Footsteps on gravel')", lines=2)
31
  duration = gr.Slider(minimum=2.5, maximum=5.0, value=2.5, step=2.5, label="Duration (Seconds)")
32
- guidance = gr.Slider(minimum=1.0, maximum=5.0, value=2.5, step=0.5, label="Guidance Scale (Higher = closer to text)")
 
33
  generate_btn = gr.Button("Generate Audio", variant="primary")
34
 
35
  with gr.Column():
@@ -41,5 +41,4 @@ with gr.Blocks() as app:
41
  outputs=[audio_output]
42
  )
43
 
44
- # FIX 2: Added theme here and turned off SSR mode to prevent the asyncio crash
45
- app.launch(theme=gr.themes.Monochrome(), ssr_mode=False)
 
2
  import torch
3
  from diffusers import AudioLDMPipeline
4
 
5
+ print("Loading AudioLDM Model...")
 
6
  repo_id = "cvssp/audioldm-s-full-v2"
7
  pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float32)
8
 
9
  def generate_audio(text_prompt, duration, guidance):
10
  print(f"Generating sound for: {text_prompt}")
11
+
12
+ # --- THE FIX: Increased steps & added a Negative Prompt ---
13
  audio = pipe(
14
+ prompt=text_prompt,
15
+ negative_prompt="low quality, static, noise, distorted, background noise, messy", # Cleans the audio
16
+ num_inference_steps=25, # Increased from 15 to 25 for better quality
17
  audio_length_in_s=duration,
18
  guidance_scale=guidance
19
  ).audios[0]
20
 
 
21
  return (16000, audio)
22
 
 
23
  with gr.Blocks() as app:
24
  gr.Markdown("# 🎬 The Foley Artist AI")
25
  gr.Markdown("Generate high-fidelity sound effects from text descriptions.")
26
 
27
  with gr.Row():
28
  with gr.Column():
29
+ prompt = gr.Textbox(label="Describe the Sound", lines=2, placeholder="e.g., A dog barking loudly in an empty room")
30
  duration = gr.Slider(minimum=2.5, maximum=5.0, value=2.5, step=2.5, label="Duration (Seconds)")
31
+ # Tweaked the default guidance scale to 3.0 which often works better
32
+ guidance = gr.Slider(minimum=1.0, maximum=5.0, value=3.0, step=0.5, label="Guidance Scale (How strictly to follow text)")
33
  generate_btn = gr.Button("Generate Audio", variant="primary")
34
 
35
  with gr.Column():
 
41
  outputs=[audio_output]
42
  )
43
 
44
+ app.launch(ssr_mode=False)