1inkusFace committed on
Commit
6a270df
·
verified ·
1 Parent(s): f84fac1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -36,7 +36,7 @@ print(f"Using device: {device}")
36
  model.to(device,torch.float32)
37
 
38
  @spaces.GPU(duration=60)
39
- def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
40
  print(f"Prompt received: {prompt}")
41
  print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
42
  sample_rate = model_config["sample_rate"]
@@ -50,6 +50,8 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
50
  }]
51
  print(f"Conditioning: {conditioning}")
52
  print("Generating audio...")
 
 
53
  output = generate_diffusion_cond(
54
  model,
55
  steps=steps,
@@ -65,7 +67,7 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
65
  output = rearrange(output, "b d n -> d (b n)")
66
  # Peak normalize, clip, convert to int16
67
  output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767).to(torch.int16).cpu()
68
- unique_filename = f"output_{uuid.uuid4().hex}.wav"
69
  print(f"Saving audio to file: {unique_filename}")
70
  torchaudio.save(
71
  unique_filename,
@@ -78,13 +80,15 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
78
  print(f"Audio saved: {unique_filename}")
79
  return unique_filename
80
 
 
81
  interface = gr.Interface(
82
  fn=generate_audio,
83
  inputs=[
84
  gr.Textbox(label="Prompt", placeholder="Enter your text prompt here"),
85
  gr.Slider(0, 420, value=30, label="Duration in Seconds"),
86
  gr.Slider(10, 420, value=100, step=10, label="Number of Diffusion Steps"),
87
- gr.Slider(1.0, 32.0, value=7.0, step=0.1, label="CFG Scale")
 
88
  ],
89
  outputs=gr.Audio(type="filepath", label="Generated Audio"),
90
  title="Stable Audio Generator",
 
36
  model.to(device,torch.float32)
37
 
38
  @spaces.GPU(duration=60)
39
+ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7, use_bfloat=False):
40
  print(f"Prompt received: {prompt}")
41
  print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
42
  sample_rate = model_config["sample_rate"]
 
50
  }]
51
  print(f"Conditioning: {conditioning}")
52
  print("Generating audio...")
53
+ if use_bfloat==True:
54
+ model.to(torch.bfloat16)
55
  output = generate_diffusion_cond(
56
  model,
57
  steps=steps,
 
67
  output = rearrange(output, "b d n -> d (b n)")
68
  # Peak normalize, clip, convert to int16
69
  output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767).to(torch.int16).cpu()
70
+ unique_filename = f"output_{uuid.uuid4().hex}.mp3"
71
  print(f"Saving audio to file: {unique_filename}")
72
  torchaudio.save(
73
  unique_filename,
 
80
  print(f"Audio saved: {unique_filename}")
81
  return unique_filename
82
 
83
+
84
  interface = gr.Interface(
85
  fn=generate_audio,
86
  inputs=[
87
  gr.Textbox(label="Prompt", placeholder="Enter your text prompt here"),
88
  gr.Slider(0, 420, value=30, label="Duration in Seconds"),
89
  gr.Slider(10, 420, value=100, step=10, label="Number of Diffusion Steps"),
90
+ gr.Slider(1.0, 32.0, value=7.0, step=0.1, label="CFG Scale"),
91
+ gr.Checkbox(value=False, label="Use Brainfloat")
92
  ],
93
  outputs=gr.Audio(type="filepath", label="Generated Audio"),
94
  title="Stable Audio Generator",