Update app.py
Browse files
app.py
CHANGED
|
@@ -36,7 +36,7 @@ print(f"Using device: {device}")
|
|
| 36 |
model.to(device,torch.float32)
|
| 37 |
|
| 38 |
@spaces.GPU(duration=60)
|
| 39 |
-
def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
|
| 40 |
print(f"Prompt received: {prompt}")
|
| 41 |
print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
|
| 42 |
sample_rate = model_config["sample_rate"]
|
|
@@ -50,6 +50,8 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
|
|
| 50 |
}]
|
| 51 |
print(f"Conditioning: {conditioning}")
|
| 52 |
print("Generating audio...")
|
|
|
|
|
|
|
| 53 |
output = generate_diffusion_cond(
|
| 54 |
model,
|
| 55 |
steps=steps,
|
|
@@ -65,7 +67,7 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
|
|
| 65 |
output = rearrange(output, "b d n -> d (b n)")
|
| 66 |
# Peak normalize, clip, convert to int16
|
| 67 |
output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767).to(torch.int16).cpu()
|
| 68 |
-
unique_filename = f"output_{uuid.uuid4().hex}.wav"
|
| 69 |
print(f"Saving audio to file: {unique_filename}")
|
| 70 |
torchaudio.save(
|
| 71 |
unique_filename,
|
|
@@ -78,13 +80,15 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
|
|
| 78 |
print(f"Audio saved: {unique_filename}")
|
| 79 |
return unique_filename
|
| 80 |
|
|
|
|
| 81 |
interface = gr.Interface(
|
| 82 |
fn=generate_audio,
|
| 83 |
inputs=[
|
| 84 |
gr.Textbox(label="Prompt", placeholder="Enter your text prompt here"),
|
| 85 |
gr.Slider(0, 420, value=30, label="Duration in Seconds"),
|
| 86 |
gr.Slider(10, 420, value=100, step=10, label="Number of Diffusion Steps"),
|
| 87 |
-
gr.Slider(1.0, 32.0, value=7.0, step=0.1, label="CFG Scale")
|
|
|
|
| 88 |
],
|
| 89 |
outputs=gr.Audio(type="filepath", label="Generated Audio"),
|
| 90 |
title="Stable Audio Generator",
|
|
|
|
| 36 |
model.to(device,torch.float32)
|
| 37 |
|
| 38 |
@spaces.GPU(duration=60)
|
| 39 |
+
def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7, use_bfloat=False):
|
| 40 |
print(f"Prompt received: {prompt}")
|
| 41 |
print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
|
| 42 |
sample_rate = model_config["sample_rate"]
|
|
|
|
| 50 |
}]
|
| 51 |
print(f"Conditioning: {conditioning}")
|
| 52 |
print("Generating audio...")
|
| 53 |
+
if use_bfloat==True:
|
| 54 |
+
model.to(torch.bfloat16)
|
| 55 |
output = generate_diffusion_cond(
|
| 56 |
model,
|
| 57 |
steps=steps,
|
|
|
|
| 67 |
output = rearrange(output, "b d n -> d (b n)")
|
| 68 |
# Peak normalize, clip, convert to int16
|
| 69 |
output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767).to(torch.int16).cpu()
|
| 70 |
+
unique_filename = f"output_{uuid.uuid4().hex}.mp3"
|
| 71 |
print(f"Saving audio to file: {unique_filename}")
|
| 72 |
torchaudio.save(
|
| 73 |
unique_filename,
|
|
|
|
| 80 |
print(f"Audio saved: {unique_filename}")
|
| 81 |
return unique_filename
|
| 82 |
|
| 83 |
+
|
| 84 |
interface = gr.Interface(
|
| 85 |
fn=generate_audio,
|
| 86 |
inputs=[
|
| 87 |
gr.Textbox(label="Prompt", placeholder="Enter your text prompt here"),
|
| 88 |
gr.Slider(0, 420, value=30, label="Duration in Seconds"),
|
| 89 |
gr.Slider(10, 420, value=100, step=10, label="Number of Diffusion Steps"),
|
| 90 |
+
gr.Slider(1.0, 32.0, value=7.0, step=0.1, label="CFG Scale"),
|
| 91 |
+
gr.Checkbox(value=False, label="Use Brainfloat")
|
| 92 |
],
|
| 93 |
outputs=gr.Audio(type="filepath", label="Generated Audio"),
|
| 94 |
title="Stable Audio Generator",
|