Spaces:
Runtime error
Runtime error
Drew
committed on
Commit
·
c879843
1
Parent(s):
929aad9
tests
Browse files
- app.py +10 -44
- requirements.txt +5 -1
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import gradio as gr
|
|
| 5 |
import spaces
|
| 6 |
import os
|
| 7 |
import uuid
|
|
|
|
| 8 |
|
| 9 |
# Importing the model-related functions
|
| 10 |
from stable_audio_tools import get_pretrained_model
|
|
@@ -79,8 +80,15 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
|
|
| 79 |
torchaudio.save(unique_filename, output, sample_rate)
|
| 80 |
print(f"Audio saved: {unique_filename}")
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
# Return the path to the generated audio file
|
| 83 |
-
return
|
| 84 |
|
| 85 |
# Setting up the Gradio Interface
|
| 86 |
interface = gr.Interface(
|
|
@@ -94,49 +102,7 @@ interface = gr.Interface(
|
|
| 94 |
outputs=gr.Audio(type="filepath", label="Generated Audio"),
|
| 95 |
title="Stable Audio Generator",
|
| 96 |
description="Generate variable-length stereo audio at 44.1kHz from text prompts using Stable Audio Open 1.0.",
|
| 97 |
-
|
| 98 |
-
[
|
| 99 |
-
"Create a serene soundscape of a quiet beach at sunset.", # Text prompt
|
| 100 |
-
|
| 101 |
-
45, # Duration in Seconds
|
| 102 |
-
100, # Number of Diffusion Steps
|
| 103 |
-
10, # CFG Scale
|
| 104 |
-
],
|
| 105 |
-
[
|
| 106 |
-
"Generate an energetic and bustling city street scene with distant traffic and close conversations.", # Text prompt
|
| 107 |
-
|
| 108 |
-
30, # Duration in Seconds
|
| 109 |
-
120, # Number of Diffusion Steps
|
| 110 |
-
5, # CFG Scale
|
| 111 |
-
],
|
| 112 |
-
[
|
| 113 |
-
"Simulate a forest ambiance with birds chirping and wind rustling through the leaves.", # Text prompt
|
| 114 |
-
60, # Duration in Seconds
|
| 115 |
-
140, # Number of Diffusion Steps
|
| 116 |
-
7.5, # CFG Scale
|
| 117 |
-
],
|
| 118 |
-
[
|
| 119 |
-
"Recreate a gentle rainfall with distant thunder.", # Text prompt
|
| 120 |
-
|
| 121 |
-
35, # Duration in Seconds
|
| 122 |
-
110, # Number of Diffusion Steps
|
| 123 |
-
8, # CFG Scale
|
| 124 |
-
|
| 125 |
-
],
|
| 126 |
-
[
|
| 127 |
-
"Imagine a jazz cafe environment with soft music and ambient chatter.", # Text prompt
|
| 128 |
-
25, # Duration in Seconds
|
| 129 |
-
90, # Number of Diffusion Steps
|
| 130 |
-
6, # CFG Scale
|
| 131 |
-
|
| 132 |
-
],
|
| 133 |
-
["Rock beat played in a treated studio, session drumming on an acoustic kit.",
|
| 134 |
-
30, # Duration in Seconds
|
| 135 |
-
100, # Number of Diffusion Steps
|
| 136 |
-
7, # CFG Scale
|
| 137 |
-
|
| 138 |
-
]
|
| 139 |
-
])
|
| 140 |
|
| 141 |
|
| 142 |
# Pre-load the model to avoid multiprocessing issues
|
|
|
|
| 5 |
import spaces
|
| 6 |
import os
|
| 7 |
import uuid
|
| 8 |
+
from pydub import AudioSegment
|
| 9 |
|
| 10 |
# Importing the model-related functions
|
| 11 |
from stable_audio_tools import get_pretrained_model
|
|
|
|
| 80 |
torchaudio.save(unique_filename, output, sample_rate)
|
| 81 |
print(f"Audio saved: {unique_filename}")
|
| 82 |
|
| 83 |
+
# Convert WAV to MP3 using pydub (note: MP3 export requires ffmpeg or libav to be installed)
|
| 84 |
+
audio = AudioSegment.from_wav(unique_filename)
|
| 85 |
+
full_path_mp3 = unique_filename.replace('wav', 'mp3')
|
| 86 |
+
audio.export(full_path_mp3, format="mp3")
|
| 87 |
+
|
| 88 |
+
print(f"Audio converted and saved to MP3: {full_path_mp3}")
|
| 89 |
+
|
| 90 |
# Return the path to the generated audio file
|
| 91 |
+
return audio
|
| 92 |
|
| 93 |
# Setting up the Gradio Interface
|
| 94 |
interface = gr.Interface(
|
|
|
|
| 102 |
outputs=gr.Audio(type="filepath", label="Generated Audio"),
|
| 103 |
title="Stable Audio Generator",
|
| 104 |
description="Generate variable-length stereo audio at 44.1kHz from text prompts using Stable Audio Open 1.0.",
|
| 105 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
# Pre-load the model to avoid multiprocessing issues
|
requirements.txt
CHANGED
|
@@ -9,4 +9,8 @@ torch
|
|
| 9 |
torchaudio
|
| 10 |
stable-audio-tools
|
| 11 |
openai
|
| 12 |
-
pydub
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
torchaudio
|
| 10 |
stable-audio-tools
|
| 11 |
openai
|
| 12 |
+
pydub
|
| 13 |
+
git+https://github.com/huggingface/diffusers.git
|
| 14 |
+
transformers
|
| 15 |
+
accelerate
|
| 16 |
+
sentencepiece
|