Spaces:

dskill
/

sd-audio-cpu

Runtime error

App Files Community

Drew Skillman committed on Jun 22, 2024

Commit

2a76b54

1 Parent(s): f581b9c

switch to cache model and not use zero

Browse files

Files changed (2) hide show

.gitignore +2 -0
app.py +10 -4

.gitignore CHANGED Viewed

	@@ -1 +1,3 @@
1	.DS_Store

 .DS_Store
+*.wav
+*.mp3

app.py CHANGED Viewed

@@ -11,16 +11,20 @@ from pydub import AudioSegment
 from stable_audio_tools import get_pretrained_model
 from stable_audio_tools.inference.generation import generate_diffusion_cond
 # Load the model outside of the GPU-decorated function
 def load_model():
     print("Loading model...")
     model, model_config = get_pretrained_model("stabilityai/stable-audio-open-1.0")
     print("Model loaded successfully.")
     return model, model_config
 # Function to set up, generate, and process the audio
-@spaces.GPU(duration=120)  # Allocate GPU only when this function is called
 def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
     print(f"Prompt received: {prompt}")
     print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
@@ -32,7 +36,7 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
     print(f"Hugging Face token: {hf_token}")
     # Use pre-loaded model and configuration
-    model, model_config = load_model()
     sample_rate = model_config["sample_rate"]
     sample_size = model_config["sample_size"]
@@ -79,7 +83,8 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
     # Save to file
     torchaudio.save(unique_filename, output, sample_rate)
     print(f"Audio saved: {unique_filename}")
     # Convert WAV to MP3 using pydub without ffmpeg
     audio = AudioSegment.from_wav(unique_filename)
     full_path_mp3 = unique_filename.replace('wav', 'mp3')
@@ -89,6 +94,7 @@ def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
     # Return the path to the generated audio file
     return full_path_mp3
 # Setting up the Gradio Interface
 interface = gr.Interface(
@@ -117,4 +123,4 @@ with gr.Blocks() as demo:
 # Pre-load the model to avoid multiprocessing issues
 model, model_config = load_model()
-demo.launch()

 from stable_audio_tools import get_pretrained_model
 from stable_audio_tools.inference.generation import generate_diffusion_cond
+global model, model_config
 # Load the model outside of the GPU-decorated function
 def load_model():
+    global model, model_config
     print("Loading model...")
     model, model_config = get_pretrained_model("stabilityai/stable-audio-open-1.0")
     print("Model loaded successfully.")
     return model, model_config
 # Function to set up, generate, and process the audio
 def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
+    global model, model_config
     print(f"Prompt received: {prompt}")
     print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
     print(f"Hugging Face token: {hf_token}")
     # Use pre-loaded model and configuration
+    #model, model_config = load_model()
     sample_rate = model_config["sample_rate"]
     sample_size = model_config["sample_size"]
     # Save to file
     torchaudio.save(unique_filename, output, sample_rate)
     print(f"Audio saved: {unique_filename}")
+    return unique_filename
+'''
     # Convert WAV to MP3 using pydub without ffmpeg
     audio = AudioSegment.from_wav(unique_filename)
     full_path_mp3 = unique_filename.replace('wav', 'mp3')
     # Return the path to the generated audio file
     return full_path_mp3
+'''
 # Setting up the Gradio Interface
 interface = gr.Interface(
 # Pre-load the model to avoid multiprocessing issues
 model, model_config = load_model()
+demo.launch(share=True)