Spaces:

OnyxMunk
/

Stable-Audio-Open

Paused

App Files Community

OnyxMunk committed on Dec 20, 2025

Commit

c30d48f

1 Parent(s): f9d8177

Fix build error: Remove gradio from requirements.txt to avoid version conflict

Browse files

Files changed (1) hide show

requirements.txt +8 -171

requirements.txt CHANGED Viewed

@@ -1,171 +1,8 @@
-import gradio as gr
-import torch
-import numpy as np
-from diffusers import StableAudioPipeline
-import scipy.io.wavfile as wavfile
-import io
-import os
-# Global variable to cache the model
-model_cache = None
-def load_stable_audio_model():
-    """
-    Load the Stable Audio model with caching
-    """
-    global model_cache
-    if model_cache is None:
-        try:
-            print("Loading Stable Audio model...")
-            model_cache = StableAudioPipeline.from_pretrained(
-                "stabilityai/stable-audio-open-1.0",
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-            )
-            if torch.cuda.is_available():
-                model_cache = model_cache.to("cuda")
-            print("Model loaded successfully!")
-        except Exception as e:
-            print(f"Error loading model: {e}")
-            # Fallback to placeholder if model loading fails
-            model_cache = "placeholder"
-    return model_cache
-def create_audio_generation_interface():
-    """
-    Create a Gradio interface for Stable Audio generation
-    """
-    def generate_audio(prompt, duration, seed):
-        """
-        Generate audio based on text prompt using Stable Audio model
-        """
-        try:
-            model = load_stable_audio_model()
-            if model == "placeholder":
-                # Fallback to placeholder if model loading failed
-                sample_rate = 44100
-                duration_samples = int(duration * sample_rate)
-                frequency = 440 + (seed % 200)  # Vary frequency based on seed
-                t = np.linspace(0, duration, duration_samples, endpoint=False)
-                audio = 0.3 * np.sin(2 * np.pi * frequency * t)
-                return (sample_rate, audio), "Using placeholder audio (model loading failed)"
-            # Set seed for reproducibility
-            if seed is not None:
-                torch.manual_seed(seed)
-                if torch.cuda.is_available():
-                    torch.cuda.manual_seed(seed)
-            # Generate audio with Stable Audio
-            print(f"Generating audio for prompt: '{prompt}', duration: {duration}s")
-            # Create negative prompt for better quality
-            negative_prompt = "low quality, distorted, noisy, artifacts"
-            # Generate the audio
-            audio_output = model(
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                duration=duration,
-                num_inference_steps=100,
-                guidance_scale=7.5,
-                num_waveforms_per_prompt=1,
-                audio_length_in_s=duration,
-            )
-            # Extract the audio data
-            audio = audio_output.audios[0]  # Shape: [channels, samples]
-            # Convert to mono if stereo
-            if audio.ndim > 1:
-                audio = audio.mean(axis=0)
-            # Ensure proper sample rate (Stable Audio uses 44100 Hz)
-            sample_rate = 44100
-            return (sample_rate, audio), "Audio generated successfully!"
-        except Exception as e:
-            print(f"Error generating audio: {e}")
-            # Fallback to simple tone
-            sample_rate = 44100
-            duration_samples = int(duration * sample_rate)
-            frequency = 220  # A3 note
-            t = np.linspace(0, duration, duration_samples, endpoint=False)
-            audio = 0.3 * np.sin(2 * np.pi * frequency * t)
-            return (sample_rate, audio), f"Error: {str(e)}. Using fallback audio."
-    # Create the Gradio interface
-    with gr.Blocks(title="Stable Audio Open", theme=gr.themes.Soft()) as interface:
-        gr.Markdown("""
-        # 🎵 Stable Audio Open
-        Generate high-quality audio from text prompts using Stable Audio technology.
-        **Note:** This is a demo interface. The actual Stable Audio model integration is coming soon.
-        """)
-        with gr.Row():
-            with gr.Column():
-                prompt_input = gr.Textbox(
-                    label="Text Prompt",
-                    placeholder="Describe the audio you want to generate...",
-                    lines=3,
-                    value="A gentle piano melody playing in a cozy room"
-                )
-                duration_input = gr.Slider(
-                    label="Duration (seconds)",
-                    minimum=1,
-                    maximum=30,
-                    value=10,
-                    step=1
-                )
-                seed_input = gr.Number(
-                    label="Random Seed (optional)",
-                    value=None,
-                    precision=0
-                )
-                generate_btn = gr.Button("🎵 Generate Audio", variant="primary")
-            with gr.Column():
-                audio_output = gr.Audio(label="Generated Audio")
-                status_output = gr.Textbox(label="Status", interactive=False)
-            # Connect the generate button to the function
-        generate_btn.click(
-            fn=generate_audio,
-            inputs=[prompt_input, duration_input, seed_input],
-            outputs=[audio_output, status_output]
-        )
-        # Add loading state
-        generate_btn.click(
-            fn=lambda: "🎵 Generating audio... Please wait.",
-            inputs=[],
-            outputs=[status_output],
-            queue=False
-        )
-        # Add some example prompts
-        gr.Examples(
-            examples=[
-                ["A calming ocean wave sound with seagulls", 15, 42],
-                ["Upbeat electronic dance music", 20, 123],
-                ["Classical violin concerto", 25, 999],
-                ["Rain falling on a tin roof", 10, 777]
-            ],
-            inputs=[prompt_input, duration_input, seed_input]
-        )
-    return interface
-# Launch the interface
-if __name__ == "__main__":
-    interface = create_audio_generation_interface()
-    interface.launch()

+torch>=2.0.0
+transformers>=4.30.0
+numpy>=1.21.0
+scipy>=1.7.0
+accelerate>=0.20.0
+diffusers>=0.27.0
+huggingface-hub>=0.20.0
+safetensors>=0.4.0