OnyxMunk committed on
Commit
c30d48f
·
1 Parent(s): f9d8177

Fix build error: Remove gradio from requirements.txt to avoid version conflict

Browse files
Files changed (1) hide show
  1. requirements.txt +8 -171
requirements.txt CHANGED
@@ -1,171 +1,8 @@
1
- import gradio as gr
2
- import torch
3
- import numpy as np
4
- from diffusers import StableAudioPipeline
5
- import scipy.io.wavfile as wavfile
6
- import io
7
- import os
8
-
9
- # Global variable to cache the model
10
- model_cache = None
11
-
12
- def load_stable_audio_model():
13
- """
14
- Load the Stable Audio model with caching
15
- """
16
- global model_cache
17
- if model_cache is None:
18
- try:
19
- print("Loading Stable Audio model...")
20
- model_cache = StableAudioPipeline.from_pretrained(
21
- "stabilityai/stable-audio-open-1.0",
22
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
23
- )
24
- if torch.cuda.is_available():
25
- model_cache = model_cache.to("cuda")
26
- print("Model loaded successfully!")
27
- except Exception as e:
28
- print(f"Error loading model: {e}")
29
- # Fallback to placeholder if model loading fails
30
- model_cache = "placeholder"
31
- return model_cache
32
-
33
- def create_audio_generation_interface():
34
- """
35
- Create a Gradio interface for Stable Audio generation
36
- """
37
-
38
- def generate_audio(prompt, duration, seed):
39
- """
40
- Generate audio based on text prompt using Stable Audio model
41
- """
42
- try:
43
- model = load_stable_audio_model()
44
-
45
- if model == "placeholder":
46
- # Fallback to placeholder if model loading failed
47
- sample_rate = 44100
48
- duration_samples = int(duration * sample_rate)
49
- frequency = 440 + (seed % 200) # Vary frequency based on seed
50
-
51
- t = np.linspace(0, duration, duration_samples, endpoint=False)
52
- audio = 0.3 * np.sin(2 * np.pi * frequency * t)
53
- return (sample_rate, audio), "Using placeholder audio (model loading failed)"
54
-
55
- # Set seed for reproducibility
56
- if seed is not None:
57
- torch.manual_seed(seed)
58
- if torch.cuda.is_available():
59
- torch.cuda.manual_seed(seed)
60
-
61
- # Generate audio with Stable Audio
62
- print(f"Generating audio for prompt: '{prompt}', duration: {duration}s")
63
-
64
- # Create negative prompt for better quality
65
- negative_prompt = "low quality, distorted, noisy, artifacts"
66
-
67
- # Generate the audio
68
- audio_output = model(
69
- prompt=prompt,
70
- negative_prompt=negative_prompt,
71
- duration=duration,
72
- num_inference_steps=100,
73
- guidance_scale=7.5,
74
- num_waveforms_per_prompt=1,
75
- audio_length_in_s=duration,
76
- )
77
-
78
- # Extract the audio data
79
- audio = audio_output.audios[0] # Shape: [channels, samples]
80
-
81
- # Convert to mono if stereo
82
- if audio.ndim > 1:
83
- audio = audio.mean(axis=0)
84
-
85
- # Ensure proper sample rate (Stable Audio uses 44100 Hz)
86
- sample_rate = 44100
87
-
88
- return (sample_rate, audio), "Audio generated successfully!"
89
-
90
- except Exception as e:
91
- print(f"Error generating audio: {e}")
92
- # Fallback to simple tone
93
- sample_rate = 44100
94
- duration_samples = int(duration * sample_rate)
95
- frequency = 220 # A3 note
96
-
97
- t = np.linspace(0, duration, duration_samples, endpoint=False)
98
- audio = 0.3 * np.sin(2 * np.pi * frequency * t)
99
-
100
- return (sample_rate, audio), f"Error: {str(e)}. Using fallback audio."
101
-
102
- # Create the Gradio interface
103
- with gr.Blocks(title="Stable Audio Open", theme=gr.themes.Soft()) as interface:
104
- gr.Markdown("""
105
- # 🎵 Stable Audio Open
106
- Generate high-quality audio from text prompts using Stable Audio technology.
107
-
108
- **Note:** This is a demo interface. The actual Stable Audio model integration is coming soon.
109
- """)
110
-
111
- with gr.Row():
112
- with gr.Column():
113
- prompt_input = gr.Textbox(
114
- label="Text Prompt",
115
- placeholder="Describe the audio you want to generate...",
116
- lines=3,
117
- value="A gentle piano melody playing in a cozy room"
118
- )
119
-
120
- duration_input = gr.Slider(
121
- label="Duration (seconds)",
122
- minimum=1,
123
- maximum=30,
124
- value=10,
125
- step=1
126
- )
127
-
128
- seed_input = gr.Number(
129
- label="Random Seed (optional)",
130
- value=None,
131
- precision=0
132
- )
133
-
134
- generate_btn = gr.Button("🎵 Generate Audio", variant="primary")
135
-
136
- with gr.Column():
137
- audio_output = gr.Audio(label="Generated Audio")
138
- status_output = gr.Textbox(label="Status", interactive=False)
139
-
140
- # Connect the generate button to the function
141
- generate_btn.click(
142
- fn=generate_audio,
143
- inputs=[prompt_input, duration_input, seed_input],
144
- outputs=[audio_output, status_output]
145
- )
146
-
147
- # Add loading state
148
- generate_btn.click(
149
- fn=lambda: "🎵 Generating audio... Please wait.",
150
- inputs=[],
151
- outputs=[status_output],
152
- queue=False
153
- )
154
-
155
- # Add some example prompts
156
- gr.Examples(
157
- examples=[
158
- ["A calming ocean wave sound with seagulls", 15, 42],
159
- ["Upbeat electronic dance music", 20, 123],
160
- ["Classical violin concerto", 25, 999],
161
- ["Rain falling on a tin roof", 10, 777]
162
- ],
163
- inputs=[prompt_input, duration_input, seed_input]
164
- )
165
-
166
- return interface
167
-
168
- # Launch the interface
169
- if __name__ == "__main__":
170
- interface = create_audio_generation_interface()
171
- interface.launch()
 
1
+ torch>=2.0.0
2
+ transformers>=4.30.0
3
+ numpy>=1.21.0
4
+ scipy>=1.7.0
5
+ accelerate>=0.20.0
6
+ diffusers>=0.27.0
7
+ huggingface-hub>=0.20.0
8
+ safetensors>=0.4.0