Carter-123 committed on
Commit
8ac663f
·
verified ·
1 Parent(s): 098bb99

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +356 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torchaudio
4
+ import numpy as np
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer
6
+ import os
7
+ import tempfile
8
+ from typing import Tuple, Optional
9
+
10
# Global variables for model caching
# Both start as None and are populated by load_model() on first use.
_model = None  # cached MusicgenForConditionalGeneration instance
_tokenizer = None  # cached AutoProcessor (a processor, despite the name)
13
+
14
def load_model():
    """Return the cached MusicGen model and processor, loading them on first use.

    The pair lives in the module-level ``_model`` / ``_tokenizer`` globals
    (``_tokenizer`` holds the ``AutoProcessor``). The globals are assigned only
    after every loading step has succeeded, so a failure part-way through never
    leaves a half-initialised model in the cache.

    Returns:
        Tuple of (model, processor).

    Raises:
        gr.Error: If the model or its processor cannot be loaded. The original
            exception is chained as the cause.
    """
    global _model, _tokenizer

    # Fast path: both halves of the cache are already populated.
    if _model is not None and _tokenizer is not None:
        return _model, _tokenizer

    print("Loading SongGen model...")

    # Using a smaller music generation model for CPU inference
    model_name = "facebook/musicgen-small"

    try:
        from transformers import MusicgenForConditionalGeneration, AutoProcessor

        model = MusicgenForConditionalGeneration.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # Use float32 for CPU compatibility
            low_cpu_mem_usage=True,
        )
        processor = AutoProcessor.from_pretrained(model_name)

        # Inference only, on CPU
        model = model.to("cpu")
        model.eval()
    except Exception as e:
        print(f"Error loading model: {e}")
        # There is no fallback model: surface the failure to the UI instead.
        raise gr.Error(f"Failed to load model: {str(e)}") from e

    # Publish both objects together, only once everything above succeeded.
    _model, _tokenizer = model, processor
    print("Model loaded successfully!")

    return _model, _tokenizer
46
+
47
def generate_music(
    prompt: str,
    duration: float,
    guidance_scale: float,
    num_inference_steps: int,
    temperature: float,
    top_k: int,
    top_p: float,
    progress: gr.Progress = gr.Progress()
) -> Tuple[Optional[str], str]:
    """
    Generate music from a text prompt with the MusicGen model.

    Args:
        prompt: Text description of the music to generate
        duration: Requested length in seconds; converted to a token budget at
            roughly 50 tokens per second, capped at 1500 tokens
        guidance_scale: Controls adherence to prompt vs diversity
        num_inference_steps: Accepted so the UI wiring and example rows keep
            working, and echoed in the status message. It is deliberately NOT
            forwarded to ``model.generate``: MusicGen has no such generation
            argument, and passing an unused keyword makes ``generate`` raise.
        temperature: Controls randomness in generation
        top_k: Top-k sampling parameter (coerced to int before use)
        top_p: Nucleus sampling parameter
        progress: Gradio progress tracker used to report stage updates

    Returns:
        Tuple of (audio_file_path, status_message). The path is None when the
        prompt is empty or generation fails; the message then explains why.
    """

    if not prompt or not prompt.strip():
        return None, "❌ Please enter a prompt describing the music you want to generate."

    try:
        progress(0.1, desc="Loading model...")
        model, processor = load_model()

        progress(0.2, desc="Preparing inputs...")

        # Process the prompt
        inputs = processor(
            text=[prompt],
            return_tensors="pt",
            padding=True
        )

        # Calculate max length based on duration (50 tokens per second approximately),
        # capped at a reasonable length and never below one token.
        max_length = max(1, min(int(duration * 50), 1500))

        progress(0.3, desc="Generating music...")

        # Generate audio
        with torch.no_grad():
            audio_values = model.generate(
                **inputs,
                max_new_tokens=max_length,
                guidance_scale=guidance_scale,
                temperature=temperature,
                top_k=int(top_k),  # sliders may deliver floats; top_k must be an int
                top_p=top_p,
                do_sample=True,
            )

        progress(0.9, desc="Processing audio...")

        # First batch item, first channel
        # (assumes output is (batch, channels, samples) - TODO confirm)
        audio_np = audio_values[0, 0].cpu().numpy()

        # Normalize audio to [-1, 1] range. Skip when the clip is empty or
        # silent: dividing by a zero peak would fill the file with NaNs.
        peak = float(np.max(np.abs(audio_np))) if audio_np.size else 0.0
        if peak > 0.0:
            audio_np = audio_np / peak

        # Get sample rate from model config
        sample_rate = model.config.audio_encoder.sampling_rate

        # Reserve a temporary file name; the handle is closed before writing
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            output_path = tmp_file.name

        # Save as wav file (adds a leading channel dimension)
        torchaudio.save(
            output_path,
            torch.from_numpy(audio_np).unsqueeze(0),
            sample_rate=sample_rate,
            format="wav"
        )

        progress(1.0, desc="Complete!")

        # Create success message with metadata
        info_msg = "\n".join([
            "✅ Music generated successfully!",
            "",
            f"🎵 **Prompt:** {prompt}",
            f"⏱️ **Duration:** {len(audio_np) / sample_rate:.2f}s",
            f"🎚️ **Sample Rate:** {sample_rate}Hz",
            f"🔧 **Settings:** guidance={guidance_scale}, steps={num_inference_steps}, temp={temperature}",
        ])

        return output_path, info_msg

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"❌ Error generating music: {str(e)}"
149
+
150
def create_examples():
    """Build the example rows shown in the UI.

    Every row is a list of
    ``[prompt, duration, guidance scale, inference steps, temperature, top-k, top-p]``.
    All examples share the same inference-step and top-k values.
    """
    shared_steps = 50
    shared_top_k = 250

    # (prompt, duration, guidance scale, temperature, top-p)
    presets = (
        ("Upbeat electronic dance music with a strong bass line and energetic synths", 8.0, 3.0, 1.0, 0.99),
        ("Calm ambient piano music with soft strings, peaceful and relaxing", 10.0, 3.5, 0.8, 0.95),
        ("Epic orchestral soundtrack with brass and percussion, cinematic and dramatic", 10.0, 4.0, 1.0, 0.99),
        ("Lo-fi hip hop beats with jazzy chords, chill and study music", 8.0, 2.5, 0.9, 0.95),
        ("Acoustic guitar folk melody, warm and nostalgic", 6.0, 3.0, 0.85, 0.95),
        ("Cyberpunk synthwave with retro 80s vibes, driving and energetic", 8.0, 3.0, 1.0, 0.99),
    )

    return [
        [text, seconds, guidance, shared_steps, temp, shared_top_k, nucleus]
        for text, seconds, guidance, temp, nucleus in presets
    ]
160
+
161
# Custom theme for modern UI: start from the Soft preset with the app's
# colour palette, font and sizing, then override individual style variables.
custom_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="violet",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="lg",
)
custom_theme = custom_theme.set(
    # Primary button colours
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    # Block titles
    block_title_text_weight="600",
    block_title_text_size="*text_lg",
    # Block background and border
    block_background_fill="*neutral_50",
    block_border_width="1px",
    block_border_color="*neutral_200",
)
179
+
180
with gr.Blocks() as demo:
    # Header with branding
    with gr.Row():
        with gr.Column():
            gr.Markdown("""
            # 🎵 SongGen Music Generator

            Generate custom music from text descriptions using AI.
            Powered by MusicGen - Meta's state-of-the-art music generation model.

            [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
            """)

    with gr.Row():
        # Left panel - Controls
        with gr.Column(scale=1):
            gr.Markdown("### 🎛️ Generation Settings")

            prompt_input = gr.Textbox(
                label="Music Description",
                placeholder="Describe the music you want to generate...",
                lines=3,
                info="Be specific about genre, instruments, mood, and tempo",
                value="Upbeat electronic dance music with energetic synths and strong bass"
            )

            # Sampling and length controls, collapsed by default
            with gr.Accordion("Advanced Settings", open=False):
                duration_slider = gr.Slider(
                    minimum=3,
                    maximum=15,
                    value=8,
                    step=0.5,
                    label="Duration (seconds)",
                    info="Longer durations take more time to generate"
                )

                guidance_slider = gr.Slider(
                    minimum=1.0,
                    maximum=5.0,
                    value=3.0,
                    step=0.5,
                    label="Guidance Scale",
                    info="Higher = more prompt adherence, less diversity"
                )

                steps_slider = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=50,
                    step=5,
                    label="Inference Steps",
                    info="More steps = higher quality, slower generation"
                )

                temperature_slider = gr.Slider(
                    minimum=0.5,
                    maximum=1.5,
                    value=1.0,
                    step=0.05,
                    label="Temperature",
                    info="Higher = more random/creative"
                )

                topk_slider = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=250,
                    step=50,
                    label="Top-K",
                    info="Limits vocabulary for sampling"
                )

                topp_slider = gr.Slider(
                    minimum=0.8,
                    maximum=1.0,
                    value=0.99,
                    step=0.01,
                    label="Top-P (Nucleus)",
                    info="Cumulative probability threshold"
                )

            generate_btn = gr.Button(
                "🎵 Generate Music",
                variant="primary",
                size="lg"
            )

            # Status and info: receives the message returned by generate_music
            status_output = gr.Textbox(
                label="Status",
                lines=6,
                interactive=False
            )

        # Right panel - Output
        with gr.Column(scale=1):
            gr.Markdown("### 🎧 Generated Music")

            # generate_music returns a file path, hence type="filepath"
            audio_output = gr.Audio(
                label="Generated Audio",
                type="filepath",
                autoplay=False,
                show_download_button=True,
                waveform_options=gr.WaveformOptions(
                    waveform_color="#4f46e5",
                    waveform_progress_color="#7c3aed",
                    show_recording_waveform=False
                )
            )

            # Tips section
            with gr.Accordion("💡 Tips for Better Results", open=True):
                gr.Markdown("""
                **Prompt Engineering Tips:**

                1. **Be specific about genre:** "electronic", "classical", "jazz", "rock"

                2. **Mention instruments:** "piano", "synthesizers", "drums", "strings"

                3. **Describe the mood:** "upbeat", "melancholic", "energetic", "calm"

                4. **Add tempo hints:** "fast tempo", "slow ballad", "medium groove"

                5. **Use reference styles:** "like 80s synthwave", "cinematic soundtrack"

                **Example prompts:**
                - "Upbeat pop with catchy synth melody and electronic drums"
                - "Sad piano ballad with emotional strings, slow tempo"
                - "Heavy metal with distorted guitars and aggressive drums"
                """)

    # One list, in generate_music's parameter order, shared by the example
    # loader and the click handler so the two cannot drift apart.
    generation_inputs = [
        prompt_input,
        duration_slider,
        guidance_slider,
        steps_slider,
        temperature_slider,
        topk_slider,
        topp_slider,
    ]

    # Examples section
    gr.Markdown("### 🎯 Quick Examples")

    examples = gr.Examples(
        examples=create_examples(),
        inputs=generation_inputs,
        label="Click to load example",
        examples_per_page=3
    )

    # Footer
    gr.Markdown("""
    ---
    Made with ❤️ using Gradio and MusicGen |
    [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
    """)

    # Event handlers
    generate_btn.click(
        fn=generate_music,
        inputs=generation_inputs,
        outputs=[audio_output, status_output],
        api_visibility="public"
    )
344
+
345
# Launch with Gradio 6 syntax - all parameters in launch()
# The __main__ guard keeps a plain import of this module (tests, tooling) from
# starting the web server; running the file as a script behaves as before.
if __name__ == "__main__":
    demo.launch(
        theme=custom_theme,
        title="SongGen Music Generator",
        footer_links=[
            {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
            {"label": "Gradio", "url": "https://gradio.app"},
            {"label": "Settings", "url": "#"}
        ],
        show_error=True,
        quiet=False
    )
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=6.0.2
2
+ numpy
3
+ torch
4
+ torchaudio
5
+ transformers