Carter-123 committed on
Commit
96fc729
·
verified ·
1 Parent(s): 4d712b3

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. index.html +228 -19
index.html CHANGED
@@ -1,19 +1,228 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>ComfyUI Workflow</title>
7
+ <style>
8
+ body {
9
+ font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Text', sans-serif;
10
+ background-color: #000000;
11
+ color: #f5f5f7;
12
+ padding: 40px;
13
+ }
14
+ pre {
15
+ background: #1d1d1f;
16
+ padding: 24px;
17
+ border-radius: 12px;
18
+ overflow-x: auto;
19
+ }
20
+ </style>
21
+ </head>
22
+ <body>
23
+ <h1>ComfyUI Workflow</h1>
24
+ <p>Error: Invalid JSON format</p>
25
+ <pre>#!/usr/bin/env python3
26
+ """
27
+ Text-to-Music Gradio Demo using Riffusion
28
+ Generates music from text prompts via spectrogram diffusion.
29
+ """
30
+
31
+ import gradio as gr
32
+ import torch
33
+ from diffusers import StableDiffusionPipeline
34
+ import numpy as np
35
+ import io
36
+ import os
37
+
38
+ from riffusion.spectrogram_image_converter import SpectrogramImageConverter
39
+ from riffusion.audio_utils import audio_buffer_to_wav, normalize_audio
40
+
41
+ # Global model cache
42
+ _pipe = None
43
+ _converter = None
44
+
45
def get_pipeline():
    """Return the shared Riffusion pipeline, loading it on first use.

    The pipeline is created once and cached in the module-level ``_pipe``.
    CUDA is used when available (with float16 weights); otherwise the model
    runs on the CPU in float32.

    The cache is only populated after the pipeline has been fully configured,
    so a failure while moving the model to the device (for example running
    out of GPU memory) does not leave a half-initialised pipeline behind for
    later calls to pick up.

    Returns:
        The cached ``StableDiffusionPipeline`` instance.
    """
    global _pipe
    if _pipe is not None:
        return _pipe

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading Riffusion model on {device}...")
    pipeline = StableDiffusionPipeline.from_pretrained(
        "riffusion/riffusion-model-v1",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    )
    pipeline = pipeline.to(device)
    # Attention slicing trades some speed for lower peak memory use.
    pipeline.enable_attention_slicing()

    # Publish to the cache only once every setup step has succeeded.
    _pipe = pipeline
    print("Model loaded!")
    return _pipe
59
+
60
def get_converter():
    """Return the shared spectrogram converter, creating it on first use.

    The instance is cached in the module-level ``_converter`` so that
    repeated calls reuse the same object.
    """
    global _converter
    if _converter is not None:
        return _converter

    # NOTE(review): the converter is built with no arguments — confirm that
    # the installed riffusion version supports a no-argument constructor.
    _converter = SpectrogramImageConverter()
    return _converter
66
+
67
def generate_music(prompt: str, duration: float, bpm: float, seed: int = None):
    """Generate a short audio clip from a text prompt using Riffusion.

    Args:
        prompt: Text description of the desired music.
        duration: Requested length in seconds; clamped to the range 2-10.
        bpm: Tempo hint. When positive it is appended to the prompt as
            "<bpm> bpm"; it is not used anywhere else.
        seed: Random seed for reproducibility. ``None`` or a negative value
            selects a fresh random seed.

    Returns:
        Tuple of (audio_path, spectrogram_path) for Gradio: the WAV file and
        the PNG of the generated spectrogram, both written under ``outputs/``.
    """
    # Clamp duration to a reasonable range (Riffusion works best ~5-10s).
    duration = max(2.0, min(duration, 10.0))

    # The tempo only reaches the model as a textual hint in the prompt.
    full_prompt = f"{prompt}, {int(bpm)} bpm" if bpm > 0 else prompt

    pipe = get_pipeline()
    converter = get_converter()

    if seed is None or seed < 0:
        # The dtype is explicit because the default integer type is 32-bit
        # on some platforms, where an upper bound of 2**32 is rejected.
        seed = np.random.randint(0, 2**32, dtype=np.int64)
    # Normalise to a plain int: the value may be a NumPy scalar (random
    # branch) or a float delivered by the UI, and both the torch generator
    # and the ":04d" file-name format below expect an integer.
    seed = int(seed)
    generator = torch.Generator(device=pipe.device).manual_seed(seed)

    print(f"Generating: '{full_prompt}' ({duration}s @ {bpm} BPM, seed={seed})")

    # Generate the spectrogram image (a single 512x512 picture).
    image = pipe(
        full_prompt,
        num_inference_steps=50,
        guidance_scale=7.5,
        generator=generator,
        height=512,
        width=512,
    ).images[0]

    # Convert the spectrogram to audio.
    # NOTE(review): assumes the converter accepts a `duration` keyword and
    # exposes `sample_rate` — confirm against the installed riffusion version.
    audio = converter.spectrogram_to_audio(image, duration=duration)
    audio = normalize_audio(audio)

    # Only the last four digits of the seed go into the file name, so
    # different seeds can share a name and overwrite earlier outputs.
    os.makedirs("outputs", exist_ok=True)
    base_name = f"output_{seed % 10000:04d}"
    audio_path = f"outputs/{base_name}.wav"
    spec_path = f"outputs/{base_name}_spectrogram.png"

    # Save audio.
    wav_buffer = audio_buffer_to_wav(audio, sample_rate=converter.sample_rate)
    with open(audio_path, "wb") as f:
        f.write(wav_buffer.getvalue())

    # Save the spectrogram so the UI can display it next to the audio.
    image.save(spec_path)

    print(f"Saved: {audio_path}")
    return audio_path, spec_path
127
+
128
def create_interface():
    """Build the Gradio Blocks UI for the text-to-music demo.

    The page has an introduction, a two-column row (inputs on the left,
    generated audio and spectrogram on the right), a set of clickable
    examples, and a short explanation of the method. The generate button
    and the examples both call ``generate_music``.

    Returns:
        The assembled ``gr.Blocks`` app; launching it is left to the caller.
    """
    # Each row is (prompt, duration in seconds, BPM, seed), matching the
    # order of the input components.
    example_rows = [
        ["piano ballad, emotional, cinematic", 6.0, 70, -1],
        ["funky bass guitar groove, 1970s style", 5.0, 110, -1],
        ["ethereal ambient pads, space atmosphere", 8.0, 60, -1],
        ["heavy metal guitar riff, aggressive", 4.0, 140, -1],
        ["classical violin concerto, elegant", 7.0, 90, -1],
    ]

    with gr.Blocks(title="Text-to-Music with Riffusion") as app:
        gr.Markdown("""
        # 🎵 Text-to-Music Generator

        Generate music from text descriptions using **Riffusion** -
        a Stable Diffusion model trained on spectrograms.

        *Examples: "jazz piano solo", "upbeat electronic dance music",
        "acoustic guitar folk melody", "dark ambient synth drone"*
        """)

        with gr.Row():
            # Input column, twice as wide as the output column.
            with gr.Column(scale=2):
                prompt_box = gr.Textbox(
                    label="Music Description",
                    placeholder="Describe the music you want to hear...",
                    value="smooth jazz saxophone solo, relaxing, nighttime",
                    lines=2,
                )

                with gr.Row():
                    length_slider = gr.Slider(
                        minimum=2.0,
                        maximum=10.0,
                        value=5.0,
                        step=0.5,
                        label="Duration (seconds)",
                    )
                    tempo_slider = gr.Slider(
                        minimum=60,
                        maximum=180,
                        value=120,
                        step=5,
                        label="Tempo (BPM)",
                    )

                seed_box = gr.Number(
                    label="Seed (-1 for random)",
                    value=-1,
                    precision=0,
                )

                run_button = gr.Button("🎹 Generate Music", variant="primary")

            # Output column.
            with gr.Column(scale=1):
                audio_player = gr.Audio(
                    label="Generated Music",
                    type="filepath",
                )
                spectrogram_view = gr.Image(
                    label="Spectrogram Visualization",
                    type="filepath",
                )

        # The same components feed both the examples and the button.
        generation_inputs = [prompt_box, length_slider, tempo_slider, seed_box]
        generation_outputs = [audio_player, spectrogram_view]

        # Examples are not pre-computed (cache_examples=False).
        gr.Examples(
            examples=example_rows,
            inputs=generation_inputs,
            outputs=generation_outputs,
            fn=generate_music,
            cache_examples=False,
        )

        gr.Markdown("""
        ### How it works

        1. Your text prompt is used to generate a **spectrogram image** via Stable Diffusion
        2. The spectrogram is converted back to **audio waveforms** using the Short-Time Fourier Transform (STFT)
        3. The resulting audio is normalized and returned as a playable WAV file

        *Note: First generation will download the model (~1.5GB).*
        """)

        # Wire the button to the generator.
        run_button.click(
            fn=generate_music,
            inputs=generation_inputs,
            outputs=generation_outputs,
        )

    return app
218
+
219
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        # Listen on all network interfaces rather than only localhost.
        server_name="0.0.0.0",
        server_port=7860,
        # Do not create a public share link.
        share=False,
        show_error=True,
    )</pre>
227
+ </body>
228
+ </html>