mrfakename committed on
Commit
4af893c
·
verified ·
1 Parent(s): 0929406

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +246 -314
  2. requirements.txt +2 -14
app.py CHANGED
@@ -1,363 +1,295 @@
1
- #!/usr/bin/env python3
2
  """
3
- HuggingFace Space app for Muse-8b music generation
4
- Text input -> Audio output
5
  """
6
 
7
- import spaces
8
- import gradio as gr
9
  import os
10
- import sys
11
  import tempfile
12
- from typing import Optional, Tuple
13
  import torch
14
- import numpy as np
15
- import torchaudio
16
- from huggingface_hub import snapshot_download
17
-
18
- # Add MuCodec to path
19
- sys.path.insert(0, "./MuCodec")
20
-
21
- from transformers import AutoModelForCausalLM, AutoTokenizer
22
- from MuCodec.model import PromptCondAudioDiffusion
23
- from MuCodec.tools.get_melvaehifigan48k import build_pretrained_models
24
- import MuCodec.tools.torch_tools as torch_tools
25
-
26
- # Constants
27
- MODEL_NAME = "bolshyC/Muse-8b"
28
- SAMPLE_RATE = 48000
29
-
30
- # ============================================================================
31
- # Model Loading at Module Level
32
- # ============================================================================
33
-
34
- print("Loading Muse language model...")
35
- device = "cuda" if torch.cuda.is_available() else "cpu"
36
-
37
- # Load language model
38
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
39
- language_model = AutoModelForCausalLM.from_pretrained(
40
- MODEL_NAME,
41
- trust_remote_code=True,
42
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
43
- device_map="auto" if device == "cuda" else None,
44
- )
45
- if device == "cpu":
46
- language_model = language_model.to(device)
47
- language_model.eval()
48
- print("Language model loaded!")
49
-
50
- # Load MuCodec decoder
51
- print("Loading MuCodec decoder...")
52
- mucodec_dir = "./MuCodec"
53
- ckpt_path = os.path.join(mucodec_dir, "ckpt/mucodec.pt")
54
- #audioldm_path = os.path.join(mucodec_dir, "tools/audioldm_48k.pth")
55
- audioldm_path = os.path.join(snapshot_download("haoheliu/audioldm_48k", local_dir="./alm"), "audioldm_48k.pth")
56
- config_path = os.path.join(mucodec_dir, "configs/models/transformer2D.json")
57
-
58
- # Load VAE and STFT
59
- vae, stft = build_pretrained_models(audioldm_path)
60
- vae = vae.eval().to(device)
61
- stft = stft.eval().to(device)
62
-
63
- # Load diffusion model
64
- main_config = {
65
- "num_channels": 32,
66
- "unet_model_name": None,
67
- "unet_model_config_path": config_path,
68
- "snr_gamma": None,
69
- }
70
- mucodec_model = PromptCondAudioDiffusion(**main_config)
71
- main_weights = torch.load(ckpt_path, map_location='cpu')
72
- mucodec_model.load_state_dict(main_weights, strict=False)
73
- mucodec_model = mucodec_model.to(device).eval()
74
- mucodec_model.init_device_dtype(torch.device(device), torch.float32)
75
- print("MuCodec decoder loaded!")
76
-
77
- # ============================================================================
78
- # Helper Functions
79
- # ============================================================================
80
-
81
- def parse_tokens_from_text(text: str) -> Optional[torch.Tensor]:
82
- """Extract audio tokens from generated text"""
83
- try:
84
- if "<|audio_0|>" in text and "<|audio_1|>" in text:
85
- start = text.find("<|audio_0|>") + len("<|audio_0|>")
86
- end = text.find("<|audio_1|>")
87
- token_str = text[start:end].strip()
88
- else:
89
- token_str = text.strip()
90
-
91
- tokens = [int(t) for t in token_str.split() if t.isdigit()]
92
-
93
- if len(tokens) == 0:
94
- return None
95
-
96
- return torch.tensor(tokens, dtype=torch.long).unsqueeze(0).unsqueeze(0)
97
-
98
- except Exception as e:
99
- print(f"Error parsing tokens: {e}")
100
- return None
101
-
102
-
103
- def codes_to_audio(
104
- codes: torch.Tensor,
105
- num_steps: int = 20
106
- ) -> torch.Tensor:
107
- """Convert audio codes to waveform using MuCodec"""
108
-
109
- codes = codes.to(device)
110
-
111
- # Initialize latent
112
- first_latent = torch.randn(codes.shape[0], 32, 512, 32).to(device)
113
- first_latent_length = 0
114
- first_latent_codes_length = 0
115
-
116
- # Sliding window parameters
117
- min_samples = 1024
118
- hop_samples = min_samples // 4 * 3
119
- ovlp_samples = min_samples - hop_samples
120
-
121
- codes_len = codes.shape[-1]
122
- target_len = int(codes_len / 100 * 4 * SAMPLE_RATE)
123
-
124
- # Pad codes if too short
125
- if codes_len < min_samples:
126
- while codes.shape[-1] < min_samples:
127
- codes = torch.cat([codes, codes], -1)
128
- codes = codes[:, :, :min_samples]
129
- codes_len = codes.shape[-1]
130
-
131
- # Adjust codes length for sliding window
132
- if (codes_len - ovlp_samples) % hop_samples > 0:
133
- len_codes = int(np.ceil((codes_len - ovlp_samples) / hop_samples) * hop_samples + ovlp_samples)
134
- while codes.shape[-1] < len_codes:
135
- codes = torch.cat([codes, codes], -1)
136
- codes = codes[:, :, :len_codes]
137
-
138
- # Generate latents with sliding window
139
- latent_length = 512
140
- latent_list = []
141
- spk_embeds = torch.zeros([1, 32, 1, 32], device=codes.device)
142
-
143
- with torch.autocast(device_type="cuda" if torch.cuda.is_available() else "cpu", dtype=torch.float16):
144
- for sinx in range(0, codes.shape[-1] - hop_samples, hop_samples):
145
- codes_input = [codes[:, :, sinx:sinx + min_samples]]
146
-
147
- if sinx == 0:
148
- latents = mucodec_model.inference_codes(
149
- codes_input, spk_embeds, first_latent,
150
- latent_length, first_latent_length,
151
- additional_feats=[], guidance_scale=1.5,
152
- num_steps=num_steps, disable_progress=True,
153
- scenario='other_seg'
154
- )
155
- else:
156
- true_latent = latent_list[-1][:, :, -ovlp_samples // 2:, :]
157
- len_add = 512 - true_latent.shape[-2]
158
- incontext_length = true_latent.shape[-2]
159
- true_latent = torch.cat([
160
- true_latent,
161
- torch.randn(true_latent.shape[0], true_latent.shape[1],
162
- len_add, true_latent.shape[-1]).to(device)
163
- ], -2)
164
-
165
- latents = mucodec_model.inference_codes(
166
- codes_input, spk_embeds, true_latent,
167
- latent_length, incontext_length,
168
- additional_feats=[], guidance_scale=1.5,
169
- num_steps=num_steps, disable_progress=True,
170
- scenario='other_seg'
171
- )
172
-
173
- latent_list.append(latents)
174
-
175
- # Decode latents to audio
176
- latent_list = [l.float() for l in latent_list]
177
- duration = 40.96
178
- min_samples_audio = int(duration * SAMPLE_RATE)
179
- hop_samples_audio = min_samples_audio // 4 * 3
180
- ovlp_samples_audio = min_samples_audio - hop_samples_audio
181
-
182
- output = None
183
- for i, latent in enumerate(latent_list):
184
- bsz, ch, t, f = latent.shape
185
- latent = latent.reshape(bsz * 2, ch // 2, t, f)
186
- mel = vae.decode_first_stage(latent)
187
- cur_output = vae.decode_to_waveform(mel)
188
- cur_output = torch.from_numpy(cur_output)[:, :min_samples_audio]
189
-
190
- if output is None:
191
- output = cur_output
192
- else:
193
- # Overlap-add smoothing
194
- ov_win = torch.from_numpy(np.linspace(0, 1, ovlp_samples_audio)[None, :])
195
- ov_win = torch.cat([ov_win, 1 - ov_win], -1)
196
- output[:, -ovlp_samples_audio:] = (
197
- output[:, -ovlp_samples_audio:] * ov_win[:, -ovlp_samples_audio:] +
198
- cur_output[:, :ovlp_samples_audio] * ov_win[:, :ovlp_samples_audio]
199
- )
200
- output = torch.cat([output, cur_output[:, ovlp_samples_audio:]], -1)
201
-
202
- # Trim to target length
203
- output = output[:, :target_len]
204
- return output
205
-
206
-
207
- # ============================================================================
208
- # Main Generation Function with @spaces.GPU
209
- # ============================================================================
210
-
211
- @spaces.GPU
212
- def generate_music(
213
- prompt: str,
214
- max_tokens: int = 3000,
215
- temperature: float = 0.0,
216
- top_p: float = 0.9,
217
- repetition_penalty: float = 1.1,
218
- num_diffusion_steps: int = 20,
219
- ) -> Tuple[Optional[str], str]:
220
- """Generate music from text prompt"""
221
-
222
- if not prompt.strip():
223
- return None, "Please enter a prompt"
224
-
225
- try:
226
- # Generate tokens
227
- messages = [{"role": "user", "content": prompt}]
228
- prompt_text = tokenizer.apply_chat_template(
229
- messages, tokenize=False, add_generation_prompt=True
230
  )
231
 
232
- inputs = tokenizer(prompt_text, return_tensors="pt")
233
- inputs = {k: v.to(device) for k, v in inputs.items()}
 
 
 
 
234
 
235
- generation_config = {
236
- "max_new_tokens": max_tokens,
237
- "temperature": temperature if temperature > 0 else 1.0,
238
- "top_p": top_p,
239
- "repetition_penalty": repetition_penalty,
240
- "do_sample": temperature > 0,
241
- "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id,
242
- "eos_token_id": tokenizer.eos_token_id,
243
- }
244
 
245
- with torch.no_grad():
246
- outputs = language_model.generate(**inputs, **generation_config)
247
 
248
- input_length = inputs["input_ids"].shape[1]
249
- generated_tokens = outputs[0][input_length:]
250
- response = tokenizer.decode(generated_tokens, skip_special_tokens=False)
251
 
252
- # Parse tokens
253
- audio_codes = parse_tokens_from_text(response)
254
- if audio_codes is None:
255
- return None, "❌ Could not parse audio tokens from model output"
256
 
257
- print(f"Parsed {audio_codes.shape[-1]} audio tokens")
258
 
259
- # Decode to audio
260
- waveform = codes_to_audio(audio_codes, num_steps=num_diffusion_steps)
 
 
 
261
 
262
- # Save audio file
263
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
264
- output_path = f.name
265
 
266
- torchaudio.save(output_path, waveform.cpu(), SAMPLE_RATE)
267
 
268
- duration = waveform.shape[-1] / SAMPLE_RATE
269
- return output_path, f"✓ Generated {duration:.1f}s audio ({audio_codes.shape[-1]} tokens)"
 
 
 
 
 
270
 
271
- except Exception as e:
272
- import traceback
273
- error_msg = f"❌ Error: {str(e)}\n{traceback.format_exc()}"
274
- print(error_msg)
275
- return None, error_msg
 
 
 
 
276
 
277
 
278
- # ============================================================================
279
- # Gradio Interface
280
- # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
 
282
- with gr.Blocks(title="Muse-8b Music Generator") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  gr.Markdown(
284
  """
285
- # 🎵 Muse-8b Music Generator
 
 
 
 
 
 
 
 
 
286
 
287
- Generate music directly from text prompts using Muse-8b + MuCodec.
288
  """
289
  )
290
 
291
  with gr.Row():
292
- with gr.Column(scale=2):
293
- prompt_input = gr.Textbox(
294
- label="Music Prompt",
295
- placeholder="Describe the music you want to generate...\n\nExample: Please generate a song in style: Pop, Ballad, C-pop. Create an emotional love song with piano accompaniment.",
296
- lines=5
 
297
  )
298
 
299
- generate_btn = gr.Button("🎵 Generate Music", variant="primary", size="lg")
 
 
 
 
 
300
 
301
- status_output = gr.Textbox(label="Status", lines=2)
302
- audio_output = gr.Audio(label="Generated Music", type="filepath")
 
 
 
 
 
 
 
303
 
304
- with gr.Column(scale=1):
305
- gr.Markdown("### Generation Settings")
 
 
 
 
 
 
306
 
307
- max_tokens_slider = gr.Slider(
308
- minimum=500, maximum=5000, value=3000, step=100,
309
- label="Max Tokens"
310
- )
311
- temperature_slider = gr.Slider(
312
- minimum=0.0, maximum=1.0, value=0.0, step=0.1,
313
- label="Temperature (0 = deterministic)"
314
- )
315
- top_p_slider = gr.Slider(
316
- minimum=0.0, maximum=1.0, value=0.9, step=0.05,
317
- label="Top P"
318
- )
319
- rep_penalty_slider = gr.Slider(
320
- minimum=1.0, maximum=2.0, value=1.1, step=0.05,
321
- label="Repetition Penalty"
322
- )
323
- diffusion_steps_slider = gr.Slider(
324
- minimum=10, maximum=50, value=20, step=5,
325
- label="Diffusion Steps (quality vs speed)"
 
 
 
 
 
326
  )
327
 
328
- gr.Examples(
329
- examples=[
330
- ["Please generate a song in style: Pop, Ballad, C-pop. Create an emotional love song with piano accompaniment."],
331
- ["Generate an upbeat electronic dance music track with strong bass and synth leads."],
332
- ["Create a classical orchestral piece with strings and woodwinds, peaceful and serene."],
333
- ["Make a jazz fusion track with saxophone and electric guitar solos."],
334
- ],
335
- inputs=prompt_input
336
- )
 
 
 
 
 
 
337
 
338
  generate_btn.click(
339
  fn=generate_music,
340
  inputs=[
341
- prompt_input,
342
- max_tokens_slider,
343
- temperature_slider,
344
- top_p_slider,
345
- rep_penalty_slider,
346
- diffusion_steps_slider
347
  ],
348
- outputs=[audio_output, status_output]
349
  )
350
 
351
  gr.Markdown(
352
  """
353
  ---
354
- ### About
355
-
356
- **Model**: [bolshyC/Muse-8b](https://huggingface.co/bolshyC/Muse-8b)
357
- **Decoder**: MuCodec (Ultra Low-Bitrate Music Codec)
358
 
359
- First generation may take ~1-2 minutes. Subsequent generations are faster.
360
  """
361
  )
362
 
363
- demo.queue().launch()
 
 
 
 
 
 
 
 
 
1
  """
2
+ HeartMuLa Gradio App - Music Generation with Lyrics and Tags
3
+ A self-contained Gradio app for Hugging Face Spaces
4
  """
5
 
 
 
6
  import os
 
7
  import tempfile
 
8
  import torch
9
+ import gradio as gr
10
+ from huggingface_hub import hf_hub_download, snapshot_download
11
+
12
+ # Download models from HuggingFace Hub on startup
13
+ def download_models():
14
+ """Download all required model files from HuggingFace Hub."""
15
+ cache_dir = os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface"))
16
+ model_dir = os.path.join(cache_dir, "heartmula_models")
17
+
18
+ if not os.path.exists(model_dir):
19
+ os.makedirs(model_dir, exist_ok=True)
20
+
21
+ # Download HeartMuLaGen (tokenizer and gen_config)
22
+ print("Downloading HeartMuLaGen files...")
23
+ for filename in ["tokenizer.json", "gen_config.json"]:
24
+ hf_hub_download(
25
+ repo_id="HeartMuLa/HeartMuLaGen",
26
+ filename=filename,
27
+ local_dir=model_dir,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  )
29
 
30
+ # Download HeartMuLa-oss-3B
31
+ print("Downloading HeartMuLa-oss-3B...")
32
+ snapshot_download(
33
+ repo_id="HeartMuLa/HeartMuLa-oss-3B",
34
+ local_dir=os.path.join(model_dir, "HeartMuLa-oss-3B"),
35
+ )
36
 
37
+ # Download HeartCodec-oss
38
+ print("Downloading HeartCodec-oss...")
39
+ snapshot_download(
40
+ repo_id="HeartMuLa/HeartCodec-oss",
41
+ local_dir=os.path.join(model_dir, "HeartCodec-oss"),
42
+ )
 
 
 
43
 
44
+ print("All models downloaded successfully!")
45
+ return model_dir
46
 
 
 
 
47
 
48
+ # Global pipeline instance
49
+ pipeline = None
 
 
50
 
 
51
 
52
+ def load_pipeline():
53
+ """Load the HeartMuLa pipeline."""
54
+ global pipeline
55
+ if pipeline is not None:
56
+ return pipeline
57
 
58
+ from heartlib import HeartMuLaGenPipeline
 
 
59
 
60
+ model_dir = download_models()
61
 
62
+ # Determine device and dtype
63
+ if torch.cuda.is_available():
64
+ device = torch.device("cuda")
65
+ dtype = torch.bfloat16
66
+ else:
67
+ device = torch.device("cpu")
68
+ dtype = torch.float32
69
 
70
+ print(f"Loading pipeline on {device} with {dtype}...")
71
+ pipeline = HeartMuLaGenPipeline.from_pretrained(
72
+ model_dir,
73
+ device=device,
74
+ dtype=dtype,
75
+ version="3B",
76
+ )
77
+ print("Pipeline loaded successfully!")
78
+ return pipeline
79
 
80
 
81
+ def generate_music(
82
+ lyrics: str,
83
+ tags: str,
84
+ max_duration_seconds: int,
85
+ temperature: float,
86
+ topk: int,
87
+ cfg_scale: float,
88
+ progress=gr.Progress(track_tqdm=True),
89
+ ):
90
+ """Generate music from lyrics and tags."""
91
+ if not lyrics.strip():
92
+ raise gr.Error("Please enter some lyrics!")
93
+
94
+ if not tags.strip():
95
+ raise gr.Error("Please enter at least one tag!")
96
+
97
+ pipe = load_pipeline()
98
+
99
+ # Create a temporary file for output
100
+ with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
101
+ output_path = f.name
102
+
103
+ max_audio_length_ms = max_duration_seconds * 1000
104
+
105
+ with torch.no_grad():
106
+ pipe(
107
+ {
108
+ "lyrics": lyrics,
109
+ "tags": tags,
110
+ },
111
+ max_audio_length_ms=max_audio_length_ms,
112
+ save_path=output_path,
113
+ topk=topk,
114
+ temperature=temperature,
115
+ cfg_scale=cfg_scale,
116
+ )
117
 
118
+ return output_path
119
+
120
+
121
+ # Example lyrics
122
+ EXAMPLE_LYRICS = """[Intro]
123
+
124
+ [Verse]
125
+ The sun creeps in across the floor
126
+ I hear the traffic outside the door
127
+ The coffee pot begins to hiss
128
+ It is another morning just like this
129
+
130
+ [Prechorus]
131
+ The world keeps spinning round and round
132
+ Feet are planted on the ground
133
+ I find my rhythm in the sound
134
+
135
+ [Chorus]
136
+ Every day the light returns
137
+ Every day the fire burns
138
+ We keep on walking down this street
139
+ Moving to the same steady beat
140
+ It is the ordinary magic that we meet
141
+
142
+ [Verse]
143
+ The hours tick deeply into noon
144
+ Chasing shadows, chasing the moon
145
+ Work is done and the lights go low
146
+ Watching the city start to glow
147
+
148
+ [Bridge]
149
+ It is not always easy, not always bright
150
+ Sometimes we wrestle with the night
151
+ But we make it to the morning light
152
+
153
+ [Chorus]
154
+ Every day the light returns
155
+ Every day the fire burns
156
+ We keep on walking down this street
157
+ Moving to the same steady beat
158
+
159
+ [Outro]
160
+ Just another day
161
+ Every single day"""
162
+
163
+ EXAMPLE_TAGS = "piano,happy,uplifting,pop"
164
+
165
+ # Build the Gradio interface
166
+ with gr.Blocks(
167
+ title="HeartMuLa Music Generator",
168
+ theme=gr.themes.Soft(),
169
+ ) as demo:
170
  gr.Markdown(
171
  """
172
+ # HeartMuLa Music Generator
173
+
174
+ Generate music from lyrics and tags using [HeartMuLa](https://github.com/HeartMuLa/heartlib),
175
+ an open-source music foundation model.
176
+
177
+ **Instructions:**
178
+ 1. Enter your lyrics with structure tags like `[Verse]`, `[Chorus]`, `[Bridge]`, etc.
179
+ 2. Add comma-separated tags describing the music style (e.g., `piano,happy,romantic`)
180
+ 3. Adjust generation parameters as needed
181
+ 4. Click "Generate Music" and wait for your song!
182
 
183
+ *Note: Generation can take several minutes depending on the duration.*
184
  """
185
  )
186
 
187
  with gr.Row():
188
+ with gr.Column(scale=1):
189
+ lyrics_input = gr.Textbox(
190
+ label="Lyrics",
191
+ placeholder="Enter lyrics with structure tags like [Verse], [Chorus], etc.",
192
+ lines=20,
193
+ value=EXAMPLE_LYRICS,
194
  )
195
 
196
+ tags_input = gr.Textbox(
197
+ label="Tags",
198
+ placeholder="piano,happy,romantic,synthesizer",
199
+ value=EXAMPLE_TAGS,
200
+ info="Comma-separated tags describing the music style",
201
+ )
202
 
203
+ with gr.Accordion("Advanced Settings", open=False):
204
+ max_duration = gr.Slider(
205
+ minimum=30,
206
+ maximum=240,
207
+ value=120,
208
+ step=10,
209
+ label="Max Duration (seconds)",
210
+ info="Maximum length of generated audio",
211
+ )
212
 
213
+ temperature = gr.Slider(
214
+ minimum=0.1,
215
+ maximum=2.0,
216
+ value=1.0,
217
+ step=0.1,
218
+ label="Temperature",
219
+ info="Higher = more creative, Lower = more consistent",
220
+ )
221
 
222
+ topk = gr.Slider(
223
+ minimum=1,
224
+ maximum=100,
225
+ value=50,
226
+ step=1,
227
+ label="Top-K",
228
+ info="Number of top tokens to sample from",
229
+ )
230
+
231
+ cfg_scale = gr.Slider(
232
+ minimum=1.0,
233
+ maximum=3.0,
234
+ value=1.5,
235
+ step=0.1,
236
+ label="CFG Scale",
237
+ info="Classifier-free guidance scale",
238
+ )
239
+
240
+ generate_btn = gr.Button("Generate Music", variant="primary", size="lg")
241
+
242
+ with gr.Column(scale=1):
243
+ audio_output = gr.Audio(
244
+ label="Generated Music",
245
+ type="filepath",
246
  )
247
 
248
+ gr.Markdown(
249
+ """
250
+ ### Tips for Better Results
251
+ - Use structured lyrics with section tags
252
+ - Be specific with your style tags
253
+ - Try different temperature values for variety
254
+ - Shorter durations generate faster
255
+
256
+ ### Example Tags
257
+ - **Instruments:** piano, guitar, drums, synthesizer, violin, bass
258
+ - **Mood:** happy, sad, romantic, energetic, calm, melancholic
259
+ - **Genre:** pop, rock, jazz, classical, electronic, folk
260
+ - **Tempo:** fast, slow, upbeat, relaxed
261
+ """
262
+ )
263
 
264
  generate_btn.click(
265
  fn=generate_music,
266
  inputs=[
267
+ lyrics_input,
268
+ tags_input,
269
+ max_duration,
270
+ temperature,
271
+ topk,
272
+ cfg_scale,
273
  ],
274
+ outputs=audio_output,
275
  )
276
 
277
  gr.Markdown(
278
  """
279
  ---
280
+ **Model:** [HeartMuLa-oss-3B](https://huggingface.co/HeartMuLa/HeartMuLa-oss-3B) |
281
+ **Paper:** [arXiv](https://arxiv.org/abs/2601.10547) |
282
+ **Code:** [GitHub](https://github.com/HeartMuLa/heartlib)
 
283
 
284
+ *Licensed under Apache 2.0*
285
  """
286
  )
287
 
288
+
289
+ if __name__ == "__main__":
290
+ # Preload models on startup
291
+ print("Initializing HeartMuLa...")
292
+ load_pipeline()
293
+
294
+ # Launch the app
295
+ demo.launch()
requirements.txt CHANGED
@@ -1,14 +1,2 @@
1
- spaces
2
- gradio
3
- torch
4
- torchaudio
5
- transformers
6
- accelerate
7
- diffusers
8
- einops
9
- librosa
10
- scipy
11
- numpy
12
- safetensors
13
- fairseq-fixed
14
- cached_path
 
1
+ gradio>=4.0.0
2
+ heartlib @ git+https://github.com/HeartMuLa/heartlib.git