|
|
import os |
|
|
import tempfile |
|
|
import torch |
|
|
import gradio as gr |
|
|
from huggingface_hub import hf_hub_download, snapshot_download |
|
|
import spaces |
|
|
|
|
|
|
|
|
def download_models():
    """Download all required model files from the HuggingFace Hub.

    Everything is stored under ``<HF_HOME>/heartmula_models``; ``/tmp`` is
    used as the base directory when the ``HF_HOME`` environment variable is
    not set.

    * ``tokenizer.json`` and ``gen_config.json`` from
      ``HeartMuLa/HeartMuLaGen`` are placed directly in that directory.
    * Snapshots of ``HeartMuLa/HeartMuLa-oss-3B`` and
      ``HeartMuLa/HeartCodec-oss`` are placed in sub-directories named after
      the respective repository.

    Returns:
        str: Path of the directory that holds all downloaded files.
    """
    model_dir = os.path.join(os.environ.get("HF_HOME", "/tmp"), "heartmula_models")
    # exist_ok=True already tolerates an existing directory, so a separate
    # (and race-prone) os.path.exists() check is unnecessary.
    os.makedirs(model_dir, exist_ok=True)

    print("Downloading HeartMuLaGen files...")
    for filename in ("tokenizer.json", "gen_config.json"):
        hf_hub_download(
            repo_id="HeartMuLa/HeartMuLaGen",
            filename=filename,
            local_dir=model_dir,
        )

    # Both model repositories are fetched the same way: a full snapshot into
    # a sub-directory that carries the repository's short name.
    for repo_name in ("HeartMuLa-oss-3B", "HeartCodec-oss"):
        print(f"Downloading {repo_name}...")
        snapshot_download(
            repo_id=f"HeartMuLa/{repo_name}",
            local_dir=os.path.join(model_dir, repo_name),
        )

    print("All models downloaded successfully!")
    return model_dir
|
|
|
|
|
from heartlib import HeartMuLaGenPipeline |
|
|
|
|
|
# Fetch the model files first; the pipeline is loaded from this directory.
model_dir = download_models()

# bfloat16 on CUDA, float32 on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.bfloat16 if device.type == "cuda" else torch.float32

print(f"Loading pipeline on {device} with {dtype}...")
pipe = HeartMuLaGenPipeline.from_pretrained(
    model_dir, device=device, dtype=dtype, version="3B"
)
print("Pipeline loaded successfully!")
|
|
|
|
|
|
|
|
@spaces.GPU(duration=130)
def generate_music(
    lyrics: str,
    tags: str,
    max_duration_seconds: int,
    temperature: float,
    topk: int,
    cfg_scale: float,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate music from lyrics and tags.

    Args:
        lyrics: Lyrics text; must contain non-whitespace characters.
        tags: Style tags text; must contain non-whitespace characters.
        max_duration_seconds: Upper bound for the audio length in seconds;
            converted to milliseconds before being handed to the pipeline.
        temperature: Forwarded unchanged to the pipeline as ``temperature``.
        topk: Forwarded to the pipeline as ``topk`` (coerced to ``int``).
        cfg_scale: Forwarded unchanged to the pipeline as ``cfg_scale``.
        progress: Gradio progress tracker declared with ``track_tqdm=True``;
            it is not referenced inside the body.

    Returns:
        str: Path of a temporary ``.mp3`` file that was passed to the
        pipeline as ``save_path``.

    Raises:
        gr.Error: If ``lyrics`` or ``tags`` is empty or whitespace-only.
    """
    if not lyrics or not lyrics.strip():
        raise gr.Error("Please enter some lyrics!")
    if not tags or not tags.strip():
        raise gr.Error("Please enter at least one tag!")

    # Only the file name is needed; delete=False keeps the file around after
    # the handle is closed so it can be handed to the pipeline and returned.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        output_path = f.name

    try:
        with torch.no_grad():
            # NOTE(review): the pipeline presumably writes the audio to
            # save_path -- confirm against heartlib.
            pipe(
                {"lyrics": lyrics, "tags": tags},
                # Slider values are numeric but not guaranteed to be ints;
                # coerce the values that must be integral.
                max_audio_length_ms=int(max_duration_seconds * 1000),
                save_path=output_path,
                topk=int(topk),
                temperature=temperature,
                cfg_scale=cfg_scale,
            )
    except Exception:
        # The temp file was created with delete=False, so nothing else will
        # remove it if generation fails. Best-effort cleanup, then re-raise.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise

    return output_path
|
|
|
|
|
|
|
|
|
|
|
# Default lyrics shown in the UI. Each inner tuple is one song section
# (section tag first, then its lines); sections are separated by a blank line.
EXAMPLE_LYRICS = "\n\n".join(
    "\n".join(section)
    for section in (
        ("[Intro]",),
        (
            "[Verse]",
            "The sun creeps in across the floor",
            "I hear the traffic outside the door",
            "The coffee pot begins to hiss",
            "It is another morning just like this",
        ),
        (
            "[Prechorus]",
            "The world keeps spinning round and round",
            "Feet are planted on the ground",
            "I find my rhythm in the sound",
        ),
        (
            "[Chorus]",
            "Every day the light returns",
            "Every day the fire burns",
            "We keep on walking down this street",
            "Moving to the same steady beat",
            "It is the ordinary magic that we meet",
        ),
        (
            "[Verse]",
            "The hours tick deeply into noon",
            "Chasing shadows, chasing the moon",
            "Work is done and the lights go low",
            "Watching the city start to glow",
        ),
        (
            "[Bridge]",
            "It is not always easy, not always bright",
            "Sometimes we wrestle with the night",
            "But we make it to the morning light",
        ),
        (
            "[Chorus]",
            "Every day the light returns",
            "Every day the fire burns",
            "We keep on walking down this street",
            "Moving to the same steady beat",
        ),
        (
            "[Outro]",
            "Just another day",
            "Every single day",
        ),
    )
)

# Default comma-separated style tags shown in the UI.
EXAMPLE_TAGS = "piano,happy,uplifting,pop"
|
|
|
|
|
|
|
|
# Page layout: header, then a two-column row (inputs and settings on the left,
# generated audio and tips on the right), then the footer.
with gr.Blocks(title="HeartMuLa Music Generator") as demo:
    # --- Header and usage instructions ---
    gr.Markdown(
        """
        # HeartMuLa Music Generator

        Generate music from lyrics and tags using [HeartMuLa](https://github.com/HeartMuLa/heartlib),
        an open-source music foundation model.

        **Instructions:**
        1. Enter your lyrics with structure tags like `[Verse]`, `[Chorus]`, `[Bridge]`, etc.
        2. Add comma-separated tags describing the music style (e.g., `piano,happy,romantic`)
        3. Adjust generation parameters as needed
        4. Click "Generate Music" and wait for your song!

        *Note: Generation can take several minutes depending on the duration.*
        """
    )

    with gr.Row():
        # --- Left column: text inputs, sampling settings, trigger button ---
        with gr.Column(scale=1):
            lyrics_input = gr.Textbox(
                value=EXAMPLE_LYRICS,
                lines=20,
                label="Lyrics",
                placeholder="Enter lyrics with structure tags like [Verse], [Chorus], etc.",
            )

            tags_input = gr.Textbox(
                value=EXAMPLE_TAGS,
                label="Tags",
                info="Comma-separated tags describing the music style",
                placeholder="piano,happy,romantic,synthesizer",
            )

            # Collapsed by default; each slider feeds one generate_music argument.
            with gr.Accordion("Advanced Settings", open=False):
                max_duration = gr.Slider(
                    minimum=30, maximum=240, step=10, value=120,
                    label="Max Duration (seconds)",
                    info="Maximum length of generated audio",
                )

                temperature = gr.Slider(
                    minimum=0.1, maximum=2.0, step=0.1, value=1.0,
                    label="Temperature",
                    info="Higher = more creative, Lower = more consistent",
                )

                topk = gr.Slider(
                    minimum=1, maximum=100, step=1, value=50,
                    label="Top-K",
                    info="Number of top tokens to sample from",
                )

                cfg_scale = gr.Slider(
                    minimum=1.0, maximum=3.0, step=0.1, value=1.5,
                    label="CFG Scale",
                    info="Classifier-free guidance scale",
                )

            generate_btn = gr.Button("Generate Music", size="lg", variant="primary")

        # --- Right column: generated audio and usage tips ---
        with gr.Column(scale=1):
            audio_output = gr.Audio(type="filepath", label="Generated Music")

            gr.Markdown(
                """
                ### Tips for Better Results
                - Use structured lyrics with section tags
                - Be specific with your style tags
                - Try different temperature values for variety
                - Shorter durations generate faster

                ### Example Tags
                - **Instruments:** piano, guitar, drums, synthesizer, violin, bass
                - **Mood:** happy, sad, romantic, energetic, calm, melancholic
                - **Genre:** pop, rock, jazz, classical, electronic, folk
                - **Tempo:** fast, slow, upbeat, relaxed
                """
            )

    # The inputs are listed in the same order as generate_music's parameters.
    generate_btn.click(
        fn=generate_music,
        inputs=[lyrics_input, tags_input, max_duration, temperature, topk, cfg_scale],
        outputs=audio_output,
    )

    # --- Footer: model, paper, and code links ---
    gr.Markdown(
        """
        ---
        **Model:** [HeartMuLa-oss-3B](https://huggingface.co/HeartMuLa/HeartMuLa-oss-3B) |
        **Paper:** [arXiv](https://arxiv.org/abs/2601.10547) |
        **Code:** [GitHub](https://github.com/HeartMuLa/heartlib)

        *Licensed under Apache 2.0*
        """
    )
|
|
|
|
|
|
|
|
|
|
|
# Start the Gradio server only when this file is executed as a script, so
# that importing the module does not launch a server as a side effect.
if __name__ == "__main__":
    demo.launch()
|
|
|