music_generation

Sleeping

App Files Files Community

IsraelRM committed on Mar 10

Commit

10d6080

verified ·

1 Parent(s): 2a6b66c

Upload apps

Browse files

Files changed (3) hide show

app.py +24 -0
app_audioldm.py +219 -0
app_musicldm.py +184 -0

app.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import os
+
+import gradio as gr
+
+# Hugging Face token: the secret is stored as MI_TOKEN_HF and has to be
+# re-exported under the name huggingface_hub reads, which is HF_TOKEN.
+# Exporting it only as HF_HUB_TOKEN (not a name the hub client looks up)
+# left model downloads unauthenticated.
+mi_token = os.environ.get("MI_TOKEN_HF")
+if not mi_token:
+    raise ValueError("No se encontró la variable de entorno MI_TOKEN_HF")
+os.environ["HF_TOKEN"] = mi_token
+# Still exported under the previous name for anything that relied on it.
+os.environ["HF_HUB_TOKEN"] = mi_token
+
+# The tab modules are imported only after the token has been exported, so
+# it is already in the environment while they are being loaded.
+from app_heartmula import crear_tab1  # noqa: E402
+from app_musicldm import crear_tab2  # noqa: E402
+from app_audioldm import crear_tab3  # noqa: E402
+
+# One tab per generator; each crear_tabN() builds its UI inside its tab.
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as principal:
+    with gr.Tabs():
+        with gr.Tab("HeartMuLa"):
+            tab1 = crear_tab1()
+        with gr.Tab("MusicLDM — Estilo & Tags"):
+            tab2 = crear_tab2()
+        with gr.Tab("AudioLDM2 — Music + Lyrics"):
+            tab3 = crear_tab3()
+principal.launch()

app_audioldm.py ADDED Viewed

	@@ -0,0 +1,219 @@

+import gradio as gr
+import torch
+import numpy as np
+import spaces
+from diffusers import AudioLDM2Pipeline
+# Module-level cache for the AudioLDM2 pipeline; filled on first use.
+_pipe = None
+
+
+def _load_model():
+    """Return the cached AudioLDM2 pipeline, creating it on the first call.
+
+    The ``cvssp/audioldm2-music`` checkpoint is loaded in float16 and moved
+    to CUDA when ``torch.cuda.is_available()``; otherwise it is loaded in
+    float32 and kept on the CPU.
+    """
+    global _pipe
+    if _pipe is not None:
+        return _pipe
+    use_gpu = torch.cuda.is_available()
+    target = "cuda" if use_gpu else "cpu"
+    precision = torch.float16 if target == "cuda" else torch.float32
+    pipeline = AudioLDM2Pipeline.from_pretrained(
+        "cvssp/audioldm2-music",
+        torch_dtype=precision,
+    )
+    _pipe = pipeline.to(target)
+    return _pipe
+def _build_style_prompt(instruments, voice, mood, genre, tempo, bpm):
+    """Turn the style selections into one natural-language prompt.
+
+    Args:
+        instruments: Sequence of instrument names; may be empty or None,
+            in which case "various instruments" is used.
+        voice: Singer description; empty, None or "none" adds no vocal
+            clause.
+        mood: Mood word placed before the genre.
+        genre: Genre name.
+        tempo: Tempo description.
+        bpm: Beats per minute, truncated to an integer. May be None (for
+            example when the number field was left empty), in which case
+            the BPM clause is left out.
+
+    Returns:
+        The prompt string, e.g.
+        "A calm jazz song in slow tempo at 90 BPM, featuring piano".
+    """
+    if not instruments:
+        inst_txt = "various instruments"
+    elif len(instruments) == 1:
+        inst_txt = instruments[0]
+    else:
+        inst_txt = ", ".join(instruments[:-1]) + " and " + instruments[-1]
+    # int(None) would raise TypeError and abort the whole generation, so a
+    # missing BPM simply drops its clause.
+    bpm_txt = f" at {int(bpm)} BPM" if bpm is not None else ""
+    prompt = (
+        f"A {mood} {genre} song in {tempo} tempo{bpm_txt}, "
+        f"featuring {inst_txt}"
+    )
+    if voice and voice != "none":
+        prompt += f", sung by a {voice} voice"
+    return prompt
+def _build_full_prompt(style_prompt, lyrics):
+    """Append the song lyrics to the style prompt.
+
+    AudioLDM2 has no separate slot for lyrics, so they are concatenated to
+    the style prompt behind a clear separator (the original author notes
+    that the model copes with long descriptions that include song text).
+
+    Args:
+        style_prompt: Prompt describing the musical style.
+        lyrics: Song lyrics; None, empty or whitespace-only lyrics leave
+            the style prompt unchanged.
+
+    Returns:
+        ``style_prompt`` alone, or ``"<style_prompt>. Song lyrics: <lyrics>"``
+        with the stripped lyrics cut to 600 characters.
+    """
+    # Cut the lyrics so the tokenizer is not saturated (the original note
+    # estimates the limit at roughly 200 tokens). `or ""` tolerates None.
+    lyrics_trimmed = (lyrics or "").strip()[:600]
+    if not lyrics_trimmed:
+        return style_prompt
+    return f"{style_prompt}. Song lyrics: {lyrics_trimmed}"
+@spaces.GPU(duration=180)
+def generate_music(
+    instruments,
+    voice,
+    mood,
+    genre,
+    tempo,
+    bpm,
+    lyrics,
+    duration,
+    guidance_scale,
+    num_steps,
+    negative_prompt,
+):
+    """Generate one audio clip with AudioLDM2 from the style and lyrics.
+
+    The style arguments and the lyrics are merged into a single prompt,
+    which is printed and then passed to the pipeline together with the
+    generation settings. An empty negative prompt is sent as None.
+
+    Returns:
+        A ``(16000, audio)`` tuple, where ``audio`` is the first waveform
+        returned by the pipeline (per the original note, a 1-D float32
+        NumPy array) and 16000 is the hard-coded sample rate.
+    """
+    pipeline = _load_model()
+    full_prompt = _build_full_prompt(
+        _build_style_prompt(instruments, voice, mood, genre, tempo, bpm),
+        lyrics,
+    )
+    print(f"[AudioLDM2] Prompt: {full_prompt}")
+    call_kwargs = dict(
+        negative_prompt=negative_prompt or None,
+        audio_length_in_s=float(duration),
+        guidance_scale=guidance_scale,
+        num_inference_steps=int(num_steps),
+        num_waveforms_per_prompt=1,
+    )
+    waveform = pipeline(full_prompt, **call_kwargs).audios[0]
+    return (16000, waveform)
+# Choices offered by the style selectors of this tab.
+
+# Genre suggestions (the genre dropdown also accepts custom values).
+_GENRES = [
+    "pop", "rock", "jazz", "classical", "electronic",
+    "folk", "metal", "hip hop", "r&b", "soul",
+    "blues", "country", "reggae", "ska", "house",
+    "techno", "trance", "dubstep", "drum and bass", "ambient",
+    "lofi", "synthwave", "electro", "idm", "indie",
+    "indie rock", "alternative", "grunge", "punk", "heavy metal",
+    "black metal", "death metal", "thrash metal", "orchestral", "film score",
+    "soundtrack", "bossa nova", "samba", "flamenco", "celtic",
+    "afrobeat", "k-pop", "city pop", "experimental", "new age",
+]
+
+# Mood words placed in front of the genre in the prompt.
+_MOODS = [
+    "happy", "sad", "romantic", "energetic",
+    "calm", "melancholic", "dark", "epic",
+    "mysterious", "peaceful", "angry",
+]
+
+# Tempo descriptions.
+_TEMPOS = [
+    "slow", "moderate", "fast", "upbeat",
+    "relaxed", "driving", "laid-back",
+]
+
+# Singer options; "none" adds no vocal clause to the prompt.
+_VOICES = [
+    "none", "male", "female",
+    "choir", "opera singer", "rap vocals",
+]
+
+# Instruments selectable in the checkbox group.
+_INSTRUMENTS = [
+    "piano", "guitar", "electric guitar", "bass guitar", "drums",
+    "synthesizer", "violin", "cello", "flute", "saxophone",
+    "trumpet", "organ", "harp",
+]
+# Sample lyrics used as the initial value of the lyrics textbox.
+# The backslash after the opening quotes keeps the text from starting
+# with an empty line.
+_EXAMPLE_LYRICS = """\
+[Verse 1]
+Midnight drips on mirrored stone,
+Neon whispers, all alone.
+Rain keeps time on empty streets,
+Where past and future softly meet.
+[Chorus]
+She walks like smoke through circuits wide,
+A neon ghost I cannot hide.
+Reflections lost in silver rain,
+I call her name — she won't remain.
+[Bridge]
+Static snow in every glance,
+Trapped inside a cyber trance.
+"""
+def crear_tab3():
+    """Build the AudioLDM2 "Music + Lyrics" interface and return it.
+
+    Creates the style selectors, the lyrics box, the generation settings
+    and the audio output, then connects the button to ``generate_music``.
+
+    Returns:
+        The ``gr.Blocks`` container that holds the whole interface.
+    """
+    # NOTE(review): app.py calls this inside its own gr.Blocks; confirm
+    # whether title/theme have any effect on a nested Blocks.
+    with gr.Blocks(title="AudioLDM2 — Music + Lyrics", theme="Nymbo/Nymbo_Theme") as tab3:
+        gr.Markdown(
+            "# AudioLDM2 — Música con Lyrics Embebidas\n"
+            "Generación de música con letras usando **AudioLDM2 Music** "
+            "(`cvssp/audioldm2-music`). El estilo (tags) y las lyrics se combinan "
+            "en un único prompt enriquecido que el modelo procesa con CLAP + T5."
+        )
+        # First row: style selectors in one column, lyrics in the other.
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### Estilo musical")
+                instruments = gr.CheckboxGroup(
+                    choices=_INSTRUMENTS,
+                    value=["synthesizer", "drums", "bass guitar"],
+                    label="Instrumentos",
+                )
+                voice = gr.Dropdown(
+                    choices=_VOICES,
+                    value="female",
+                    label="Voz del cantante",
+                )
+                mood = gr.Dropdown(
+                    choices=_MOODS,
+                    value="energetic",
+                    label="Mood / Emoción",
+                )
+                # Only the genre dropdown accepts values outside its list.
+                genre = gr.Dropdown(
+                    choices=_GENRES,
+                    value="synthwave",
+                    label="Género",
+                    allow_custom_value=True,
+                )
+                with gr.Row():
+                    tempo = gr.Dropdown(
+                        choices=_TEMPOS,
+                        value="fast",
+                        label="Tempo",
+                    )
+                    bpm = gr.Number(value=130, label="BPM", minimum=40, maximum=240)
+            with gr.Column(scale=1):
+                gr.Markdown("### Lyrics")
+                # Pre-filled with the example lyrics.
+                lyrics = gr.Textbox(
+                    label="Letra de la canción",
+                    lines=12,
+                    value=_EXAMPLE_LYRICS,
+                    placeholder="[Verse 1]\nTu letra aquí...\n\n[Chorus]\n...",
+                )
+        # Second row: generation settings in one column, button and
+        # output player in the other.
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### Parámetros de generación")
+                duration = gr.Slider(
+                    minimum=5, maximum=30, value=10, step=1,
+                    label="Duración (segundos)",
+                )
+                guidance_scale = gr.Slider(
+                    minimum=1.0, maximum=10.0, value=3.5, step=0.5,
+                    label="Guidance Scale",
+                )
+                num_steps = gr.Slider(
+                    minimum=10, maximum=200, value=50, step=10,
+                    label="Pasos de inferencia",
+                )
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value="low quality, noise, distorted, muffled, speech, talking",
+                    placeholder="Qué evitar en la generación",
+                )
+            with gr.Column(scale=1):
+                generate_btn = gr.Button(
+                    "Generar música con lyrics", variant="primary", size="lg"
+                )
+                # generate_music returns a (sample_rate, samples) tuple.
+                output_audio = gr.Audio(label="Música generada", type="numpy")
+        # The order of `inputs` must match the positional parameters of
+        # generate_music.
+        generate_btn.click(
+            fn=generate_music,
+            inputs=[
+                instruments, voice, mood, genre, tempo, bpm,
+                lyrics,
+                duration, guidance_scale, num_steps, negative_prompt,
+            ],
+            outputs=output_audio,
+        )
+    return tab3

app_musicldm.py ADDED Viewed

	@@ -0,0 +1,184 @@

+import gradio as gr
+import torch
+import numpy as np
+import spaces
+from diffusers import MusicLDMPipeline
+# Module-level cache for the MusicLDM pipeline; filled on first use.
+_pipe = None
+
+
+def _load_model():
+    """Return the cached MusicLDM pipeline, creating it on the first call.
+
+    The ``ucsd-reach/musicldm`` checkpoint is loaded in float16 and moved
+    to CUDA when ``torch.cuda.is_available()``; otherwise it is loaded in
+    float32 and kept on the CPU.
+    """
+    global _pipe
+    if _pipe is not None:
+        return _pipe
+    use_gpu = torch.cuda.is_available()
+    target = "cuda" if use_gpu else "cpu"
+    precision = torch.float16 if target == "cuda" else torch.float32
+    pipeline = MusicLDMPipeline.from_pretrained(
+        "ucsd-reach/musicldm",
+        torch_dtype=precision,
+    )
+    _pipe = pipeline.to(target)
+    return _pipe
+def _build_prompt(instruments, voice, mood, genre, tempo, bpm, lyrics=""):
+    """Turn the style selections and optional lyrics into one prompt.
+
+    Args:
+        instruments: Sequence of instrument names; may be empty or None,
+            in which case "various instruments" is used.
+        voice: Singer description; empty, None or "none" adds no vocal
+            clause.
+        mood: Mood word placed before the genre.
+        genre: Genre name.
+        tempo: Tempo description.
+        bpm: Beats per minute, truncated to an integer. May be None (for
+            example when the number field was left empty), in which case
+            the BPM clause is left out.
+        lyrics: Optional lyrics; when not blank, the stripped text cut to
+            600 characters is appended after ". Song lyrics: ".
+
+    Returns:
+        The prompt string.
+    """
+    if not instruments:
+        inst_txt = "various instruments"
+    elif len(instruments) == 1:
+        inst_txt = instruments[0]
+    else:
+        inst_txt = ", ".join(instruments[:-1]) + " and " + instruments[-1]
+    # int(None) would raise TypeError and abort the whole generation, so a
+    # missing BPM simply drops its clause.
+    bpm_txt = f" at {int(bpm)} BPM" if bpm is not None else ""
+    prompt = (
+        f"A {mood} {genre} song in {tempo} tempo{bpm_txt}, "
+        f"featuring {inst_txt}"
+    )
+    if voice and voice != "none":
+        prompt += f", sung by a {voice} voice"
+    if lyrics and lyrics.strip():
+        prompt += f". Song lyrics: {lyrics.strip()[:600]}"
+    return prompt
+@spaces.GPU(duration=130)
+def generate_music(
+    instruments,
+    voice,
+    mood,
+    genre,
+    tempo,
+    bpm,
+    lyrics,
+    duration,
+    guidance_scale,
+    num_steps,
+    negative_prompt,
+):
+    """Generate one audio clip with MusicLDM from the style and lyrics.
+
+    The style arguments and the lyrics are merged into a single prompt,
+    which is printed and then passed to the pipeline together with the
+    generation settings. An empty negative prompt is sent as None.
+
+    Returns:
+        A ``(16000, audio)`` tuple, where ``audio`` is the first waveform
+        returned by the pipeline (per the original note, a 1-D float32
+        NumPy array) and 16000 is the hard-coded sample rate.
+    """
+    pipeline = _load_model()
+    prompt = _build_prompt(instruments, voice, mood, genre, tempo, bpm, lyrics)
+    print(f"[MusicLDM] Prompt: {prompt}")
+    call_kwargs = dict(
+        negative_prompt=negative_prompt or None,
+        audio_length_in_s=float(duration),
+        guidance_scale=guidance_scale,
+        num_inference_steps=int(num_steps),
+    )
+    waveform = pipeline(prompt, **call_kwargs).audios[0]
+    return (16000, waveform)
+# Choices offered by the style selectors of this tab.
+
+# Genre suggestions (the genre dropdown also accepts custom values).
+_GENRES = [
+    "pop", "rock", "jazz", "classical", "electronic",
+    "folk", "metal", "hip hop", "r&b", "soul",
+    "blues", "country", "reggae", "ska", "house",
+    "techno", "trance", "dubstep", "drum and bass", "ambient",
+    "lofi", "synthwave", "electro", "idm", "indie",
+    "indie rock", "alternative", "grunge", "punk", "heavy metal",
+    "black metal", "death metal", "thrash metal", "orchestral", "film score",
+    "soundtrack", "bossa nova", "samba", "flamenco", "celtic",
+    "afrobeat", "k-pop", "city pop", "experimental", "new age",
+]
+
+# Mood words placed in front of the genre in the prompt.
+_MOODS = [
+    "happy", "sad", "romantic", "energetic",
+    "calm", "melancholic", "dark", "epic",
+    "mysterious", "peaceful", "angry",
+]
+
+# Tempo descriptions.
+_TEMPOS = [
+    "slow", "moderate", "fast", "upbeat",
+    "relaxed", "driving", "laid-back",
+]
+
+# Singer options; "none" adds no vocal clause to the prompt.
+_VOICES = [
+    "none", "male", "female",
+    "choir", "opera singer", "rap vocals",
+]
+
+# Instruments selectable in the checkbox group.
+_INSTRUMENTS = [
+    "piano", "guitar", "electric guitar", "bass guitar", "drums",
+    "synthesizer", "violin", "cello", "flute", "saxophone",
+    "trumpet", "organ", "harp",
+]
+def crear_tab2():
+    """Build the MusicLDM interface and return it.
+
+    Creates the style selectors, the optional lyrics box, the generation
+    settings and the audio output, then connects the button to
+    ``generate_music``.
+
+    Returns:
+        The ``gr.Blocks`` container that holds the whole interface.
+    """
+    # NOTE(review): app.py calls this inside its own gr.Blocks; confirm
+    # whether title/theme have any effect on a nested Blocks.
+    with gr.Blocks(title="MusicLDM", theme="Nymbo/Nymbo_Theme") as tab2:
+        gr.Markdown(
+            "# MusicLDM — Latent Diffusion Music Generator\n"
+            "Generación de música desde tags de estilo usando **MusicLDM** "
+            "(`ucsd-reach/musicldm`). Los tags se convierten en un prompt "
+            "estructurado en lenguaje natural para que el modelo los entienda correctamente."
+        )
+        # Single row: style selectors in one column; lyrics, settings,
+        # button and output in the other.
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### Estilo musical")
+                instruments = gr.CheckboxGroup(
+                    choices=_INSTRUMENTS,
+                    value=["synthesizer", "drums", "bass guitar"],
+                    label="Instrumentos",
+                )
+                voice = gr.Dropdown(
+                    choices=_VOICES,
+                    value="female",
+                    label="Voz del cantante",
+                )
+                mood = gr.Dropdown(
+                    choices=_MOODS,
+                    value="energetic",
+                    label="Mood / Emoción",
+                )
+                # Only the genre dropdown accepts values outside its list.
+                genre = gr.Dropdown(
+                    choices=_GENRES,
+                    value="synthwave",
+                    label="Género",
+                    allow_custom_value=True,
+                )
+                with gr.Row():
+                    tempo = gr.Dropdown(
+                        choices=_TEMPOS,
+                        value="fast",
+                        label="Tempo",
+                    )
+                    bpm = gr.Number(value=130, label="BPM", minimum=40, maximum=240)
+            with gr.Column(scale=1):
+                gr.Markdown("### Lyrics (opcional)")
+                # No initial value: lyrics are optional in this tab.
+                lyrics = gr.Textbox(
+                    label="Letra de la canción",
+                    lines=8,
+                    placeholder="[Verse 1]\nTu letra aquí...\n\n[Chorus]\n...",
+                )
+                gr.Markdown("### Parámetros de generación")
+                duration = gr.Slider(
+                    minimum=5, maximum=30, value=10, step=1,
+                    label="Duración (segundos)",
+                )
+                guidance_scale = gr.Slider(
+                    minimum=1.0, maximum=10.0, value=3.5, step=0.5,
+                    label="Guidance Scale",
+                    info="Mayor = más fiel al prompt",
+                )
+                num_steps = gr.Slider(
+                    minimum=10, maximum=200, value=50, step=10,
+                    label="Pasos de inferencia",
+                    info="Más pasos = mejor calidad, más lento",
+                )
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value="low quality, noise, distorted, muffled",
+                    placeholder="Qué evitar en la generación",
+                )
+                generate_btn = gr.Button("Generar música", variant="primary", size="lg")
+                # generate_music returns a (sample_rate, samples) tuple.
+                output_audio = gr.Audio(label="Música generada", type="numpy")
+        # The order of `inputs` must match the positional parameters of
+        # generate_music.
+        generate_btn.click(
+            fn=generate_music,
+            inputs=[
+                instruments, voice, mood, genre, tempo, bpm,
+                lyrics,
+                duration, guidance_scale, num_steps, negative_prompt,
+            ],
+            outputs=output_audio,
+        )
+    return tab2