# HeartMuLa / app.py
# Hugging Face Space file-view header: author seawolf2357, commit "Update app.py" (2531687, verified)
import os
import tempfile
import torch
import gradio as gr
from huggingface_hub import hf_hub_download, snapshot_download
import spaces
import requests
import json
from groq import Groq
# ============================================================
# 🎵 SOMA Music Studio - HeartMuLa Edition
# HeartMuLa-oss-3B + SOMA Multi-Agent + Comic Classic Theme
# ============================================================
# ============================================================
# 🎵 HeartMuLa model download and loading
# ============================================================
def download_models():
    """Download all required model files from HuggingFace Hub.

    Everything is stored under ``<cache>/heartmula_models`` where ``<cache>``
    is the ``HF_HOME`` environment variable, or ``/tmp`` when it is unset.

    Returns:
        str: Path of the directory that holds ``tokenizer.json``,
        ``gen_config.json`` and the ``HeartMuLa-oss-3B`` /
        ``HeartCodec-oss`` snapshot sub-directories.
    """
    cache_dir = os.environ.get("HF_HOME", "/tmp")
    model_dir = os.path.join(cache_dir, "heartmula_models")
    # exist_ok=True makes this idempotent, so no separate existence check
    # (and no check-then-create race) is needed.
    os.makedirs(model_dir, exist_ok=True)

    # HeartMuLaGen: only the tokenizer and generation config are needed
    print("Downloading HeartMuLaGen files...")
    for filename in ("tokenizer.json", "gen_config.json"):
        hf_hub_download(
            repo_id="HeartMuLa/HeartMuLaGen",
            filename=filename,
            local_dir=model_dir,
        )

    # Full repository snapshots, each in a sub-directory named after the repo
    for repo_name in ("HeartMuLa-oss-3B", "HeartCodec-oss"):
        print(f"Downloading {repo_name}...")
        snapshot_download(
            repo_id=f"HeartMuLa/{repo_name}",
            local_dir=os.path.join(model_dir, repo_name),
        )

    print("All models downloaded successfully!")
    return model_dir
from heartlib import HeartMuLaGenPipeline
# ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ๋ฐ ๋กœ๋”ฉ
model_dir = download_models()

# Pick the execution target: bfloat16 on CUDA, float32 on CPU
device, dtype = (
    (torch.device("cuda"), torch.bfloat16)
    if torch.cuda.is_available()
    else (torch.device("cpu"), torch.float32)
)

print(f"Loading HeartMuLa pipeline on {device} with {dtype}...")
pipe = HeartMuLaGenPipeline.from_pretrained(model_dir, device=device, dtype=dtype, version="3B")
print("HeartMuLa Pipeline loaded successfully!")
# ============================================================
# 🎵 HeartMuLa guide and settings
# ============================================================
# Structure tags recommended for HeartMuLa lyrics (based on the official documentation)
STRUCTURE_TAGS = [
    "[Intro]",
    "[Verse]",
    "[Prechorus]",
    "[Chorus]",
    "[Bridge]",
    "[Interlude]",
    "[Hook]",
    "[Outro]",
    "[Inst]",
    "[Solo]",
]
# HeartMuLa ์Šคํƒ€์ผ ํƒœ๊ทธ ๊ฐ€์ด๋“œ (๋ฐ๋ชจ ํŽ˜์ด์ง€ ๊ธฐ๋ฐ˜)
HEARTMULA_TAG_GUIDE = """
## ๐ŸŽผ HeartMuLa ์ž…๋ ฅ ํ˜•์‹
### โš ๏ธ ์ค‘์š”: HeartMuLa๋Š” 2๊ฐ€์ง€ ์ž…๋ ฅ๋งŒ ๋ฐ›์Šต๋‹ˆ๋‹ค!
1. **๊ฐ€์‚ฌ (Lyrics)** - ๊ตฌ์กฐ ํƒœ๊ทธ ํฌํ•จ
2. **์Šคํƒ€์ผ ํƒœ๊ทธ (Tags)** - ์ฝค๋งˆ ๊ตฌ๋ถ„, ๊ณต๋ฐฑ ์—†์Œ
โŒ ๋ณด์ปฌ ํƒ€์ž…(๋“€์—ฃ, ๊ทธ๋ฃน ๋“ฑ)์€ ์ง€์›ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค!
### ๐Ÿ“ ๊ตฌ์กฐ ํƒœ๊ทธ (๊ฐ€์‚ฌ ๋‚ด ์‚ฌ์šฉ):
`[Intro]`, `[Verse]`, `[Prechorus]`, `[Chorus]`, `[Bridge]`, `[Outro]`
### ๐Ÿท๏ธ ์Šคํƒ€์ผ ํƒœ๊ทธ ํ˜•์‹ (์ฝค๋งˆ ๊ตฌ๋ถ„, ๊ณต๋ฐฑ ์—†์Œ):
```
piano,happy,wedding,synthesizer,romantic
energetic,drums,driving,rock,electric guitar
soft,Sad,Ballad,Longing
```
### ๐ŸŽน ํƒœ๊ทธ ์นดํ…Œ๊ณ ๋ฆฌ (์‹ค์ œ ๋ฐ๋ชจ ๊ธฐ๋ฐ˜):
**์•…๊ธฐ (Instruments):**
Piano, Keyboard, synthesizer, drum machine, electric guitar, acoustic guitar, drums, Strings, bass
**์žฅ๋ฅด (Genre):**
pop, R&B, rock, Ballad, electronic, acoustic
**๋ถ„์œ„๊ธฐ/๊ฐ์ • (Mood/Emotion):**
warm, reflection, energetic, soft, Sad, Regret, Longing, meditation, faith, peaceful, Romantic, emotional, hope, hopeful, powerful, epic, driving, happy, uplifting, dreamy
**์ƒํ™ฉ (Situation):**
Cafe, wedding, Walking, self-discovery
### ๐ŸŒ ์ง€์› ์–ธ์–ด:
๐Ÿ‡บ๐Ÿ‡ธ English | ๐Ÿ‡ฐ๐Ÿ‡ท Korean | ๐Ÿ‡จ๐Ÿ‡ณ Chinese | ๐Ÿ‡ฏ๐Ÿ‡ต Japanese | ๐Ÿ‡ช๐Ÿ‡ธ Spanish
"""
# HeartMuLa ๊ถŒ์žฅ ๊ฐ€์‚ฌ ๊ตฌ์กฐ
HEARTMULA_LYRICS_STRUCTURE = """
## ๐Ÿ“ HeartMuLa Recommended Lyrics Structure
### OPTIMAL FORMAT:
```
[Intro]
[Verse]
First verse lyrics here
Second line of first verse
[Prechorus]
Building tension here
Leading to the chorus
[Chorus]
Main hook and memorable melody
Most important part of song
[Verse]
Second verse develops story
[Bridge]
Contrast section here
Different melody or perspective
[Chorus]
Main hook repeated
[Outro]
Closing the song
```
### KEY RULES:
1. [Chorus] appears at least 2-3 times
2. [Prechorus] builds tension before [Chorus]
3. [Bridge] provides contrast before final [Chorus]
4. Use [Prechorus] NOT [Pre Chorus] (no space!)
"""
# ์˜ˆ์ œ ๊ฐ€์‚ฌ
EXAMPLE_LYRICS = """[Intro]
[Verse]
The sun creeps in across the floor
I hear the traffic outside the door
The coffee pot begins to hiss
It is another morning just like this
[Prechorus]
The world keeps spinning round and round
Feet are planted on the ground
I find my rhythm in the sound
[Chorus]
Every day the light returns
Every day the fire burns
We keep on walking down this street
Moving to the same steady beat
It is the ordinary magic that we meet
[Verse]
The hours tick deeply into noon
Chasing shadows, chasing the moon
Work is done and the lights go low
Watching the city start to glow
[Bridge]
It is not always easy, not always bright
Sometimes we wrestle with the night
But we make it to the morning light
[Chorus]
Every day the light returns
Every day the fire burns
We keep on walking down this street
Moving to the same steady beat
[Outro]
Just another day
Every single day"""
EXAMPLE_TAGS = "piano,happy,uplifting,pop"
# ์˜ˆ์ œ ํ”„๋กฌํ”„ํŠธ (ํƒœ๊ทธ ์„ธํŠธ) - HeartMuLa ๋ฐ๋ชจ ๊ธฐ๋ฐ˜
EXAMPLE_TAG_PRESETS = {
"โ˜• Cafe Warm Pop": "warm,reflection,pop,Cafe",
"๐ŸŽน R&B Soft Regret": "R&B,Keyboard,Regret,drum machine,electric guitar,synthesizer,soft",
"โšก Electronic Energetic": "energetic,electronic,synthesizer,drum machine,self-discovery",
"๐Ÿ˜ข Sad Ballad Longing": "soft,Sad,Ballad,Longing",
"๐Ÿง˜ Meditation Peaceful": "meditation,faith,acoustic,peaceful",
"๐Ÿ’’ Wedding Romantic": "wedding,Piano,Strings,acoustic guitar,pop,Romantic",
"๐Ÿ’” Emotional Piano": "Sad,Regret,electric guitar,drums,Piano,emotional",
"๐Ÿšถ Hopeful Acoustic": "Walking,acoustic guitar,Strings,hope,hopeful",
"๐ŸŽธ Epic Electronic": "synthesizer,drum machine,powerful,epic,Keyboard,electronic",
"๐Ÿ”ฅ Driving Rock": "energetic,drums,driving,rock,electric guitar",
"๐ŸŽต Happy Uplifting": "piano,happy,uplifting,pop",
"๐ŸŒ™ Soft Dreamy": "soft,dreamy,piano,strings,peaceful"
}
# ============================================================
# 🧠 SOMA agent system - lyrics generation
# ============================================================
LYRICS_AGENTS = {
"lyricist": f"""You are a master lyricist optimized for HeartMuLa music generation.
{HEARTMULA_LYRICS_STRUCTURE}
Your task: Create powerful, memorable lyrics with OPTIMAL HeartMuLa tag placement.
CRITICAL RULES:
1. Use HeartMuLa structure tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]
2. [Prechorus] (not [Pre Chorus]) - HeartMuLa format, NO SPACE!
3. Ensure [Chorus] is the most memorable, singable part
4. Include [Prechorus] to build tension before [Chorus]
5. Add [Bridge] for emotional contrast
6. Minimum 6-8 sections for quality generation
7. โš ๏ธ CRITICAL: Write lyrics in the EXACT language specified by the user!
- If Korean is specified, write ALL lyrics in Korean (ํ•œ๊ตญ์–ด)!
- Do NOT translate or write in English unless explicitly asked!
Write lyrics that create the BEST POSSIBLE foundation for high-quality music generation.""",
"producer": f"""You are a music producer specializing in song structure optimization for HeartMuLa.
{HEARTMULA_LYRICS_STRUCTURE}
Your task: Analyze and OPTIMIZE the tag structure for MAXIMUM musical impact.
CRITICAL OPTIMIZATION RULES:
1. Use HeartMuLa tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro]
2. IMPORTANT: Use [Prechorus] NOT [Pre Chorus] - no space!
3. Verify the structure follows genre conventions
4. Ensure proper tag sequence (Verseโ†’Prechorusโ†’Chorus)
5. Check that [Chorus] appears at least 2-3 times
6. Verify [Bridge] provides contrast before final chorus
Output the restructured lyrics with OPTIMAL HeartMuLa tag placement.""",
"emotion_director": f"""You are an emotion director for music production.
Your task: Enhance emotional impact through STRATEGIC tag content.
EMOTIONAL MAPPING BY TAG:
- [Intro]: Intrigue, anticipation
- [Verse]: Storytelling, building connection
- [Prechorus]: Rising tension, excitement
- [Chorus]: Peak emotion, catharsis
- [Bridge]: Vulnerability, reflection, contrast
- [Outro]: Resolution, lingering feeling
OPTIMIZATION RULES:
1. Each [Verse] should progress emotionally
2. [Chorus] must deliver the strongest emotional punch
3. [Bridge] should offer new emotional perspective
4. [Prechorus] should create anticipation for [Chorus]
Enhance the lyrics for MAXIMUM emotional resonance.""",
"final_editor": f"""You are the final editor for HeartMuLa music production.
Your task: Output PERFECTLY FORMATTED, production-ready lyrics.
CRITICAL OUTPUT RULES:
1. Output ONLY the actual lyrics with structure tags
2. Use HeartMuLa tags EXACTLY:
[Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]
3. IMPORTANT: [Prechorus] NOT [Pre Chorus] - no space!
4. DO NOT include English translations in parentheses
5. DO NOT include explanations, descriptions, or markdown
6. DO NOT include lines starting with * or >
7. For [Inst] or [Solo] sections, write ONLY the tag
8. Ensure MINIMUM 6-8 different sections
9. Verify [Chorus] appears at least 2 times
10. PRESERVE THE ORIGINAL LANGUAGE - if lyrics are in Korean, output in Korean!
OUTPUT ONLY CLEAN LYRICS WITH OPTIMAL TAG STRUCTURE IN THE ORIGINAL LANGUAGE."""
}
# ํƒœ๊ทธ ์ƒ์„ฑ ์—์ด์ „ํŠธ - HeartMuLa ๋ฐ๋ชจ ํƒœ๊ทธ ๊ธฐ๋ฐ˜
TAG_AGENT = f"""You are a style tag generator for HeartMuLa music generation.
HeartMuLa uses comma-separated tags WITHOUT spaces.
VALID TAG CATEGORIES (from official demo):
**Instruments:**
Piano, Keyboard, synthesizer, drum machine, electric guitar, acoustic guitar, drums, Strings, bass
**Genre:**
pop, R&B, rock, Ballad, electronic, acoustic
**Mood/Emotion:**
warm, reflection, energetic, soft, Sad, Regret, Longing, meditation, faith, peaceful, Romantic, emotional, hope, hopeful, powerful, epic, driving, happy, uplifting, dreamy
**Situation:**
Cafe, wedding, Walking, self-discovery
RULES:
1. Output 4-6 tags, comma-separated, NO SPACES between tags
2. Use ONLY tags from the categories above
3. Match the requested genre and mood
OUTPUT FORMAT (examples):
piano,happy,uplifting,pop
warm,reflection,pop,Cafe
energetic,drums,driving,rock,electric guitar
soft,Sad,Ballad,Longing
Output ONLY the comma-separated tags, nothing else."""
# ============================================================
# 🔧 Utility functions
# ============================================================
def call_groq(api_key: str, system: str, user_prompt: str, context: str = "") -> str:
    """Send one chat-completion request to Groq and return the reply text.

    Args:
        api_key: Key used to construct the Groq client.
        system: Content of the system message.
        user_prompt: The task to perform.
        context: Optional earlier output. When non-empty, the user message
            becomes "Previous work: <context>" followed by "Task: <user_prompt>".

    Returns:
        The reply content, or a string beginning with ``"Error:"`` when the
        reply is empty or malformed, or when any ``Exception`` is raised
        along the way (the exception text is included).
    """
    try:
        client = Groq(api_key=api_key)
        user_content = (
            f"Previous work:\n{context}\n\nTask: {user_prompt}" if context else user_prompt
        )
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user_content},
            ],
            temperature=0.9,
            max_completion_tokens=4096,
            top_p=1,
            stream=False,
        )
        # Guard: anything without a first choice carrying a message is invalid
        if not (response and response.choices and response.choices[0].message):
            return "Error: Invalid response"
        return response.choices[0].message.content or "Error: Empty response"
    except Exception as exc:
        # Callers detect failure via the "Error:" prefix instead of exceptions
        return f"Error: {str(exc)}"
def clean_lyrics(text: str) -> str:
    """Post-process generated lyrics into clean HeartMuLa-formatted text.

    Processing per line:
      * lines starting (after optional whitespace) with ``*``, ``>``, ``---``
        or ``###`` are dropped as markdown/commentary;
      * one trailing parenthetical that starts with an ASCII letter (e.g. an
        English translation) is removed; only the *last* parenthesised group
        is touched, earlier groups and the lyrics between them are kept;
      * ``**bold**`` markers are unwrapped;
      * tag aliases are normalised: ``[Pre Chorus]``/``[Pre-Chorus]`` and
        ``[Build Up]`` become ``[Prechorus]``, ``[Post Chorus]`` becomes
        ``[Chorus]``, ``[Break]`` becomes ``[Interlude]`` (case-insensitive).
    Runs of three or more newlines are collapsed to one blank line.

    Args:
        text: Raw model output.

    Returns:
        The cleaned lyrics, or ``""`` when ``text`` is empty or not a string.
    """
    import re

    if not text or not isinstance(text, str):
        return ""

    # Patterns are built once, outside the per-line loop.
    # A leading "*" already covers whole-line "**...**" markup.
    skip_line = re.compile(r'^\s*(?:\*|>|---|###)')
    # [^()]* keeps the match inside a single parenthesised group, so
    # "a (x) b (y)" loses only " (y)" instead of everything from "(x)" on.
    trailing_paren = re.compile(r'\s*\([A-Za-z][^()]*\)\s*$')
    bold_markup = re.compile(r'\*\*(.*?)\*\*')
    tag_aliases = (
        (re.compile(r'\[Pre.?Chorus\]', re.IGNORECASE), '[Prechorus]'),
        (re.compile(r'\[Post.?Chorus\]', re.IGNORECASE), '[Chorus]'),
        (re.compile(r'\[Build.?Up\]', re.IGNORECASE), '[Prechorus]'),
        (re.compile(r'\[Break\]', re.IGNORECASE), '[Interlude]'),
    )

    cleaned_lines = []
    for raw_line in text.split('\n'):
        if not raw_line.strip():
            # Keep blank lines as section separators (collapsed below)
            cleaned_lines.append('')
            continue
        if skip_line.match(raw_line):
            continue

        line = trailing_paren.sub('', raw_line)
        line = bold_markup.sub(r'\1', line)
        for alias, canonical in tag_aliases:
            line = alias.sub(canonical, line)

        line = line.strip()
        if line:
            cleaned_lines.append(line)

    result = re.sub(r'\n{3,}', '\n\n', '\n'.join(cleaned_lines))
    return result.strip()
def clean_tags(tags: str) -> str:
    """Normalise a style-tag string to HeartMuLa format (comma-separated, no spaces).

    The input is lower-cased and split on commas. Each tag is trimmed, any
    run of whitespace inside a tag becomes a single underscore, empty tags
    are dropped and duplicates are removed while keeping first-seen order.
    Trimming happens per tag *before* underscores are inserted, so sloppy
    separators such as ``"a,  b"`` or ``"a,\\tb"`` cannot produce tags with
    stray leading/trailing underscores.

    Args:
        tags: Raw comma-separated tag text.

    Returns:
        The normalised tag string, or ``"piano,happy,pop"`` when the input is
        empty or contains no usable tag.
    """
    default_tags = "piano,happy,pop"
    if not tags:
        return default_tags

    # str.split() with no argument both trims and splits on any whitespace run
    normalised = ("_".join(part.split()) for part in tags.lower().split(","))
    # dict.fromkeys de-duplicates while preserving order
    unique_tags = list(dict.fromkeys(tag for tag in normalised if tag))
    return ",".join(unique_tags) if unique_tags else default_tags
# ============================================================
# 🎵 Main functions
# ============================================================
# No @spaces.GPU here: this function only makes Groq API calls and cleans text,
# so it does not need a GPU allocation.
def generate_lyrics_soma(
    api_key: str, theme: str, genre: str, mood: str,
    language: str, additional: str,
    progress=gr.Progress()
):
    """Generate lyrics with the four-stage SOMA agent chain.

    The stages run in order (lyricist, producer, emotion_director,
    final_editor), each receiving the previous stage's output as context.
    The final stage's output is passed through ``clean_lyrics``.

    Args:
        api_key: Groq API key; surrounding whitespace is ignored.
        theme: Song theme (required).
        genre: Genre name inserted into the prompts.
        mood: Mood name inserted into the prompts.
        language: Language the lyrics must be written in.
        additional: Optional extra instructions.
        progress: Gradio progress reporter.

    Returns:
        A 5-tuple ``(status, draft, structured, emotional, final)``. When a
        stage fails, ``status`` carries the error and the slots of that stage
        and all later ones are empty strings.
    """
    if not api_key or not api_key.strip():
        return "โŒ Groq API Key ํ•„์š”", "", "", "", ""
    if not theme or not theme.strip():
        return "โŒ ์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", "", "", "", ""
    # The key was validated in stripped form, so use it in stripped form too
    api_key = api_key.strip()
    base_prompt = f"""Create PROFESSIONAL lyrics optimized for HeartMuLa:
- Theme: {theme}
- Genre: {genre}
- Mood: {mood}
- Language: {language} โ† CRITICAL: You MUST write ALL lyrics in {language} language!
{f'- Additional: {additional}' if additional else ''}
โš ๏ธ LANGUAGE REQUIREMENT: Write the ENTIRE lyrics in {language}!
{"- ๋ฐ˜๋“œ์‹œ ํ•œ๊ตญ์–ด๋กœ ๊ฐ€์‚ฌ๋ฅผ ์ž‘์„ฑํ•˜์„ธ์š”! ํ•œ๊ธ€๋กœ๋งŒ ์ž‘์„ฑ!" if language == "Korean" else ""}
{"- ่ฏท็”จไธญๆ–‡ๅ†™ๆ‰€ๆœ‰ๆญŒ่ฏ๏ผๅฟ…้กปๅ…จ้ƒจ็”จไธญๆ–‡๏ผ" if language == "Chinese" else ""}
{"- ๆ—ฅๆœฌ่ชžใงๆญŒ่ฉžใ‚’ๆ›ธใ„ใฆใใ ใ•ใ„๏ผๅ…จใฆๆ—ฅๆœฌ่ชžใง๏ผ" if language == "Japanese" else ""}
{"- ยกEscribe toda la letra en espaรฑol! ยกSolo en espaรฑol!" if language == "Spanish" else ""}
USE HeartMuLa STRUCTURE TAGS:
[Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro]
IMPORTANT: Use [Prechorus] NOT [Pre Chorus] - no space!
REQUIRED STRUCTURE:
1. [Intro] - Set the mood
2. [Verse] x2-3 - Tell the story
3. [Prechorus] - Build tension
4. [Chorus] x2-3 - Main hook (MOST important!)
5. [Bridge] - Emotional contrast
6. [Outro] - Conclusion
REMEMBER: ALL lyrics must be in {language}!"""
    # Extra native-language reminder for the final editor (empty for English)
    lang_instruction = {
        "Korean": "ํ•œ๊ตญ์–ด๋กœ๋งŒ ์ถœ๋ ฅํ•˜์„ธ์š”!",
        "Chinese": "็”จไธญๆ–‡่พ“ๅ‡บ๏ผ",
        "Japanese": "ๆ—ฅๆœฌ่ชžใงๅ‡บๅŠ›ใ—ใฆใใ ใ•ใ„๏ผ",
        "Spanish": "ยกEscribe en espaรฑol!"
    }.get(language, "")
    # (progress fraction, progress label, agent key, task prompt)
    stages = [
        (0.2, "๐ŸŽค ์ž‘์‚ฌ๊ฐ€ - ์ดˆ์•ˆ ์ž‘์„ฑ...", "lyricist", base_prompt),
        (0.4, "๐ŸŽน ํ”„๋กœ๋“€์„œ - ๊ตฌ์กฐ ์ตœ์ ํ™”...", "producer",
         f"Optimize for {genre}. Use [Prechorus] not [Pre Chorus]! KEEP ALL LYRICS IN {language}!"),
        (0.6, "๐Ÿ’ซ ๊ฐ์„ฑ ๋””๋ ‰ํ„ฐ - ๊ฐ์ • ๊ฐ•ํ™”...", "emotion_director",
         f"Enhance for {mood}. KEEP ALL LYRICS IN {language}!"),
        (0.8, "โœจ ์ตœ์ข… ํŽธ์ง‘...", "final_editor",
         f"Output ONLY clean lyrics in {language}. Use [Prechorus] not [Pre Chorus]! {lang_instruction}"),
    ]
    try:
        outputs = []
        previous = ""  # the first stage runs without context
        for fraction, label, agent, task in stages:
            progress(fraction, desc=label)
            reply = call_groq(api_key, LYRICS_AGENTS[agent], task, previous)
            if reply.startswith("Error:"):
                # Completed stages keep their text; the rest stay blank
                blanks = [""] * (len(stages) - len(outputs))
                return (f"โŒ {reply}", *outputs, *blanks)
            outputs.append(reply)
            previous = reply
        draft, structured, emotional, final = outputs
        final_cleaned = clean_lyrics(final)
        progress(1.0, desc="โœ… ์™„๋ฃŒ!")
        return "โœ… ๊ฐ€์‚ฌ ์ƒ์„ฑ ์™„๋ฃŒ!", draft, structured, emotional, final_cleaned
    except Exception as e:
        return f"โŒ ์˜ˆ์™ธ: {str(e)}", "", "", "", ""
# No @spaces.GPU here: this function only makes a Groq API call and cleans text,
# so it does not need a GPU allocation.
def quick_lyrics(api_key: str, theme: str, genre: str, mood: str, language: str, additional: str):
    """Generate lyrics with a single Groq call (no multi-agent refinement).

    Args:
        api_key: Groq API key; surrounding whitespace is ignored.
        theme: Song theme (required).
        genre: Genre name inserted into the prompt.
        mood: Mood name inserted into the prompt.
        language: Language the lyrics must be written in.
        additional: Optional extra instructions.

    Returns:
        The lyrics after ``clean_lyrics``, or a message prefixed with a
        cross-mark when validation fails, the API call reports an error,
        or an exception is raised.
    """
    if not api_key or not api_key.strip():
        return "โŒ API Key ํ•„์š”"
    if not theme or not theme.strip():
        return "โŒ ์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”"
    # The key was validated in stripped form, so use it in stripped form too
    api_key = api_key.strip()
    prompt = f"""Create song lyrics for HeartMuLa:
- Theme: {theme}
- Genre: {genre}
- Mood: {mood}
- Language: {language} โ† CRITICAL: Write ALL lyrics in {language}!
{f'- Special: {additional}' if additional else ''}
โš ๏ธ LANGUAGE: You MUST write the ENTIRE lyrics in {language}!
{"๋ฐ˜๋“œ์‹œ ํ•œ๊ตญ์–ด๋กœ ์ž‘์„ฑํ•˜์„ธ์š”! ํ•œ๊ธ€ ๊ฐ€์‚ฌ๋งŒ!" if language == "Korean" else ""}
{"่ฏท็”จไธญๆ–‡ๅ†™ๆญŒ่ฏ๏ผ" if language == "Chinese" else ""}
{"ๆ—ฅๆœฌ่ชžใงๆ›ธใ„ใฆใใ ใ•ใ„๏ผ" if language == "Japanese" else ""}
{"ยกEscribe en espaรฑol!" if language == "Spanish" else ""}
USE STRUCTURE (8-10 sections):
[Intro] โ†’ [Verse] โ†’ [Prechorus] โ†’ [Chorus] โ†’ [Verse] โ†’ [Prechorus] โ†’ [Chorus] โ†’ [Bridge] โ†’ [Chorus] โ†’ [Outro]
IMPORTANT: Use [Prechorus] NOT [Pre Chorus]!
OUTPUT ONLY lyrics with tags in {language}. NO explanations."""
    try:
        result = call_groq(api_key, f"""Professional songwriter for HeartMuLa.
{HEARTMULA_LYRICS_STRUCTURE}
Output ONLY clean lyrics.""", prompt)
        if result.startswith("Error:"):
            return f"โŒ {result}"
        return clean_lyrics(result)
    except Exception as e:
        return f"โŒ {str(e)}"
# No @spaces.GPU here: this function only makes a Groq API call and cleans text,
# so it does not need a GPU allocation.
def generate_tags_ai(api_key: str, genre: str, mood: str, current_tags: str):
    """Ask the tag agent for HeartMuLa style tags matching a genre and mood.

    Args:
        api_key: Groq API key; surrounding whitespace is ignored.
        genre: Requested genre.
        mood: Requested mood.
        current_tags: Tags currently entered, passed to the model as a hint.

    Returns:
        The model's tags after ``clean_tags``. Falls back to
        ``"piano,happy,pop"`` when the key is missing/blank, the API call
        reports an error, or an exception is raised.
    """
    fallback_tags = "piano,happy,pop"
    # Same blank-key rule as the lyrics generators
    if not api_key or not api_key.strip():
        return fallback_tags
    prompt = f"""Generate HeartMuLa style tags based on:
- Genre: {genre}
- Mood: {mood}
- Current hint: {current_tags}
Use ONLY these tag categories (from HeartMuLa demo):
- Instruments: Piano, Keyboard, synthesizer, drum machine, electric guitar, acoustic guitar, drums, Strings, bass
- Genre: pop, R&B, rock, Ballad, electronic, acoustic
- Mood: warm, reflection, energetic, soft, Sad, Regret, Longing, meditation, faith, peaceful, Romantic, emotional, hope, hopeful, powerful, epic, driving, happy, uplifting, dreamy
- Situation: Cafe, wedding, Walking, self-discovery
Output 4-6 comma-separated tags WITHOUT spaces between tags.
Example: piano,happy,uplifting,pop,Romantic"""
    try:
        result = call_groq(api_key.strip(), TAG_AGENT, prompt)
        if result.startswith("Error:"):
            return fallback_tags
        return clean_tags(result)
    except Exception:
        # Best effort: any failure yields the default tags. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return fallback_tags
@spaces.GPU(duration=180)
def generate_music_heartmula(
    lyrics: str,
    tags: str,
    max_duration_seconds: int,
    temperature: float,
    topk: int,
    cfg_scale: float,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate music with the HeartMuLa pipeline.

    Args:
        lyrics: Lyrics text (required).
        tags: Comma-separated style tags (required); normalised with
            ``clean_tags`` before use.
        max_duration_seconds: Upper bound of the audio length in seconds.
        temperature: Sampling temperature forwarded to the pipeline.
        topk: Top-k value forwarded to the pipeline.
        cfg_scale: CFG scale forwarded to the pipeline.
        progress: Gradio progress reporter (tracks tqdm).

    Returns:
        ``(path, status)`` where ``path`` is the generated ``.mp3`` file, or
        ``(None, status)`` with an error message when generation fails.

    Raises:
        gr.Error: If ``lyrics`` or ``tags`` is empty or whitespace-only.
    """
    if not lyrics or not lyrics.strip():
        raise gr.Error("โš ๏ธ ๊ฐ€์‚ฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”!")
    if not tags or not tags.strip():
        raise gr.Error("โš ๏ธ ํƒœ๊ทธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”!")

    # Normalise the tags
    tags = clean_tags(tags)

    # Reserve a file path; the pipeline writes the audio there via save_path.
    # delete=False keeps the file after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        output_path = f.name

    # int() guards against UI widgets delivering whole numbers as floats
    max_audio_length_ms = int(max_duration_seconds * 1000)
    try:
        with torch.no_grad():
            pipe(
                {
                    "lyrics": lyrics,
                    "tags": tags,
                },
                max_audio_length_ms=max_audio_length_ms,
                save_path=output_path,
                topk=int(topk),
                temperature=temperature,
                cfg_scale=cfg_scale,
            )
    except Exception as e:
        # The reserved temp file would otherwise be left behind on failure
        try:
            os.remove(output_path)
        except OSError:
            pass
        return None, f"โŒ ์ƒ์„ฑ ์‹คํŒจ: {str(e)}"
    return output_path, f"โœ… ์Œ์•… ์ƒ์„ฑ ์™„๋ฃŒ! ({max_duration_seconds}์ดˆ, ํƒœ๊ทธ: {tags})"
def load_tag_preset(preset_name):
    """Return the tag string stored for ``preset_name``, or "" when there is no such preset."""
    return EXAMPLE_TAG_PRESETS.get(preset_name, "")
# ============================================================
# 🎨 Comic Classic Theme CSS
# ============================================================
css = """
@import url('https://fonts.googleapis.com/css2?family=Bangers&family=Comic+Neue:wght@400;700&display=swap');
.gradio-container {
background-color: #FEF9C3 !important;
background-image: radial-gradient(#1F2937 1px, transparent 1px) !important;
background-size: 20px 20px !important;
font-family: 'Comic Neue', cursive, sans-serif !important;
}
.huggingface-space-header, footer, .footer { display: none !important; }
.header-title h1 {
font-family: 'Bangers', cursive !important;
color: #1F2937 !important;
font-size: 3rem !important;
text-align: center !important;
text-shadow: 4px 4px 0px #FACC15, 6px 6px 0px #1F2937 !important;
letter-spacing: 3px !important;
-webkit-text-stroke: 2px #1F2937 !important;
}
.section-title {
font-family: 'Bangers', cursive !important;
color: #1F2937 !important;
font-size: 1.6rem !important;
border-bottom: 4px solid #3B82F6 !important;
padding-bottom: 8px !important;
text-shadow: 2px 2px 0px #FACC15 !important;
}
.gr-panel, .gr-box, .block {
background: #FFFFFF !important;
border: 3px solid #1F2937 !important;
border-radius: 8px !important;
box-shadow: 6px 6px 0px #1F2937 !important;
}
textarea, input[type="text"], input[type="password"] {
background: #FFFFFF !important;
border: 3px solid #1F2937 !important;
border-radius: 8px !important;
font-family: 'Comic Neue', cursive !important;
font-weight: 700 !important;
}
textarea:focus, input:focus {
border-color: #3B82F6 !important;
box-shadow: 4px 4px 0px #3B82F6 !important;
}
.gr-button-primary, button.primary {
background: #3B82F6 !important;
border: 3px solid #1F2937 !important;
border-radius: 8px !important;
color: #FFFFFF !important;
font-family: 'Bangers', cursive !important;
font-size: 1.2rem !important;
letter-spacing: 2px !important;
box-shadow: 5px 5px 0px #1F2937 !important;
text-shadow: 1px 1px 0px #1F2937 !important;
}
.gr-button-primary:hover {
background: #2563EB !important;
}
.gr-button-secondary, button.secondary {
background: #EF4444 !important;
border: 3px solid #1F2937 !important;
color: #FFFFFF !important;
font-family: 'Bangers', cursive !important;
box-shadow: 4px 4px 0px #1F2937 !important;
}
.generate-btn {
background: #10B981 !important;
border: 3px solid #1F2937 !important;
color: #FFFFFF !important;
font-family: 'Bangers', cursive !important;
font-size: 1.4rem !important;
box-shadow: 5px 5px 0px #1F2937 !important;
}
.generate-btn:hover {
background: #059669 !important;
}
.gr-accordion {
background: #FACC15 !important;
border: 3px solid #1F2937 !important;
box-shadow: 4px 4px 0px #1F2937 !important;
}
.tag-input textarea {
background: #FEF3C7 !important;
border: 3px dashed #F59E0B !important;
font-family: 'Courier New', monospace !important;
}
.status-box textarea {
background: #1F2937 !important;
color: #10B981 !important;
font-family: 'Courier New', monospace !important;
border: 3px solid #10B981 !important;
}
label {
color: #1F2937 !important;
font-family: 'Comic Neue', cursive !important;
font-weight: 700 !important;
}
::-webkit-scrollbar { width: 12px; }
::-webkit-scrollbar-track { background: #FEF9C3; border: 2px solid #1F2937; }
::-webkit-scrollbar-thumb { background: #3B82F6; border: 2px solid #1F2937; }
::selection { background: #FACC15; color: #1F2937; }
"""
# ============================================================
# 🎨 Gradio UI
# ============================================================
with gr.Blocks(css=css, title="๐ŸŽต SOMA Music Studio - HeartMuLa") as demo:
# Header - HOME Badge
gr.HTML("""
<div style="text-align: center; margin: 20px 0 10px 0;">
<a href="https://www.humangen.ai" target="_blank" style="text-decoration: none;">
<img src="https://img.shields.io/static/v1?label=๐Ÿ  HOME&message=HUMANGEN.AI&color=0000ff&labelColor=ffcc00&style=for-the-badge" alt="HOME">
</a>
</div>
""")
gr.Markdown("# ๐ŸŽต SOMA MUSIC STUDIO ๐ŸŽถ", elem_classes="header-title")
gr.Markdown("""
<p style="text-align: center; font-family: 'Comic Neue', cursive; font-size: 1.1rem; font-weight: 700; color: #1F2937;">
๐Ÿ’ซ HeartMuLa-oss-3B + SOMA Multi-Agent = ์ตœ๊ณ  ํ’ˆ์งˆ AI ์Œ์•… ์ƒ์„ฑ ๐Ÿ’ซ
</p>
""")
# API Key
with gr.Accordion("๐Ÿ”‘ Groq API Key (๊ฐ€์‚ฌ ์ƒ์„ฑ์šฉ)", open=False):
GROQ_KEY = os.environ.get("GROQ_API_KEY", "")
groq_key = gr.Textbox(
label="Groq API Key",
type="password",
value=GROQ_KEY,
placeholder="gsk_..." if not GROQ_KEY else "โœ… Secret ๋กœ๋“œ๋จ",
interactive=not bool(GROQ_KEY)
)
with gr.Row(equal_height=False):
# ========== ์ขŒ์ธก: ๊ฐ€์‚ฌ ์ƒ์„ฑ ==========
with gr.Column(scale=1, min_width=400):
gr.Markdown("## ๐Ÿ“ LYRICS GENERATOR", elem_classes="section-title")
theme_input = gr.Textbox(
label="๐ŸŽฏ ๋…ธ๋ž˜ ์ฃผ์ œ",
placeholder="์˜ˆ: ์ด๋ณ„ ํ›„ ์„ฑ์žฅ, ๊ฟˆ์„ ํ–ฅํ•œ ๋„์ „, ์‚ฌ๋ž‘์˜ ๊ณ ๋ฐฑ...",
lines=2
)
with gr.Row():
lyrics_genre = gr.Dropdown(
label="๐ŸŽธ ์žฅ๋ฅด",
choices=["K-Pop", "Pop", "R&B", "Hip-Hop", "Ballad", "Rock",
"EDM", "Jazz", "Folk", "Disco", "Cinematic"],
value="K-Pop"
)
lyrics_mood = gr.Dropdown(
label="๐Ÿ’ซ ๋ถ„์œ„๊ธฐ",
choices=["Empowering", "Melancholic", "Joyful", "Romantic",
"Energetic", "Dreamy", "Nostalgic", "Peaceful", "Confident"],
value="Empowering"
)
with gr.Row():
lyrics_language = gr.Dropdown(
label="๐ŸŒ ์–ธ์–ด (Language)",
choices=["English", "Korean", "Chinese", "Japanese", "Spanish"],
value="English",
info="HeartMuLa ๊ณต์‹ ์ง€์› 5๊ฐœ ์–ธ์–ด"
)
lyrics_additional = gr.Textbox(
label="โœจ ์ถ”๊ฐ€ ์ง€์‹œ (์„ ํƒ)",
placeholder="ํŠน๋ณ„ ์š”์ฒญ: ํŠน์ • ๊ฐ์ •, ์Šคํƒ€์ผ ๋“ฑ...",
lines=1
)
with gr.Row():
quick_btn = gr.Button("โšก QUICK", variant="secondary")
soma_lyrics_btn = gr.Button("๐Ÿง  SOMA GENERATE", variant="primary")
lyrics_status = gr.Textbox(label="๐Ÿ“Š ์ƒํƒœ", interactive=False, max_lines=1, elem_classes="status-box")
with gr.Accordion("๐Ÿ” SOMA ์ž‘์—… ๊ณผ์ •", open=False):
step1_out = gr.Textbox(label="1๏ธโƒฃ ์ž‘์‚ฌ๊ฐ€", lines=3, interactive=False)
step2_out = gr.Textbox(label="2๏ธโƒฃ ํ”„๋กœ๋“€์„œ", lines=3, interactive=False)
step3_out = gr.Textbox(label="3๏ธโƒฃ ๊ฐ์„ฑ", lines=3, interactive=False)
step4_out = gr.Textbox(label="4๏ธโƒฃ ์ตœ์ข…", lines=3, interactive=False)
final_lyrics = gr.Textbox(
label="โœ๏ธ ๊ฐ€์‚ฌ (ํŽธ์ง‘ ๊ฐ€๋Šฅ)",
lines=16,
value=EXAMPLE_LYRICS,
placeholder="๊ฐ€์‚ฌ๊ฐ€ ์—ฌ๊ธฐ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค..."
)
# ========== ์šฐ์ธก: ์Œ์•… ์ƒ์„ฑ ==========
with gr.Column(scale=1, min_width=400):
gr.Markdown("## ๐ŸŽต MUSIC GENERATOR", elem_classes="section-title")
# ํƒœ๊ทธ ํ”„๋ฆฌ์…‹
gr.Markdown("### ๐Ÿท๏ธ ํƒœ๊ทธ ํ”„๋ฆฌ์…‹ (ํด๋ฆญํ•˜๋ฉด ์ž๋™ ์ž…๋ ฅ)")
tag_preset = gr.Dropdown(
label="ํ”„๋ฆฌ์…‹ ์„ ํƒ",
choices=list(EXAMPLE_TAG_PRESETS.keys()),
value=None
)
tags_input = gr.Textbox(
label="๐ŸŽน ์Šคํƒ€์ผ ํƒœ๊ทธ (์ฝค๋งˆ ๊ตฌ๋ถ„, ๊ณต๋ฐฑ ์—†์Œ)",
value=EXAMPLE_TAGS,
placeholder="piano,happy,uplifting,pop",
lines=1,
elem_classes="tag-input"
)
with gr.Row():
ai_tag_btn = gr.Button("๐Ÿค– AI ํƒœ๊ทธ ์ƒ์„ฑ", variant="secondary", size="sm")
with gr.Accordion("โš™๏ธ ์ƒ์„ฑ ์„ค์ •", open=True):
max_duration = gr.Slider(
minimum=30, maximum=240, value=120, step=10,
label="โฑ๏ธ ์ตœ๋Œ€ ๊ธธ์ด (์ดˆ)",
info="30~240์ดˆ"
)
with gr.Row():
temperature = gr.Slider(
minimum=0.1, maximum=2.0, value=1.0, step=0.1,
label="๐ŸŒก๏ธ Temperature",
info="๋†’์„์ˆ˜๋ก ์ฐฝ์˜์ "
)
topk = gr.Slider(
minimum=1, maximum=100, value=50, step=1,
label="๐ŸŽฏ Top-K"
)
cfg_scale = gr.Slider(
minimum=1.0, maximum=3.0, value=1.5, step=0.1,
label="๐Ÿ“ CFG Scale",
info="Classifier-free guidance"
)
generate_btn = gr.Button(
"๐ŸŽถ GENERATE MUSIC!",
variant="primary",
size="lg",
elem_classes="generate-btn"
)
music_status = gr.Textbox(label="๐Ÿ“Š ์ƒํƒœ", interactive=False, max_lines=1, elem_classes="status-box")
audio_output = gr.Audio(label="๐ŸŽง ์ƒ์„ฑ๋œ ์Œ์•…", type="filepath")
gr.Markdown("""
### ๐Ÿ’ก Tips
- **๊ตฌ์กฐ ํƒœ๊ทธ:** [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Outro]
- **์Šคํƒ€์ผ ํƒœ๊ทธ:** piano,happy,uplifting,pop (์ฝค๋งˆ ๊ตฌ๋ถ„, ๊ณต๋ฐฑ ์—†์Œ!)
- **์ง€์› ์–ธ์–ด:** ๐Ÿ‡บ๐Ÿ‡ธ EN | ๐Ÿ‡ฐ๐Ÿ‡ท KO | ๐Ÿ‡จ๐Ÿ‡ณ ZH | ๐Ÿ‡ฏ๐Ÿ‡ต JA | ๐Ÿ‡ช๐Ÿ‡ธ ES
- โš ๏ธ **๋ณด์ปฌ ํƒ€์ž…(๋“€์—ฃ, ๊ทธ๋ฃน ๋“ฑ)์€ ์ง€์›ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค!**
- **Temperature:** ๋†’์œผ๋ฉด ์ฐฝ์˜์ , ๋‚ฎ์œผ๋ฉด ์ผ๊ด€์„ฑ
""")
# ๊ฐ€์ด๋“œ
with gr.Accordion("๐Ÿ“– HeartMuLa ๊ฐ€์ด๋“œ", open=False):
gr.Markdown(f"""
## โš ๏ธ HeartMuLa ์ž…๋ ฅ ํ˜•์‹ (์ค‘์š”!)
HeartMuLa๋Š” **2๊ฐ€์ง€ ์ž…๋ ฅ๋งŒ** ๋ฐ›์Šต๋‹ˆ๋‹ค:
1. **๊ฐ€์‚ฌ (Lyrics)** - ๊ตฌ์กฐ ํƒœ๊ทธ ํฌํ•จ
2. **์Šคํƒ€์ผ ํƒœ๊ทธ (Tags)** - ์ฝค๋งˆ ๊ตฌ๋ถ„
โŒ **์ง€์›ํ•˜์ง€ ์•Š๋Š” ๊ฒƒ:**
- ๋ณด์ปฌ ํƒ€์ž… ์„ ํƒ (๋“€์—ฃ, ๊ทธ๋ฃน, ์ฝ”๋Ÿฌ์Šค ๋“ฑ)
- ๋ ˆํผ๋Ÿฐ์Šค ์˜ค๋””์˜ค (์•„์ง ๋ฏธ์ง€์›)
- BPM/ํ…œํฌ ์ง์ ‘ ์ง€์ •
---
## ๐ŸŒ ์ง€์› ์–ธ์–ด (5๊ฐœ)
| ์–ธ์–ด | ์ฝ”๋“œ | ์ƒํƒœ |
|------|------|------|
| ๐Ÿ‡บ๐Ÿ‡ธ English | EN | โœ… |
| ๐Ÿ‡ฐ๐Ÿ‡ท Korean | KO | โœ… |
| ๐Ÿ‡จ๐Ÿ‡ณ Chinese | ZH | โœ… |
| ๐Ÿ‡ฏ๐Ÿ‡ต Japanese | JA | โœ… |
| ๐Ÿ‡ช๐Ÿ‡ธ Spanish | ES | โœ… |
---
{HEARTMULA_TAG_GUIDE}
---
{HEARTMULA_LYRICS_STRUCTURE}
---
**Model:** [HeartMuLa-oss-3B](https://huggingface.co/HeartMuLa/HeartMuLa-oss-3B) |
**Paper:** [arXiv](https://arxiv.org/abs/2601.10547) |
**Code:** [GitHub](https://github.com/HeartMuLa/heartlib)
*Licensed under CC BY-NC 4.0 (Non-Commercial)*
""")
# ========== Event Handlers ==========
# ํƒœ๊ทธ ํ”„๋ฆฌ์…‹ ๋กœ๋“œ
tag_preset.change(
fn=load_tag_preset,
inputs=[tag_preset],
outputs=[tags_input]
)
# AI ํƒœ๊ทธ ์ƒ์„ฑ
ai_tag_btn.click(
fn=generate_tags_ai,
inputs=[groq_key, lyrics_genre, lyrics_mood, tags_input],
outputs=[tags_input]
)
# ๋น ๋ฅธ ๊ฐ€์‚ฌ ์ƒ์„ฑ
quick_btn.click(
fn=quick_lyrics,
inputs=[groq_key, theme_input, lyrics_genre, lyrics_mood, lyrics_language, lyrics_additional],
outputs=[final_lyrics]
)
# SOMA ๊ฐ€์‚ฌ ์ƒ์„ฑ
soma_lyrics_btn.click(
fn=generate_lyrics_soma,
inputs=[groq_key, theme_input, lyrics_genre, lyrics_mood, lyrics_language, lyrics_additional],
outputs=[lyrics_status, step1_out, step2_out, step3_out, step4_out]
).then(
fn=lambda x: x,
inputs=[step4_out],
outputs=[final_lyrics]
)
# ์Œ์•… ์ƒ์„ฑ (HeartMuLa)
generate_btn.click(
fn=generate_music_heartmula,
inputs=[final_lyrics, tags_input, max_duration, temperature, topk, cfg_scale],
outputs=[audio_output, music_status]
)
# Launch the Gradio app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()