Update app.py
Browse files
app.py
CHANGED
|
@@ -1,272 +1,1508 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import tempfile
|
| 3 |
-
import torch
|
| 4 |
import gradio as gr
|
| 5 |
-
|
| 6 |
-
import
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
def download_models():
|
| 10 |
-
"""Download all required model files from HuggingFace Hub."""
|
| 11 |
-
cache_dir = os.environ.get("HF_HOME", os.path.expanduser("/tmp"))
|
| 12 |
-
model_dir = os.path.join(cache_dir, "heartmula_models")
|
| 13 |
-
|
| 14 |
-
if not os.path.exists(model_dir):
|
| 15 |
-
os.makedirs(model_dir, exist_ok=True)
|
| 16 |
-
|
| 17 |
-
# Download HeartMuLaGen (tokenizer and gen_config)
|
| 18 |
-
print("Downloading HeartMuLaGen files...")
|
| 19 |
-
for filename in ["tokenizer.json", "gen_config.json"]:
|
| 20 |
-
hf_hub_download(
|
| 21 |
-
repo_id="HeartMuLa/HeartMuLaGen",
|
| 22 |
-
filename=filename,
|
| 23 |
-
local_dir=model_dir,
|
| 24 |
-
)
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
local_dir=os.path.join(model_dir, "HeartMuLa-oss-3B"),
|
| 31 |
-
)
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
)
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
if not tags.strip():
|
| 80 |
-
raise gr.Error("Please enter at least one tag!")
|
| 81 |
-
|
| 82 |
-
# Create a temporary file for output
|
| 83 |
-
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
|
| 84 |
-
output_path = f.name
|
| 85 |
-
|
| 86 |
-
max_audio_length_ms = max_duration_seconds * 1000
|
| 87 |
-
|
| 88 |
-
with torch.no_grad():
|
| 89 |
-
pipe(
|
| 90 |
-
{
|
| 91 |
-
"lyrics": lyrics,
|
| 92 |
-
"tags": tags,
|
| 93 |
-
},
|
| 94 |
-
max_audio_length_ms=max_audio_length_ms,
|
| 95 |
-
save_path=output_path,
|
| 96 |
-
topk=topk,
|
| 97 |
-
temperature=temperature,
|
| 98 |
-
cfg_scale=cfg_scale,
|
| 99 |
-
)
|
| 100 |
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
[Verse]
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
|
| 113 |
[Prechorus]
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
I find my rhythm in the sound
|
| 117 |
|
| 118 |
[Chorus]
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
It is the ordinary magic that we meet
|
| 124 |
|
| 125 |
[Verse]
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
|
| 131 |
[Bridge]
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
|
| 136 |
[Chorus]
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
Moving to the same steady beat
|
| 141 |
|
| 142 |
[Outro]
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
EXAMPLE_TAGS = "piano,happy,uplifting,pop"
|
| 147 |
-
|
| 148 |
-
# Build the Gradio interface
|
| 149 |
-
with gr.Blocks(
|
| 150 |
-
title="HeartMuLa Music Generator",
|
| 151 |
-
) as demo:
|
| 152 |
-
gr.Markdown(
|
| 153 |
-
"""
|
| 154 |
-
# HeartMuLa Music Generator
|
| 155 |
-
|
| 156 |
-
Generate music from lyrics and tags using [HeartMuLa](https://github.com/HeartMuLa/heartlib),
|
| 157 |
-
an open-source music foundation model.
|
| 158 |
-
|
| 159 |
-
**Instructions:**
|
| 160 |
-
1. Enter your lyrics with structure tags like `[Verse]`, `[Chorus]`, `[Bridge]`, etc.
|
| 161 |
-
2. Add comma-separated tags describing the music style (e.g., `piano,happy,romantic`)
|
| 162 |
-
3. Adjust generation parameters as needed
|
| 163 |
-
4. Click "Generate Music" and wait for your song!
|
| 164 |
-
|
| 165 |
-
*Note: Generation can take several minutes depending on the duration.*
|
| 166 |
-
"""
|
| 167 |
-
)
|
| 168 |
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
)
|
| 177 |
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
maximum=240,
|
| 189 |
-
value=120,
|
| 190 |
-
step=10,
|
| 191 |
-
label="Max Duration (seconds)",
|
| 192 |
-
info="Maximum length of generated audio",
|
| 193 |
-
)
|
| 194 |
|
| 195 |
-
|
| 196 |
-
minimum=0.1,
|
| 197 |
-
maximum=2.0,
|
| 198 |
-
value=1.0,
|
| 199 |
-
step=0.1,
|
| 200 |
-
label="Temperature",
|
| 201 |
-
info="Higher = more creative, Lower = more consistent",
|
| 202 |
-
)
|
| 203 |
|
| 204 |
-
|
| 205 |
-
minimum=1,
|
| 206 |
-
maximum=100,
|
| 207 |
-
value=50,
|
| 208 |
-
step=1,
|
| 209 |
-
label="Top-K",
|
| 210 |
-
info="Number of top tokens to sample from",
|
| 211 |
-
)
|
| 212 |
|
| 213 |
-
|
| 214 |
-
minimum=1.0,
|
| 215 |
-
maximum=3.0,
|
| 216 |
-
value=1.5,
|
| 217 |
-
step=0.1,
|
| 218 |
-
label="CFG Scale",
|
| 219 |
-
info="Classifier-free guidance scale",
|
| 220 |
-
)
|
| 221 |
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
-
|
| 247 |
-
fn=generate_music,
|
| 248 |
-
inputs=[
|
| 249 |
-
lyrics_input,
|
| 250 |
-
tags_input,
|
| 251 |
-
max_duration,
|
| 252 |
-
temperature,
|
| 253 |
-
topk,
|
| 254 |
-
cfg_scale,
|
| 255 |
-
],
|
| 256 |
-
outputs=audio_output,
|
| 257 |
-
)
|
| 258 |
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
|
| 271 |
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import requests
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from groq import Groq
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# ============================================================
|
| 8 |
+
# 🎵 SOMA Music Studio - HeartMuLa Optimized Edition
|
| 9 |
+
# MiniMax Music 2.5 + HeartMuLa Style Guide + Comic Classic Theme
|
| 10 |
+
# ============================================================
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# HeartMuLa 권장 구조 태그 (공식 문서 기반)
|
| 13 |
+
STRUCTURE_TAGS = [
|
| 14 |
+
"[Intro]", "[Verse]", "[Prechorus]", "[Chorus]", "[Bridge]",
|
| 15 |
+
"[Interlude]", "[Hook]", "[Outro]", "[Inst]", "[Solo]"
|
| 16 |
+
]
|
|
|
|
| 17 |
|
| 18 |
+
# HeartMuLa 스타일 태그 가이드 (콤마 구분, 공백 없음)
|
| 19 |
+
HEARTMULA_TAG_GUIDE = """
|
| 20 |
+
## πΌ HeartMuLa Tag Format Guide
|
| 21 |
+
|
| 22 |
+
### STRUCTURE TAGS (κ°μ¬ λ΄ μ¬μ©):
|
| 23 |
+
[Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]
|
| 24 |
+
|
| 25 |
+
### STYLE TAGS FORMAT (μ½€λ§ κ΅¬λΆ, 곡백 μμ):
|
| 26 |
+
```
|
| 27 |
+
piano,happy,wedding,synthesizer,romantic
|
| 28 |
+
guitar,sad,ballad,strings,emotional
|
| 29 |
+
drums,energetic,rock,electric_guitar,powerful
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
### RECOMMENDED TAG CATEGORIES:
|
| 33 |
+
**Instruments:** piano,guitar,drums,bass,synthesizer,strings,violin,cello,trumpet,saxophone,flute,harp,organ
|
| 34 |
+
**Mood:** happy,sad,romantic,energetic,calm,melancholic,uplifting,dark,dreamy,nostalgic,powerful,peaceful
|
| 35 |
+
**Genre:** pop,rock,jazz,classical,electronic,folk,blues,r&b,hip_hop,disco,ballad,cinematic
|
| 36 |
+
**Tempo:** fast,slow,moderate,upbeat,relaxed
|
| 37 |
+
**Occasion:** wedding,party,meditation,workout,study,sleep,travel
|
| 38 |
+
|
| 39 |
+
### EXAMPLE COMBINATIONS:
|
| 40 |
+
- K-Pop Dance: `synthesizer,drums,energetic,pop,upbeat,powerful`
|
| 41 |
+
- Jazz Ballad: `piano,saxophone,romantic,jazz,slow,dreamy`
|
| 42 |
+
- Epic Cinematic: `strings,orchestra,powerful,cinematic,dramatic,epic`
|
| 43 |
+
- Lo-Fi Chill: `piano,guitar,calm,lo_fi,relaxed,dreamy`
|
| 44 |
+
"""
|
| 45 |
+
|
| 46 |
+
# MiniMax Music 2.5 핵심 기능 가이드 (강화)
|
| 47 |
+
MINIMAX_MUSIC_GUIDE = """
|
| 48 |
+
## MiniMax Music 2.5 Core Capabilities:
|
| 49 |
+
|
| 50 |
+
### 1. DYNAMIC VOCALS - Mastery Over Diverse Singing Styles
|
| 51 |
+
- Human-like vocal timbre with professional singing techniques
|
| 52 |
+
- Precise control over phrasing, rhythm, and breath
|
| 53 |
+
- One voice can switch between multiple styles
|
| 54 |
+
- Supports: Pop, Jazz, Blues, Rock, Folk, Electronic, Urban, Disco
|
| 55 |
+
- Special modes: Male-female duets, A Cappella (pure vocals)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
+
### 2. CATCHY MELODIES & INSTRUMENT CONTROL
|
| 58 |
+
- Structurally complete songs: Verse β Chorus β Bridge (up to 5 minutes)
|
| 59 |
+
- Memorable, instantly captivating melodies
|
| 60 |
+
- Independent control of individual instruments
|
| 61 |
+
- Instruments: Piano, Guitar, Bass, Drums, Saxophone, Trumpet, Synths, Strings
|
| 62 |
|
| 63 |
+
### 3. PROFESSIONAL-GRADE AUDIO
|
| 64 |
+
- Enhanced vocal track texture
|
| 65 |
+
- Spatial presence of instruments
|
| 66 |
+
- Immersive listening experience
|
| 67 |
+
- Film-grade monologue soundtracks
|
| 68 |
|
| 69 |
+
### 4. PROMPT WRITING BEST PRACTICES (HeartMuLa Optimized)
|
| 70 |
+
- Use specific instrument names
|
| 71 |
+
- Describe vocal emotions precisely
|
| 72 |
+
- Specify singing techniques (breathy, powerful, smooth, raspy)
|
| 73 |
+
- Use tempo (BPM), key when needed
|
| 74 |
+
- Tags should be comma-separated WITHOUT spaces
|
| 75 |
+
"""
|
| 76 |
+
|
| 77 |
+
# HeartMuLa 권장 가사 구조 예시
|
| 78 |
+
HEARTMULA_LYRICS_STRUCTURE = """
|
| 79 |
+
## π HeartMuLa Recommended Lyrics Structure
|
| 80 |
+
|
| 81 |
+
### OPTIMAL FORMAT:
|
| 82 |
+
```
|
| 83 |
+
[Intro]
|
| 84 |
|
| 85 |
[Verse]
|
| 86 |
+
First verse lyrics here
|
| 87 |
+
Second line of first verse
|
| 88 |
+
Third line continues story
|
| 89 |
+
Fourth line builds emotion
|
| 90 |
|
| 91 |
[Prechorus]
|
| 92 |
+
Building tension here
|
| 93 |
+
Leading to the chorus
|
|
|
|
| 94 |
|
| 95 |
[Chorus]
|
| 96 |
+
Main hook and memorable melody
|
| 97 |
+
Most important part of song
|
| 98 |
+
Repeat this section 2-3 times
|
| 99 |
+
Make it singable and catchy
|
|
|
|
| 100 |
|
| 101 |
[Verse]
|
| 102 |
+
Second verse develops story
|
| 103 |
+
New information revealed
|
| 104 |
+
Emotional progression continues
|
| 105 |
+
Building toward bridge
|
| 106 |
|
| 107 |
[Bridge]
|
| 108 |
+
Contrast section here
|
| 109 |
+
Different melody or perspective
|
| 110 |
+
Emotional peak moment
|
| 111 |
|
| 112 |
[Chorus]
|
| 113 |
+
Main hook repeated
|
| 114 |
+
With possible variations
|
| 115 |
+
Final emotional release
|
|
|
|
| 116 |
|
| 117 |
[Outro]
|
| 118 |
+
Closing the song
|
| 119 |
+
Fading or resolving
|
| 120 |
+
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
+
### KEY RULES:
|
| 123 |
+
1. [Chorus] appears at least 2-3 times
|
| 124 |
+
2. [Prechorus] builds tension before [Chorus]
|
| 125 |
+
3. [Bridge] provides contrast before final [Chorus]
|
| 126 |
+
4. Each [Verse] should progress the story
|
| 127 |
+
5. [Intro] and [Outro] frame the song
|
| 128 |
+
"""
|
|
|
|
| 129 |
|
| 130 |
+
# 예시 프롬프트 (HeartMuLa + MiniMax 최적화)
|
| 131 |
+
EXAMPLE_PROMPTS = {
|
| 132 |
+
"π€ A Cappella (μμΉ΄ν λΌ)": {
|
| 133 |
+
"prompt": "A cappella arrangement with pure vocal harmonies, no instrumental accompaniment. Features a lead soprano voice with rich layered backing vocals creating lush harmonies. Gentle humming bass line, rhythmic vocal percussion, and ethereal 'ooh' and 'aah' pads. The vocals blend seamlessly with precise tuning and warm reverb, creating a refreshing, meditative atmosphere. 70 BPM, peaceful and soothing mood.",
|
| 134 |
+
"tags": "acappella,vocals,harmony,peaceful,soothing,choir",
|
| 135 |
+
"description": "μμ 보컬λ§μΌλ‘ νλΆν λ©λ‘λ"
|
| 136 |
+
},
|
| 137 |
+
"π₯ Group Harmony (κ·Έλ£Ή νλͺ¨λ)": {
|
| 138 |
+
"prompt": "Powerful group vocal anthem featuring a lead female voice with layered choir harmonies. Rich unison sections build to explosive harmonic splits in the chorus. Features call-and-response patterns, anthemic 'oh-oh-oh' chants, and soaring group harmonies. Modern pop production with punchy drums, driving bass, synth hooks, and brass stabs. 118 BPM, empowering and triumphant energy.",
|
| 139 |
+
"tags": "choir,harmony,pop,drums,bass,synthesizer,brass,powerful,anthem",
|
| 140 |
+
"description": "νμνν κ·Έλ£Ή 보컬 μ€μΈ"
|
| 141 |
+
},
|
| 142 |
+
"π· Jazz Duet (μ¬μ¦ λμ£)": {
|
| 143 |
+
"prompt": "Intimate jazz duet featuring conversational interplay between a warm male baritone and a silky female alto voice. Dynamic intensity variations with seamless transitions between lead vocals. Accompanied by brushed jazz drums, walking upright bass, and gentle piano comping. Saxophone solo in the bridge. 95 BPM, late-night jazz club atmosphere with warm analog sound.",
|
| 144 |
+
"tags": "jazz,piano,bass,drums,saxophone,romantic,intimate,duet",
|
| 145 |
+
"description": "λ¨λ
보컬μ μ¬μ¦ λμ£"
|
| 146 |
+
},
|
| 147 |
+
"πΈ Multi-Style (λ©ν°μ€νμΌ)": {
|
| 148 |
+
"prompt": "Showcase track demonstrating one female voice transitioning through three distinct styles: Starting with energetic Jump Blues featuring powerful belting and brass stabs, transitioning to aggressive Rock with distorted guitars and raspy vocals, finally morphing into sleek Electronic with auto-tuned vocals and pulsing synths. 120 BPM with tempo shifts between sections.",
|
| 149 |
+
"tags": "blues,rock,electronic,guitar,synthesizer,brass,dynamic,powerful",
|
| 150 |
+
"description": "μ€νμΌ μ ν μΌμΌμ΄μ€"
|
| 151 |
+
},
|
| 152 |
+
"π Urban Chill (μ΄λ° μΉ )": {
|
| 153 |
+
"prompt": "Contemporary urban R&B track with a cool, laid-back vibe. Features a smooth male vocal with subtle Auto-Tune enhancement and breathy delivery. Trap-influenced 808 bass, crisp hi-hats with intricate patterns, ambient synth pads, and soft piano chords. Spacious production with heavy reverb and delay. 85 BPM, modern and sophisticated sound.",
|
| 154 |
+
"tags": "r&b,808,synthesizer,piano,chill,urban,modern,smooth",
|
| 155 |
+
"description": "μ΄λ° R&B 무λ"
|
| 156 |
+
},
|
| 157 |
+
"πΉ Jazz Club (μ¬μ¦ ν΄λ½)": {
|
| 158 |
+
"prompt": "Live jazz ensemble performance capturing the essence of Blue Note club. Instruments enter in perfect sequence: brushed drums set the groove, walking bass joins in, piano adds sophisticated chord voicings, then saxophone takes the melody. Trumpet and trombone provide punchy brass accents. Extended saxophone solo with bebop-style improvisation. 140 BPM swing feel.",
|
| 159 |
+
"tags": "jazz,piano,bass,drums,saxophone,trumpet,trombone,live,swing",
|
| 160 |
+
"description": "λΌμ΄λΈ μ¬μ¦ ν΄λ½"
|
| 161 |
+
},
|
| 162 |
+
"πͺ© Retro Disco (λ νΈλ‘ λμ€μ½)": {
|
| 163 |
+
"prompt": "Vibrant disco track channeling the golden age of 80s dance music. Features a powerful female diva vocal with dynamic range and soulful ad-libs. Classic instrumentation: four-on-the-floor kick drum, funky slap bass, rhythmic guitar scratches, lush string arrangements, and bright brass stabs. Warm analog tape saturation. 120 BPM, euphoric and nostalgic energy.",
|
| 164 |
+
"tags": "disco,bass,guitar,strings,brass,drums,retro,funky,dance",
|
| 165 |
+
"description": "80λ
λ λμ€μ½"
|
| 166 |
+
},
|
| 167 |
+
"π¬ Film Score (μν μ€μ½μ΄)": {
|
| 168 |
+
"prompt": "Cinematic monologue soundtrack with layered emotional progression. A contemplative male voice delivers poetic narration over evolving orchestral arrangement. Begins with solo piano and soft strings, gradually building with French horn and cello. Atmospheric sound design with ocean waves and distant thunder. 60 BPM, deeply moving and introspective.",
|
| 169 |
+
"tags": "orchestral,piano,strings,cello,horn,cinematic,emotional,dramatic",
|
| 170 |
+
"description": "μλ€λ§ν± μ€μ½μ΄"
|
| 171 |
+
},
|
| 172 |
+
"π΅ K-Pop Dance (K-Pop λμ€)": {
|
| 173 |
+
"prompt": "High-energy K-Pop dance track with a bright, clear female vocal and polished production. Catchy hook melody that's instantly memorable. Driving beat with punchy kicks, snappy snares, and intricate hi-hat programming. Layered synth hooks, powerful brass hits, and EDM-style buildups to explosive drops. 128 BPM, confident and empowering energy.",
|
| 174 |
+
"tags": "kpop,synthesizer,drums,bass,brass,electronic,energetic,dance,pop",
|
| 175 |
+
"description": "κ³ μλμ§ K-Pop"
|
| 176 |
+
},
|
| 177 |
+
"π» Orchestral Ballad (μ€μΌμ€νΈλΌ λ°λΌλ)": {
|
| 178 |
+
"prompt": "Sweeping orchestral ballad with an emotional female soprano voice. Begins intimately with solo piano, gradually introducing strings section - first violins, then violas and cellos. French horn provides warm counter-melodies. Builds to a full orchestral climax with timpani rolls and brass fanfares. 65 BPM, epic yet intimate.",
|
| 179 |
+
"tags": "orchestral,piano,violin,cello,horn,strings,ballad,emotional,epic",
|
| 180 |
+
"description": "μ€μΌμ€νΈλΌ λ°λΌλ"
|
| 181 |
+
},
|
| 182 |
+
"π₯ HeartMuLa Default (κΈ°λ³Έ μμ)": {
|
| 183 |
+
"prompt": "Uplifting pop song with piano and synthesizer leads, happy and romantic mood. Features a clear female vocal with emotional delivery, supported by gentle drums and warm bass. Catchy melody in the chorus with memorable hooks. Clean production with balanced mix. 110 BPM, wedding-appropriate joyful atmosphere.",
|
| 184 |
+
"tags": "piano,happy,wedding,synthesizer,romantic",
|
| 185 |
+
"description": "HeartMuLa 곡μ μμ"
|
| 186 |
+
}
|
| 187 |
+
}
|
| 188 |
|
| 189 |
+
# SOMA 에이전트 - 가사 생성 (HeartMuLa 최적화)
|
| 190 |
+
LYRICS_AGENTS = {
|
| 191 |
+
"lyricist": f"""You are a master lyricist optimized for HeartMuLa and MiniMax Music generation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
+
{MINIMAX_MUSIC_GUIDE}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
+
{HEARTMULA_LYRICS_STRUCTURE}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
+
Your task: Create powerful, memorable lyrics with OPTIMAL HeartMuLa tag placement.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
+
CRITICAL RULES:
|
| 200 |
+
1. Use HeartMuLa structure tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]
|
| 201 |
+
2. [Prechorus] (not [Pre Chorus]) - HeartMuLa format
|
| 202 |
+
3. Ensure [Chorus] is the most memorable, singable part
|
| 203 |
+
4. Include [Prechorus] to build tension before [Chorus]
|
| 204 |
+
5. Add [Bridge] for emotional contrast
|
| 205 |
+
6. Minimum 6-8 sections for quality generation
|
| 206 |
|
| 207 |
+
Write lyrics that create the BEST POSSIBLE foundation for high-quality music generation.""",
|
| 208 |
+
|
| 209 |
+
"producer": f"""You are a music producer specializing in song structure optimization for AI music generation.
|
| 210 |
+
|
| 211 |
+
{HEARTMULA_LYRICS_STRUCTURE}
|
| 212 |
+
|
| 213 |
+
Your task: Analyze and OPTIMIZE the tag structure for MAXIMUM musical impact.
|
| 214 |
+
|
| 215 |
+
CRITICAL OPTIMIZATION RULES:
|
| 216 |
+
1. Use HeartMuLa tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro]
|
| 217 |
+
2. Verify the structure follows genre conventions
|
| 218 |
+
3. Ensure proper tag sequence (VerseβPrechorusβChorus)
|
| 219 |
+
4. Check that [Chorus] appears at least 2-3 times
|
| 220 |
+
5. Verify [Bridge] provides contrast before final chorus
|
| 221 |
+
6. Balance repetition and variation
|
| 222 |
+
7. Ensure song length is appropriate (8-12 sections)
|
| 223 |
+
|
| 224 |
+
Output the restructured lyrics with OPTIMAL HeartMuLa tag placement.""",
|
| 225 |
+
|
| 226 |
+
"emotion_director": f"""You are an emotion director for music production.
|
| 227 |
+
|
| 228 |
+
{HEARTMULA_LYRICS_STRUCTURE}
|
| 229 |
+
|
| 230 |
+
Your task: Enhance emotional impact through STRATEGIC tag content.
|
| 231 |
+
|
| 232 |
+
EMOTIONAL MAPPING BY TAG:
|
| 233 |
+
- [Intro]: Intrigue, anticipation
|
| 234 |
+
- [Verse]: Storytelling, building connection
|
| 235 |
+
- [Prechorus]: Rising tension, excitement
|
| 236 |
+
- [Chorus]: Peak emotion, catharsis
|
| 237 |
+
- [Bridge]: Vulnerability, reflection, contrast
|
| 238 |
+
- [Interlude]: Breathing space, transition
|
| 239 |
+
- [Outro]: Resolution, lingering feeling
|
| 240 |
+
|
| 241 |
+
OPTIMIZATION RULES:
|
| 242 |
+
1. Each [Verse] should progress emotionally
|
| 243 |
+
2. [Chorus] must deliver the strongest emotional punch
|
| 244 |
+
3. [Bridge] should offer new emotional perspective
|
| 245 |
+
4. [Prechorus] should create anticipation for [Chorus]
|
| 246 |
+
5. Ensure dynamic contrast between sections
|
| 247 |
+
|
| 248 |
+
Enhance the lyrics for MAXIMUM emotional resonance.""",
|
| 249 |
+
|
| 250 |
+
"final_editor": f"""You are the final editor for HeartMuLa/MiniMax Music production.
|
| 251 |
+
|
| 252 |
+
{HEARTMULA_LYRICS_STRUCTURE}
|
| 253 |
+
|
| 254 |
+
Your task: Output PERFECTLY FORMATTED, production-ready lyrics.
|
| 255 |
+
|
| 256 |
+
CRITICAL OUTPUT RULES:
|
| 257 |
+
1. Output ONLY the actual lyrics with structure tags
|
| 258 |
+
2. Use HeartMuLa tags EXACTLY:
|
| 259 |
+
[Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]
|
| 260 |
+
3. DO NOT include English translations in parentheses
|
| 261 |
+
4. DO NOT include explanations, descriptions, or markdown
|
| 262 |
+
5. DO NOT include lines starting with * or >
|
| 263 |
+
6. For [Inst] or [Solo] sections, write ONLY the tag
|
| 264 |
+
7. Ensure MINIMUM 6-8 different sections
|
| 265 |
+
8. Verify [Chorus] appears at least 2 times
|
| 266 |
+
|
| 267 |
+
CORRECT FORMAT EXAMPLE:
|
| 268 |
+
[Intro]
|
| 269 |
+
|
| 270 |
+
[Verse]
|
| 271 |
+
μλ²½ μκ° μμ μ¨μ μ°λ¦¬
|
| 272 |
+
μ°¨κ°μ΄ λ°λλ μ°λ¦¬ λ°κ±Έμμ λ¬΄λ¦ κΏμ΄
|
| 273 |
+
|
| 274 |
+
[Prechorus]
|
| 275 |
+
μμ μ‘κ³ λΉμ κΊΌλ΄
|
| 276 |
+
|
| 277 |
+
[Chorus]
|
| 278 |
+
μ°λ¦° λΉλλ λ³μ΄ λμ΄
|
| 279 |
+
μ°λ¦¬ λͺ©μλ¦¬λ‘ μΈμμ λ€μ§μ΄
|
| 280 |
+
Yeah yeah yeah
|
| 281 |
+
|
| 282 |
+
[Verse]
|
| 283 |
+
DNAμ μκ²¨μ§ μ°½μ‘°λ λ©μΆμ§ μμ
|
| 284 |
+
|
| 285 |
+
[Prechorus]
|
| 286 |
+
λλΆμ νλλ₯Ό νκ³
|
| 287 |
+
|
| 288 |
+
[Chorus]
|
| 289 |
+
μ°λ¦° λΉλλ λ³μ΄ λμ΄
|
| 290 |
+
μ°λ¦¬ λͺ©μλ¦¬λ‘ οΏ½οΏ½οΏ½μμ λ€μ§μ΄
|
| 291 |
+
|
| 292 |
+
[Bridge]
|
| 293 |
+
무μ§κ°κ° νλ₯΄λ λ°€
|
| 294 |
+
μ°λ¦¬μ κΏμ λΆλ©Έ
|
| 295 |
+
|
| 296 |
+
[Chorus]
|
| 297 |
+
μ°λ¦° λΉλλ λ³μ΄ λμ΄
|
| 298 |
+
μλ²½μ κΉ¨μ
|
| 299 |
+
|
| 300 |
+
[Outro]
|
| 301 |
+
λ΄κ° λ§λ λ
Έλ μμν νμ€λ₯Έλ€
|
| 302 |
+
|
| 303 |
+
OUTPUT ONLY CLEAN LYRICS WITH OPTIMAL TAG STRUCTURE."""
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
# SOMA 에이전트 - 프롬프트 증강 (HeartMuLa 최적화)
|
| 307 |
+
PROMPT_AGENTS = {
|
| 308 |
+
"genre_specialist": f"""You are a music genre specialist for HeartMuLa and MiniMax Music.
|
| 309 |
+
|
| 310 |
+
{MINIMAX_MUSIC_GUIDE}
|
| 311 |
+
|
| 312 |
+
Analyze the input and identify:
|
| 313 |
+
- Main genre and sub-genres
|
| 314 |
+
- Era influences and regional styles
|
| 315 |
+
- Specific production techniques for the genre
|
| 316 |
+
- Instrument combinations that work best
|
| 317 |
+
|
| 318 |
+
Output detailed genre characteristics optimized for music generation.""",
|
| 319 |
+
|
| 320 |
+
"sound_designer": """You are a sound designer for HeartMuLa and MiniMax Music.
|
| 321 |
+
|
| 322 |
+
Define the complete sonic palette:
|
| 323 |
+
- Specific instruments: Piano, Guitar (acoustic/electric), Bass (upright/electric/808), Drums (acoustic/electronic/brushed), Synths (pad/lead/bass), Brass (saxophone/trumpet/trombone), Strings (violin/viola/cello)
|
| 324 |
+
- Drum patterns: four-on-the-floor, trap hi-hats, brushed jazz, etc.
|
| 325 |
+
- Bass characteristics: walking bass, 808 sub-bass, slap bass, etc.
|
| 326 |
+
- Atmospheric elements: reverb type, delay, spatial width
|
| 327 |
+
- Production style: analog warmth, modern crisp, lo-fi, etc.
|
| 328 |
+
|
| 329 |
+
Be extremely specific - both models can control individual instruments.""",
|
| 330 |
+
|
| 331 |
+
"vocal_director": """You are a vocal director for HeartMuLa and MiniMax Music.
|
| 332 |
+
|
| 333 |
+
Both models excel at:
|
| 334 |
+
- Human-like vocal timbre
|
| 335 |
+
- Multiple singing styles from one voice
|
| 336 |
+
- Male-female duets with conversational interplay
|
| 337 |
+
- A Cappella with layered harmonies
|
| 338 |
+
- Group/Choir with rich harmonic layers
|
| 339 |
+
|
| 340 |
+
Define:
|
| 341 |
+
- Voice type: male/female, age range, tone (warm/bright/husky/clear)
|
| 342 |
+
- Singing technique: belting, falsetto, breathy, raspy, smooth
|
| 343 |
+
- Vocal processing: reverb, delay, layers
|
| 344 |
+
- Delivery style: confident, vulnerable, aggressive, intimate
|
| 345 |
+
- For duets: describe each voice and their interaction
|
| 346 |
+
- For A Cappella: describe harmony parts
|
| 347 |
+
- For Group/Choir: describe layered vocals, unison sections""",
|
| 348 |
+
|
| 349 |
+
"tag_generator": f"""You are a style tag generator for HeartMuLa.
|
| 350 |
+
|
| 351 |
+
{HEARTMULA_TAG_GUIDE}
|
| 352 |
+
|
| 353 |
+
Your task: Generate OPTIMIZED comma-separated tags (NO SPACES between tags).
|
| 354 |
+
|
| 355 |
+
RULES:
|
| 356 |
+
1. Tags must be comma-separated WITHOUT spaces: tag1,tag2,tag3
|
| 357 |
+
2. Use lowercase with underscores for multi-word: electric_guitar, hip_hop
|
| 358 |
+
3. Include 5-8 tags covering: instrument, mood, genre, tempo
|
| 359 |
+
4. Be specific: "piano" not "keyboard", "808" not "bass"
|
| 360 |
+
5. Match the musical style described
|
| 361 |
+
|
| 362 |
+
OUTPUT FORMAT (example):
|
| 363 |
+
piano,synthesizer,happy,pop,upbeat,romantic,energetic
|
| 364 |
+
|
| 365 |
+
Output ONLY the comma-separated tags, nothing else.""",
|
| 366 |
+
|
| 367 |
+
"prompt_synthesizer": f"""You are the final prompt synthesizer for HeartMuLa/MiniMax Music.
|
| 368 |
+
|
| 369 |
+
{MINIMAX_MUSIC_GUIDE}
|
| 370 |
+
|
| 371 |
+
Combine all inputs into ONE cohesive production prompt:
|
| 372 |
+
- 150-200 words in English
|
| 373 |
+
- Include: genre, specific BPM, instruments with details, vocal characteristics, mood, production techniques
|
| 374 |
+
- Be extremely specific and detailed
|
| 375 |
+
|
| 376 |
+
EXAMPLE OUTPUTS:
|
| 377 |
+
|
| 378 |
+
"A cappella arrangement with pure vocal harmonies, no instrumental accompaniment. Features a lead soprano voice with rich layered backing vocals creating lush harmonies. 70 BPM, peaceful and soothing mood."
|
| 379 |
+
|
| 380 |
+
"Intimate jazz duet featuring conversational interplay between a warm male baritone and a silky female alto voice. Accompanied by brushed jazz drums, walking upright bass, and gentle piano comping. 95 BPM, late-night jazz club atmosphere."
|
| 381 |
+
|
| 382 |
+
"High-energy K-Pop dance track with a bright, clear female vocal. Catchy hook melody, driving beat with punchy kicks and intricate hi-hat programming. Layered synth hooks and EDM-style buildups. 128 BPM, confident energy."
|
| 383 |
+
|
| 384 |
+
Output ONLY the final prompt paragraph, nothing else."""
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
def call_groq(
    api_key: str,
    system: str,
    user_prompt: str,
    context: str = "",
    *,
    model: str = "llama-3.3-70b-versatile",
    temperature: float = 0.9,
    max_completion_tokens: int = 4096,
) -> str:
    """Run one non-streaming Groq chat completion and return the reply text.

    This helper never raises. Every failure is reported as a string that
    starts with ``"Error:"``; the callers in this file detect failures with
    ``startswith("Error:")``.

    Args:
        api_key: Groq API key used to construct the client.
        system: System prompt for the model.
        user_prompt: The task for the model.
        context: Optional earlier output. When non-empty it is placed in the
            user message as "Previous work" ahead of the task.
        model: Groq model identifier.
        temperature: Sampling temperature.
        max_completion_tokens: Upper bound on the generated tokens.

    Returns:
        The assistant message content, or an ``"Error: ..."`` string.
    """
    if context:
        user_content = f"Previous work:\n{context}\n\nTask: {user_prompt}"
    else:
        user_content = user_prompt

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user_content},
    ]

    try:
        client = Groq(api_key=api_key)
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=1,
            stream=False,
        )

        # Validate each level of the response before dereferencing it so the
        # user gets a specific message instead of an AttributeError text.
        if completion is None:
            return "Error: API 응답이 없습니다."
        if not getattr(completion, "choices", None):
            return "Error: API 응답에 choices가 없습니다."

        message = completion.choices[0].message
        if message is None:
            return "Error: API 응답에 message가 없습니다."
        if message.content is None:
            return "Error: API 응답 content가 비어있습니다."

        return message.content

    except Exception as e:
        # Deliberate catch-all: callers rely on the "Error:" string protocol.
        return f"Error: {str(e)}"
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
def clean_lyrics(text: str) -> str:
    """Post-process model-written lyrics into plain HeartMuLa-formatted text.

    Markdown-style lines (bullets, quotes, rules, headings), lines that are
    entirely parenthesised and arrow-prefixed annotation lines are dropped.
    A trailing ``(Latin ...)`` note and ``**bold**`` markers are removed
    from the remaining lines, section tags are mapped onto the tag names
    HeartMuLa uses, and runs of blank lines are collapsed to one.

    Args:
        text: Raw lyrics text. ``None`` yields ``""``; any other non-string
            value is returned as ``str(text)`` without cleaning.

    Returns:
        The cleaned lyrics, stripped of leading and trailing whitespace.
    """
    import re

    if text is None:
        return ""
    if not isinstance(text, str):
        return str(text)
    if not text.strip():
        return ""

    # Compiled once per call rather than rebuilt for every line.
    skip_line = re.compile('|'.join((
        r'^\s*\*',           # bullet / emphasis lines (also covers '**Heading**')
        r'^\s*>',            # block quotes
        r'^\s*---',          # horizontal rules
        r'^\s*###',          # markdown headings
        r'^\(\s*.*\s*\)$',   # a whole line wrapped in parentheses
        r'^\s*→\s*\*',       # arrow-prefixed annotation such as '→ *note*'
    )))
    trailing_note = re.compile(r'\s*\([A-Za-z].*?\)\s*$')
    bold_markup = re.compile(r'\*\*(.*?)\*\*')
    tag_rewrites = (
        (re.compile(r'\[Pre.?Chorus\]', re.IGNORECASE), '[Prechorus]'),
        # HeartMuLa has no PostChorus tag, so it is folded into Chorus.
        (re.compile(r'\[Post.?Chorus\]', re.IGNORECASE), '[Chorus]'),
        (re.compile(r'\[Build.?Up\]', re.IGNORECASE), '[Prechorus]'),
        (re.compile(r'\[Break\]', re.IGNORECASE), '[Interlude]'),
        (re.compile(r'\[Transition\]', re.IGNORECASE), '[Interlude]'),
    )

    cleaned_lines = []
    for line in text.split('\n'):
        if not line.strip():
            # Keep blank lines for now; long runs are collapsed below.
            cleaned_lines.append('')
            continue
        if skip_line.match(line):
            continue

        line = trailing_note.sub('', line)
        line = bold_markup.sub(r'\1', line)
        for pattern, replacement in tag_rewrites:
            line = pattern.sub(replacement, line)

        if line.strip():
            cleaned_lines.append(line.strip())

    result = re.sub(r'\n{3,}', '\n\n', '\n'.join(cleaned_lines))
    return result.strip()
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
def clean_tags(tags: str) -> str:
    """Normalise a free-form tag string into HeartMuLa tag format.

    The result is lowercase, comma-separated with no whitespace, free of
    empty entries, and de-duplicated with first-occurrence order kept.
    Commas and any whitespace (spaces, tabs, newlines) all act as
    separators, so a model reply spread over several lines still yields
    well-formed tags.

    Args:
        tags: Raw tag text, e.g. ``"Piano, Happy ,pop"``.

    Returns:
        The normalised tag string, or ``""`` for empty input.
    """
    import re

    if not tags:
        return ""

    tokens = re.split(r'[,\s]+', tags.lower())
    # dict.fromkeys removes duplicates while preserving insertion order.
    unique_tags = dict.fromkeys(token for token in tokens if token)
    return ','.join(unique_tags)
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
def generate_lyrics_soma(
    api_key: str, theme: str, genre: str, mood: str,
    language: str, vocal_type: str, additional: str, progress=gr.Progress()
):
    """Generate lyrics through the four-stage SOMA agent chain.

    The stages run in order (lyricist, producer, emotion director, final
    editor); each stage after the first receives the previous stage's output
    as context. The final output is passed through ``clean_lyrics``.

    Args:
        api_key: Groq API key.
        theme: Song theme; required.
        genre: Musical genre.
        mood: Target mood.
        language: Language of the lyrics.
        vocal_type: Vocal arrangement; used to add a cappella / duet /
            group hints to the prompt. ``None`` is treated as empty.
        additional: Optional extra instructions.
        progress: Gradio progress callback.

    Returns:
        A 5-tuple ``(status, draft, structured, emotional, final)``. On
        failure ``status`` carries the error and the stages that did not
        complete are empty strings.
    """
    if not api_key or not api_key.strip():
        return "❌ Groq API Key 필요", "", "", "", ""
    if not theme or not theme.strip():
        return "❌ 주제를 입력하세요", "", "", "", ""

    # Guard against a missing dropdown value before calling .lower().
    vocal_key = (vocal_type or "").lower()
    additional_line = f'- Additional: {additional}' if additional else ''
    cappella_hint = (
        "For A Cappella: Include harmony sections with 'ooh', 'aah', humming."
        if "cappella" in vocal_key else ""
    )
    duet_hint = "For Duet: Mark vocal exchanges clearly." if "duet" in vocal_key else ""
    group_hint = (
        "For Group/Choir: Include anthemic chants, call-and-response patterns."
        if "group" in vocal_key or "choir" in vocal_key else ""
    )

    base_prompt = f"""Create PROFESSIONAL lyrics optimized for HeartMuLa/MiniMax Music:
- Theme: {theme}
- Genre: {genre}
- Mood: {mood}
- Language: {language}
- Vocal Type: {vocal_type}
{additional_line}

CRITICAL - USE HeartMuLa STRUCTURE TAGS:
Available: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]

REQUIRED STRUCTURE (minimum):
1. [Intro] - Set the mood
2. [Verse] x2-3 - Tell the story
3. [Prechorus] - Build tension (note: Prechorus, not Pre Chorus)
4. [Chorus] x2-3 - Main hook (MOST important!)
5. [Bridge] - Emotional contrast
6. [Outro] - Conclusion

{cappella_hint}
{duet_hint}
{group_hint}

Create lyrics with PERFECT HeartMuLa structure!"""

    # (agent key, task text, progress fraction, progress caption, error label)
    stages = (
        ("lyricist", base_prompt, 0.2,
         "🎤 작사가 - 초안 작성...", "작사가 오류"),
        ("producer", f"Optimize structure for {genre} {vocal_type}. Use HeartMuLa tags.", 0.4,
         "🎹 프로듀서 - 구조 최적화...", "프로듀서 오류"),
        ("emotion_director", f"Enhance emotional impact for {mood}.", 0.6,
         "💫 감성 디렉터 - 감정 강화...", "감성 디렉터 오류"),
        ("final_editor", "Output ONLY clean lyrics with HeartMuLa tags. No translations, no markdown.", 0.8,
         "✨ 최종 편집 - 품질 검증...", "최종 편집 오류"),
    )

    try:
        outputs = []
        for agent, task, fraction, caption, error_label in stages:
            progress(fraction, desc=caption)
            previous = outputs[-1] if outputs else ""
            result = call_groq(api_key, LYRICS_AGENTS[agent], task, previous)
            if result.startswith("Error:"):
                # Return what finished so far; pad the remaining slots.
                padded = outputs + [""] * (len(stages) - len(outputs))
                return (f"❌ {error_label}: {result}", *padded)
            outputs.append(result)

        draft, structured, emotional, final = outputs
        final_cleaned = clean_lyrics(final)

        progress(1.0, desc="✅ 완료!")
        return "✅ 가사 생성 완료!", draft, structured, emotional, final_cleaned

    except Exception as e:
        # UI boundary: surface the problem as a status string.
        return f"❌ 예외 발생: {str(e)}", "", "", "", ""
|
| 568 |
|
| 569 |
+
|
| 570 |
+
def quick_lyrics(api_key: str, theme: str, genre: str, mood: str, language: str, vocal_type: str, additional: str):
    """Generate lyrics with a single Groq call (no multi-agent chain).

    Args:
        api_key: Groq API key; required.
        theme: Song theme; required.
        genre: Musical genre.
        mood: Target mood.
        language: Language of the lyrics.
        vocal_type: Vocal arrangement.
        additional: Optional extra instructions.

    Returns:
        The lyrics after ``clean_lyrics``, or a status string beginning
        with "❌" when validation or generation fails.
    """
    if not api_key or not api_key.strip():
        return "❌ API Key가 필요합니다"
    if not theme or not theme.strip():
        return "❌ 주제를 입력하세요"

    special_line = f'- Special: {additional}' if additional else ''

    prompt = f"""Create PROFESSIONAL song lyrics for HeartMuLa/MiniMax Music:
- Theme: {theme}
- Genre: {genre}
- Mood: {mood}
- Language: {language}
- Vocal: {vocal_type}
{special_line}

USE HeartMuLa STRUCTURE (minimum 8-10 sections):
[Intro] → [Verse] → [Prechorus] → [Chorus] → [Verse] → [Prechorus] → [Chorus] → [Bridge] → [Chorus] → [Outro]

Available tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]

REQUIREMENTS:
- [Chorus] must appear AT LEAST 2-3 times
- Use [Prechorus] (not Pre Chorus) before each [Chorus]
- Add [Bridge] before final [Chorus]

OUTPUT ONLY: Structure tags + lyrics. NO translations, NO explanations."""

    system_prompt = f"""You are a professional songwriter for HeartMuLa Music.
Create lyrics with PERFECT HeartMuLa structure tag placement.
{HEARTMULA_LYRICS_STRUCTURE}
Output ONLY clean lyrics with optimal tags."""

    try:
        result = call_groq(api_key, system_prompt, prompt)

        if result.startswith("Error:"):
            return f"❌ 가사 생성 실패: {result}"

        return clean_lyrics(result)
    except Exception as e:
        # UI boundary: surface the problem as a status string.
        return f"❌ 예외 발생: {str(e)}"
|
| 609 |
+
|
| 610 |
+
|
| 611 |
+
def generate_tags(api_key: str, genre: str, mood: str, instruments: str, tempo: str):
    """Ask the tag-generator agent for HeartMuLa-style tags.

    Args:
        api_key: Groq API key. When empty, the fallback tags are returned
            without calling the API.
        genre: Musical genre.
        mood: Target mood.
        instruments: Instruments to feature.
        tempo: Tempo description.

    Returns:
        A comma-separated tag string normalised by ``clean_tags``, or the
        fallback ``"piano,happy,pop"`` when no usable tags are produced.
    """
    fallback = "piano,happy,pop"

    if not api_key or not api_key.strip():
        return fallback

    prompt = f"""Generate HeartMuLa style tags:
- Genre: {genre}
- Mood: {mood}
- Instruments: {instruments}
- Tempo: {tempo}

OUTPUT FORMAT: comma-separated tags WITHOUT spaces
Example: piano,synthesizer,happy,pop,upbeat,romantic

Generate 5-8 relevant tags:"""

    try:
        result = call_groq(api_key, PROMPT_AGENTS["tag_generator"], prompt)
        if result.startswith("Error:"):
            return fallback
        # An empty cleaned result is as unusable as an error.
        return clean_tags(result) or fallback
    except Exception:
        # Best-effort by design, but no longer swallows KeyboardInterrupt
        # or SystemExit the way a bare ``except:`` did.
        return fallback
|
| 634 |
+
|
| 635 |
+
|
| 636 |
+
def augment_prompt_soma(
    api_key: str, user_prompt: str, genre: str, mood: str,
    tempo: str, vocal_type: str, instruments: str, reference_style: str, progress=gr.Progress()
):
    """Expand a short idea into a full production prompt plus tags.

    Runs the genre specialist, sound designer and vocal director in a
    chain (each receives the previous output as context), generates tags
    with the tag generator, then asks the prompt synthesizer to merge
    everything into one paragraph.

    Args:
        api_key: Groq API key; required.
        user_prompt: The user's base idea; required.
        genre: Musical genre.
        mood: Target mood.
        tempo: Tempo description.
        vocal_type: Vocal arrangement.
        instruments: Instruments to feature.
        reference_style: Reference artist or style.
        progress: Gradio progress callback.

    Returns:
        ``(prompt, tags)`` on success. On failure the first element is a
        status string beginning with "❌" and the second is ``""``.
    """
    if not api_key or not api_key.strip():
        return "❌ Groq API Key 필요", ""
    if not user_prompt or not user_prompt.strip():
        return "❌ 기본 프롬프트를 입력하세요", ""

    fallback_tags = "piano,happy,pop"

    base_info = f"""User's base idea: {user_prompt}
Genre: {genre}
Mood: {mood}
Tempo: {tempo}
Vocal Type: {vocal_type}
Instruments: {instruments}
Reference Style: {reference_style}"""

    try:
        progress(0.2, desc="🎸 장르 분석중...")
        genre_analysis = call_groq(api_key, PROMPT_AGENTS["genre_specialist"], base_info)
        if genre_analysis.startswith("Error:"):
            return f"❌ 장르 분석 실패: {genre_analysis}", ""

        progress(0.4, desc="🎛️ 사운드 설계중...")
        sound_design = call_groq(api_key, PROMPT_AGENTS["sound_designer"],
                                 f"Design sounds for:\n{base_info}", genre_analysis)
        if sound_design.startswith("Error:"):
            return f"❌ 사운드 설계 실패: {sound_design}", ""

        progress(0.55, desc="🎤 보컬 설정중...")
        vocal_design = call_groq(api_key, PROMPT_AGENTS["vocal_director"],
                                 f"Define vocals for:\n{base_info}", sound_design)
        if vocal_design.startswith("Error:"):
            return f"❌ 보컬 설정 실패: {vocal_design}", ""

        progress(0.7, desc="🏷️ 태그 생성중...")
        tags = call_groq(api_key, PROMPT_AGENTS["tag_generator"],
                         f"Generate tags for: {genre}, {mood}, {instruments}, {tempo}")
        # Tag failure is not fatal: fall back to default tags, also when
        # cleaning leaves nothing usable.
        if tags.startswith("Error:"):
            tags_cleaned = fallback_tags
        else:
            tags_cleaned = clean_tags(tags) or fallback_tags

        progress(0.85, desc="✨ 프롬프트 생성중...")
        final_prompt = call_groq(
            api_key,
            PROMPT_AGENTS["prompt_synthesizer"],
            f"""Synthesize into ONE music production prompt (150-200 words):
Base: {user_prompt}
Genre Analysis: {genre_analysis}
Sound Design: {sound_design}
Vocal Design: {vocal_design}
Reference Style: {reference_style}

Output ONLY the final prompt paragraph in English."""
        )

        if final_prompt.startswith("Error:"):
            return f"❌ 프롬프트 합성 실패: {final_prompt}", ""

        progress(1.0, desc="✅ 완료!")
        return final_prompt.strip(), tags_cleaned

    except Exception as e:
        # UI boundary: surface the problem as a status string.
        return f"❌ 예외 발생: {str(e)}", ""
|
| 699 |
+
|
| 700 |
+
|
| 701 |
+
def _save_audio_bytes(audio_bytes: bytes, file_ext: str):
    """Write audio to a new, uniquely named file; return its path or None."""
    import tempfile

    # Same preference order as before: system temp dir, then working dir.
    for directory in (tempfile.gettempdir(), os.getcwd()):
        try:
            # mkstemp guarantees a fresh name, so concurrent requests cannot
            # overwrite each other's output.
            fd, path = tempfile.mkstemp(
                prefix="minimax_music_", suffix=f".{file_ext}", dir=directory
            )
        except OSError:
            continue
        try:
            with os.fdopen(fd, "wb") as handle:
                handle.write(audio_bytes)
        except OSError:
            # Do not leave a truncated file behind; try the next location.
            try:
                os.remove(path)
            except OSError:
                pass
            continue
        return path
    return None


def generate_music(api_key: str, model: str, prompt: str, lyrics: str,
                   sample_rate: int, bitrate: int, audio_format: str):
    """Generate music through the MiniMax music-generation endpoint.

    Args:
        api_key: MiniMax API key; required.
        model: MiniMax model name sent in the request.
        prompt: Style/production prompt; required.
        lyrics: Optional lyrics; sent only when non-blank.
        sample_rate: Output sample rate.
        bitrate: Output bitrate.
        audio_format: Output format, also used as the file extension
            ("mp3" when empty).

    Returns:
        ``(audio_path, status, response_json)``. ``audio_path`` is ``None``
        on any failure; ``response_json`` is the pretty-printed API response
        with the hex audio payload abbreviated, or ``""`` when no response
        was parsed.
    """
    if not api_key or not api_key.strip():
        return None, "❌ MiniMax API Key 필요", ""
    if not prompt or not prompt.strip():
        return None, "❌ 프롬프트 필요", ""

    url = "https://api.minimax.io/v1/music_generation"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

    payload = {
        "model": model,
        "prompt": prompt,
        "audio_setting": {
            "sample_rate": sample_rate,
            "bitrate": bitrate,
            "format": audio_format
        }
    }
    if lyrics and lyrics.strip():
        payload["lyrics"] = lyrics

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=600)

        try:
            result = response.json()
        except ValueError as json_err:
            return None, f"❌ JSON 파싱 실패: {str(json_err)}", response.text[:500] if response.text else ""

        if result is None:
            return None, "❌ API 응답이 비어있습니다.", ""

        # Copy for display with the (potentially huge) hex payload abbreviated.
        result_for_display = result.copy()
        data_field = result_for_display.get("data")
        if isinstance(data_field, dict):
            data_copy = data_field.copy()
            audio_field = data_copy.get("audio")
            if isinstance(audio_field, str) and len(audio_field) > 100:
                data_copy["audio"] = f"[HEX DATA - {len(audio_field)} chars]"
            result_for_display["data"] = data_copy

        json_output = json.dumps(result_for_display, ensure_ascii=False, indent=2)

        # ``or {}`` also covers keys that are present with a JSON null value.
        base_resp = result.get("base_resp") or {}
        status_code = base_resp.get("status_code", -1)
        status_msg = base_resp.get("status_msg", "")

        if status_code != 0:
            return None, f"❌ API 오류: {status_msg} (code: {status_code})", json_output

        data = result.get("data") or {}
        if not data:
            return None, "❌ 응답에 data가 없습니다.", json_output

        audio_hex = data.get("audio")
        audio_status = data.get("status")

        if audio_status != 2:
            return None, f"⏳ 생성 중... (status: {audio_status})", json_output

        if not audio_hex:
            return None, "❌ 응답에 audio 데이터가 없습니다.", json_output

        try:
            audio_bytes = bytes.fromhex(audio_hex)
        except (ValueError, TypeError) as hex_err:
            return None, f"❌ HEX 디코딩 실패: {str(hex_err)}", json_output

        file_ext = audio_format if audio_format else "mp3"
        saved_path = _save_audio_bytes(audio_bytes, file_ext)
        if not saved_path:
            return None, "❌ 파일 저장 실패", json_output

        # Metadata is optional; a missing or malformed value must not turn a
        # successful save into an error.
        extra_info = result.get("extra_info") or {}
        duration_ms = extra_info.get("music_duration", 0)
        if isinstance(duration_ms, (int, float)) and duration_ms:
            duration_sec = duration_ms / 1000
        else:
            duration_sec = 0
        file_size_kb = len(audio_bytes) / 1024

        return saved_path, f"✅ 음악 생성 완료! ({duration_sec:.1f}초, {file_size_kb:.0f}KB)", json_output

    except requests.exceptions.Timeout:
        return None, "❌ 요청 시간 초과 (10분)", ""
    except requests.exceptions.ConnectionError:
        return None, "❌ 연결 오류", ""
    except Exception as e:
        # UI boundary: surface the problem as a status string.
        return None, f"❌ 예외 발생: {str(e)}", ""
|
| 814 |
+
|
| 815 |
+
|
| 816 |
+
def load_example_from_dropdown(selection):
    """Return the example prompt and tags for the selected dropdown entry.

    Args:
        selection: The label chosen in the examples dropdown.

    Returns:
        ``(prompt, tags)`` taken from ``EXAMPLE_PROMPTS``, or ``("", "")``
        when nothing is selected or the label is not known.
    """
    if not selection:
        return "", ""

    # Maps a dropdown label to the matching key in EXAMPLE_PROMPTS.
    # NOTE(review): these labels are mis-encoded in this copy of the file
    # (three of them were also split across two lines). They are reproduced
    # as shown; verify them against the dropdown choices and the
    # EXAMPLE_PROMPTS keys before relying on the lookup.
    mapping = {
        "π€ A Cappella - μμ 보컬 νλͺ¨λ": "π€ A Cappella (μμΉ΄ν λΌ)",
        "π₯ Group Harmony - νμν ν©μ°½": "π₯ Group Harmony (κ·Έλ£Ή νλͺ¨λ)",
        "π· Jazz Duet - λ¨λλμ£": "π· Jazz Duet (μ¬μ¦ λμ£)",
        "πΈ Multi-Style - μ€νμΌ μ ν": "πΈ Multi-Style (λ©ν°μ€νμΌ)",
        "π Urban Chill - R&B": "π Urban Chill (μ΄λ° μΉ )",
        "πΉ Jazz Club - λΌμ΄λΈ": "πΉ Jazz Club (μ¬μ¦ ν΄λ½)",
        "πͺ© Retro Disco - 80λλ": "πͺ© Retro Disco (λ νΈλ‘ λμ€μ½)",
        "π¬ Film Score - μλ€λ§ν±": "π¬ Film Score (μν μ€μ½μ΄)",
        "π΅ K-Pop Dance - κ³ μλμ§": "π΅ K-Pop Dance (K-Pop λμ€)",
        "π» Orchestral Ballad - μμ₯": "π» Orchestral Ballad (μ€μΌμ€νΈλΌ λ°λΌλ)",
        "π₯ HeartMuLa Default - κΈ°λ³Έ": "π₯ HeartMuLa Default (κΈ°λ³Έ μμ)"
    }

    key = mapping.get(selection)
    # Guard the second lookup so an unmapped or stale label yields empties.
    if key and key in EXAMPLE_PROMPTS:
        return EXAMPLE_PROMPTS[key]["prompt"], EXAMPLE_PROMPTS[key]["tags"]
    return "", ""
|
| 839 |
+
|
| 840 |
+
|
| 841 |
+
# ============================================================
|
| 842 |
+
# 🎨 Comic Classic Theme - Toon Playground (based on Document 2)
|
| 843 |
+
# ============================================================
|
| 844 |
+
|
| 845 |
+
css = """
|
| 846 |
+
/* ===== π¨ Google Fonts Import ===== */
|
| 847 |
+
@import url('https://fonts.googleapis.com/css2?family=Bangers&family=Comic+Neue:wght@400;700&display=swap');
|
| 848 |
+
|
| 849 |
+
/* ===== π¨ Comic Classic λ°°κ²½ - λΉν°μ§ νμ΄νΌ + λνΈ ν¨ν΄ ===== */
|
| 850 |
+
.gradio-container {
|
| 851 |
+
background-color: #FEF9C3 !important;
|
| 852 |
+
background-image:
|
| 853 |
+
radial-gradient(#1F2937 1px, transparent 1px) !important;
|
| 854 |
+
background-size: 20px 20px !important;
|
| 855 |
+
min-height: 100vh !important;
|
| 856 |
+
font-family: 'Comic Neue', cursive, sans-serif !important;
|
| 857 |
+
}
|
| 858 |
+
|
| 859 |
+
/* ===== νκΉ
νμ΄μ€ μλ¨ μμ μ¨κΉ ===== */
|
| 860 |
+
.huggingface-space-header,
|
| 861 |
+
#space-header,
|
| 862 |
+
.space-header,
|
| 863 |
+
[class*="space-header"],
|
| 864 |
+
.svelte-1ed2p3z,
|
| 865 |
+
.space-header-badge,
|
| 866 |
+
.header-badge {
|
| 867 |
+
display: none !important;
|
| 868 |
+
}
|
| 869 |
+
|
| 870 |
+
/* ===== Footer μμ μ¨κΉ ===== */
|
| 871 |
+
footer,
|
| 872 |
+
.footer,
|
| 873 |
+
.gradio-container footer,
|
| 874 |
+
.built-with {
|
| 875 |
+
display: none !important;
|
| 876 |
+
}
|
| 877 |
+
|
| 878 |
+
/* ===== λ©μΈ 컨ν
μ΄λ ===== */
|
| 879 |
+
#col-container {
|
| 880 |
+
max-width: 1400px;
|
| 881 |
+
margin: 0 auto;
|
| 882 |
+
}
|
| 883 |
+
|
| 884 |
+
/* ===== π¨ ν€λ νμ΄ν - μ½λ―Ή μ€νμΌ ===== */
|
| 885 |
+
.header-title h1 {
|
| 886 |
+
font-family: 'Bangers', cursive !important;
|
| 887 |
+
color: #1F2937 !important;
|
| 888 |
+
font-size: 3.2rem !important;
|
| 889 |
+
font-weight: 400 !important;
|
| 890 |
+
text-align: center !important;
|
| 891 |
+
margin-bottom: 0.5rem !important;
|
| 892 |
+
text-shadow:
|
| 893 |
+
4px 4px 0px #FACC15,
|
| 894 |
+
6px 6px 0px #1F2937 !important;
|
| 895 |
+
letter-spacing: 3px !important;
|
| 896 |
+
-webkit-text-stroke: 2px #1F2937 !important;
|
| 897 |
+
}
|
| 898 |
+
|
| 899 |
+
/* ===== π¨ μλΈνμ΄ν ===== */
|
| 900 |
+
.subtitle-text {
|
| 901 |
+
text-align: center !important;
|
| 902 |
+
font-family: 'Comic Neue', cursive !important;
|
| 903 |
+
font-size: 1.1rem !important;
|
| 904 |
+
color: #1F2937 !important;
|
| 905 |
+
margin-bottom: 1.5rem !important;
|
| 906 |
+
font-weight: 700 !important;
|
| 907 |
+
}
|
| 908 |
+
|
| 909 |
+
/* ===== π¨ μΉμ
νμ΄ν ===== */
|
| 910 |
+
.section-title {
|
| 911 |
+
font-family: 'Bangers', cursive !important;
|
| 912 |
+
color: #1F2937 !important;
|
| 913 |
+
font-size: 1.8rem !important;
|
| 914 |
+
border-bottom: 4px solid #3B82F6 !important;
|
| 915 |
+
padding-bottom: 8px !important;
|
| 916 |
+
margin-bottom: 16px !important;
|
| 917 |
+
text-shadow: 2px 2px 0px #FACC15 !important;
|
| 918 |
+
}
|
| 919 |
+
|
| 920 |
+
/* ===== π¨ μΉ΄λ/ν¨λ - λ§ν νλ μ μ€νμΌ ===== */
|
| 921 |
+
.gr-panel,
|
| 922 |
+
.gr-box,
|
| 923 |
+
.gr-form,
|
| 924 |
+
.block,
|
| 925 |
+
.gr-group {
|
| 926 |
+
background: #FFFFFF !important;
|
| 927 |
+
border: 3px solid #1F2937 !important;
|
| 928 |
+
border-radius: 8px !important;
|
| 929 |
+
box-shadow: 6px 6px 0px #1F2937 !important;
|
| 930 |
+
transition: all 0.2s ease !important;
|
| 931 |
+
}
|
| 932 |
+
|
| 933 |
+
.gr-panel:hover,
|
| 934 |
+
.block:hover {
|
| 935 |
+
transform: translate(-2px, -2px) !important;
|
| 936 |
+
box-shadow: 8px 8px 0px #1F2937 !important;
|
| 937 |
+
}
|
| 938 |
+
|
| 939 |
+
/* ===== π¨ μ
λ ₯ νλ (Textbox) ===== */
|
| 940 |
+
textarea,
|
| 941 |
+
input[type="text"],
|
| 942 |
+
input[type="number"],
|
| 943 |
+
input[type="password"] {
|
| 944 |
+
background: #FFFFFF !important;
|
| 945 |
+
border: 3px solid #1F2937 !important;
|
| 946 |
+
border-radius: 8px !important;
|
| 947 |
+
color: #1F2937 !important;
|
| 948 |
+
font-family: 'Comic Neue', cursive !important;
|
| 949 |
+
font-size: 1rem !important;
|
| 950 |
+
font-weight: 700 !important;
|
| 951 |
+
transition: all 0.2s ease !important;
|
| 952 |
+
}
|
| 953 |
+
|
| 954 |
+
textarea:focus,
|
| 955 |
+
input[type="text"]:focus,
|
| 956 |
+
input[type="number"]:focus,
|
| 957 |
+
input[type="password"]:focus {
|
| 958 |
+
border-color: #3B82F6 !important;
|
| 959 |
+
box-shadow: 4px 4px 0px #3B82F6 !important;
|
| 960 |
+
outline: none !important;
|
| 961 |
+
}
|
| 962 |
+
|
| 963 |
+
textarea::placeholder {
|
| 964 |
+
color: #9CA3AF !important;
|
| 965 |
+
font-weight: 400 !important;
|
| 966 |
+
}
|
| 967 |
+
|
| 968 |
+
/* ===== π¨ Primary λ²νΌ - μ½λ―Ή λΈλ£¨ ===== */
|
| 969 |
+
.gr-button-primary,
|
| 970 |
+
button.primary,
|
| 971 |
+
.gr-button.primary {
|
| 972 |
+
background: #3B82F6 !important;
|
| 973 |
+
border: 3px solid #1F2937 !important;
|
| 974 |
+
border-radius: 8px !important;
|
| 975 |
+
color: #FFFFFF !important;
|
| 976 |
+
font-family: 'Bangers', cursive !important;
|
| 977 |
+
font-weight: 400 !important;
|
| 978 |
+
font-size: 1.2rem !important;
|
| 979 |
+
letter-spacing: 2px !important;
|
| 980 |
+
padding: 12px 24px !important;
|
| 981 |
+
box-shadow: 5px 5px 0px #1F2937 !important;
|
| 982 |
+
transition: all 0.1s ease !important;
|
| 983 |
+
text-shadow: 1px 1px 0px #1F2937 !important;
|
| 984 |
+
}
|
| 985 |
+
|
| 986 |
+
.gr-button-primary:hover,
|
| 987 |
+
button.primary:hover,
|
| 988 |
+
.gr-button.primary:hover {
|
| 989 |
+
background: #2563EB !important;
|
| 990 |
+
transform: translate(-2px, -2px) !important;
|
| 991 |
+
box-shadow: 7px 7px 0px #1F2937 !important;
|
| 992 |
+
}
|
| 993 |
+
|
| 994 |
+
.gr-button-primary:active,
|
| 995 |
+
button.primary:active,
|
| 996 |
+
.gr-button.primary:active {
|
| 997 |
+
transform: translate(3px, 3px) !important;
|
| 998 |
+
box-shadow: 2px 2px 0px #1F2937 !important;
|
| 999 |
+
}
|
| 1000 |
+
|
| 1001 |
+
/* ===== π¨ Secondary λ²νΌ - μ½λ―Ή λ λ ===== */
|
| 1002 |
+
.gr-button-secondary,
|
| 1003 |
+
button.secondary {
|
| 1004 |
+
background: #EF4444 !important;
|
| 1005 |
+
border: 3px solid #1F2937 !important;
|
| 1006 |
+
border-radius: 8px !important;
|
| 1007 |
+
color: #FFFFFF !important;
|
| 1008 |
+
font-family: 'Bangers', cursive !important;
|
| 1009 |
+
font-weight: 400 !important;
|
| 1010 |
+
font-size: 1.1rem !important;
|
| 1011 |
+
letter-spacing: 1px !important;
|
| 1012 |
+
box-shadow: 4px 4px 0px #1F2937 !important;
|
| 1013 |
+
transition: all 0.1s ease !important;
|
| 1014 |
+
text-shadow: 1px 1px 0px #1F2937 !important;
|
| 1015 |
+
}
|
| 1016 |
+
|
| 1017 |
+
.gr-button-secondary:hover,
|
| 1018 |
+
button.secondary:hover {
|
| 1019 |
+
background: #DC2626 !important;
|
| 1020 |
+
transform: translate(-2px, -2px) !important;
|
| 1021 |
+
box-shadow: 6px 6px 0px #1F2937 !important;
|
| 1022 |
+
}
|
| 1023 |
+
|
| 1024 |
+
/* ===== π¨ Generate λ²νΌ - μ½λ―Ή κ·Έλ¦° ===== */
|
| 1025 |
+
.generate-btn {
|
| 1026 |
+
background: #10B981 !important;
|
| 1027 |
+
border: 3px solid #1F2937 !important;
|
| 1028 |
+
border-radius: 8px !important;
|
| 1029 |
+
color: #FFFFFF !important;
|
| 1030 |
+
font-family: 'Bangers', cursive !important;
|
| 1031 |
+
font-weight: 400 !important;
|
| 1032 |
+
font-size: 1.3rem !important;
|
| 1033 |
+
letter-spacing: 2px !important;
|
| 1034 |
+
box-shadow: 5px 5px 0px #1F2937 !important;
|
| 1035 |
+
text-shadow: 1px 1px 0px #1F2937 !important;
|
| 1036 |
+
}
|
| 1037 |
+
|
| 1038 |
+
.generate-btn:hover {
|
| 1039 |
+
background: #059669 !important;
|
| 1040 |
+
transform: translate(-2px, -2px) !important;
|
| 1041 |
+
box-shadow: 7px 7px 0px #1F2937 !important;
|
| 1042 |
+
}
|
| 1043 |
+
|
| 1044 |
+
/* ===== π¨ μμ½λμΈ - λ§νμ μ€νμΌ ===== */
|
| 1045 |
+
.gr-accordion {
|
| 1046 |
+
background: #FACC15 !important;
|
| 1047 |
+
border: 3px solid #1F2937 !important;
|
| 1048 |
+
border-radius: 8px !important;
|
| 1049 |
+
box-shadow: 4px 4px 0px #1F2937 !important;
|
| 1050 |
+
}
|
| 1051 |
+
|
| 1052 |
+
.gr-accordion-header {
|
| 1053 |
+
color: #1F2937 !important;
|
| 1054 |
+
font-family: 'Comic Neue', cursive !important;
|
| 1055 |
+
font-weight: 700 !important;
|
| 1056 |
+
font-size: 1.1rem !important;
|
| 1057 |
+
}
|
| 1058 |
+
|
| 1059 |
+
/* ===== π¨ Dropdown ===== */
|
| 1060 |
+
.gr-dropdown,
|
| 1061 |
+
select {
|
| 1062 |
+
background: #FFFFFF !important;
|
| 1063 |
+
border: 3px solid #1F2937 !important;
|
| 1064 |
+
border-radius: 8px !important;
|
| 1065 |
+
color: #1F2937 !important;
|
| 1066 |
+
font-family: 'Comic Neue', cursive !important;
|
| 1067 |
+
font-weight: 700 !important;
|
| 1068 |
+
}
|
| 1069 |
+
|
| 1070 |
+
/* ===== π¨ λΌλ²¨ μ€νμΌ ===== */
|
| 1071 |
+
label,
|
| 1072 |
+
.gr-input-label,
|
| 1073 |
+
.gr-block-label {
|
| 1074 |
+
color: #1F2937 !important;
|
| 1075 |
+
font-family: 'Comic Neue', cursive !important;
|
| 1076 |
+
font-weight: 700 !important;
|
| 1077 |
+
font-size: 1rem !important;
|
| 1078 |
+
}
|
| 1079 |
+
|
| 1080 |
+
/* ===== π¨ μ€λμ€ νλ μ΄μ΄ ===== */
|
| 1081 |
+
.gr-audio,
|
| 1082 |
+
audio {
|
| 1083 |
+
border: 4px solid #1F2937 !important;
|
| 1084 |
+
border-radius: 8px !important;
|
| 1085 |
+
box-shadow: 6px 6px 0px #1F2937 !important;
|
| 1086 |
+
}
|
| 1087 |
+
|
| 1088 |
+
/* ===== π¨ μ½λ λΈλ‘ ===== */
|
| 1089 |
+
.gr-code,
|
| 1090 |
+
pre,
|
| 1091 |
+
code {
|
| 1092 |
+
background: #1F2937 !important;
|
| 1093 |
+
color: #10B981 !important;
|
| 1094 |
+
font-family: 'Courier New', monospace !important;
|
| 1095 |
+
border: 3px solid #10B981 !important;
|
| 1096 |
+
border-radius: 8px !important;
|
| 1097 |
+
box-shadow: 4px 4px 0px #10B981 !important;
|
| 1098 |
+
}
|
| 1099 |
+
|
| 1100 |
+
/* ===== π¨ λ§ν¬λ€μ΄ ===== */
|
| 1101 |
+
.gr-markdown {
|
| 1102 |
+
font-family: 'Comic Neue', cursive !important;
|
| 1103 |
+
color: #1F2937 !important;
|
| 1104 |
+
}
|
| 1105 |
+
|
| 1106 |
+
.gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
|
| 1107 |
+
font-family: 'Bangers', cursive !important;
|
| 1108 |
+
color: #1F2937 !important;
|
| 1109 |
+
text-shadow: 2px 2px 0px #FACC15 !important;
|
| 1110 |
+
}
|
| 1111 |
+
|
| 1112 |
+
/* ===== π¨ ν μ€νμΌ ===== */
|
| 1113 |
+
.gr-tab-nav {
|
| 1114 |
+
background: #FACC15 !important;
|
| 1115 |
+
border: 3px solid #1F2937 !important;
|
| 1116 |
+
border-radius: 8px 8px 0 0 !important;
|
| 1117 |
+
}
|
| 1118 |
+
|
| 1119 |
+
.gr-tab-nav button {
|
| 1120 |
+
font-family: 'Comic Neue', cursive !important;
|
| 1121 |
+
font-weight: 700 !important;
|
| 1122 |
+
color: #1F2937 !important;
|
| 1123 |
+
}
|
| 1124 |
+
|
| 1125 |
+
.gr-tab-nav button.selected {
|
| 1126 |
+
background: #3B82F6 !important;
|
| 1127 |
+
color: #FFFFFF !important;
|
| 1128 |
+
}
|
| 1129 |
+
|
| 1130 |
+
/* ===== π¨ μν νμ ===== */
|
| 1131 |
+
.status-box textarea {
|
| 1132 |
+
background: #1F2937 !important;
|
| 1133 |
+
color: #10B981 !important;
|
| 1134 |
+
font-family: 'Courier New', monospace !important;
|
| 1135 |
+
font-size: 0.9rem !important;
|
| 1136 |
+
border: 3px solid #10B981 !important;
|
| 1137 |
+
border-radius: 8px !important;
|
| 1138 |
+
}
|
| 1139 |
+
|
| 1140 |
+
/* ===== π¨ μ€ν¬λ‘€λ° - μ½λ―Ή μ€νμΌ ===== */
|
| 1141 |
+
::-webkit-scrollbar {
|
| 1142 |
+
width: 12px;
|
| 1143 |
+
height: 12px;
|
| 1144 |
+
}
|
| 1145 |
+
|
| 1146 |
+
::-webkit-scrollbar-track {
|
| 1147 |
+
background: #FEF9C3;
|
| 1148 |
+
border: 2px solid #1F2937;
|
| 1149 |
+
}
|
| 1150 |
+
|
| 1151 |
+
::-webkit-scrollbar-thumb {
|
| 1152 |
+
background: #3B82F6;
|
| 1153 |
+
border: 2px solid #1F2937;
|
| 1154 |
+
border-radius: 0px;
|
| 1155 |
+
}
|
| 1156 |
+
|
| 1157 |
+
::-webkit-scrollbar-thumb:hover {
|
| 1158 |
+
background: #EF4444;
|
| 1159 |
+
}
|
| 1160 |
+
|
| 1161 |
+
/* ===== π¨ μ ν νμ΄λΌμ΄νΈ ===== */
|
| 1162 |
+
::selection {
|
| 1163 |
+
background: #FACC15;
|
| 1164 |
+
color: #1F2937;
|
| 1165 |
+
}
|
| 1166 |
+
|
| 1167 |
+
/* ===== π¨ Row/Column κ°κ²© ===== */
|
| 1168 |
+
.gr-row {
|
| 1169 |
+
gap: 1.5rem !important;
|
| 1170 |
+
}
|
| 1171 |
+
|
| 1172 |
+
.gr-column {
|
| 1173 |
+
gap: 1rem !important;
|
| 1174 |
+
}
|
| 1175 |
+
|
| 1176 |
+
/* ===== π¨ νκ·Έ μ
λ ₯ νΉλ³ μ€νμΌ ===== */
|
| 1177 |
+
.tag-input textarea {
|
| 1178 |
+
background: #FEF3C7 !important;
|
| 1179 |
+
border: 3px dashed #F59E0B !important;
|
| 1180 |
+
font-family: 'Courier New', monospace !important;
|
| 1181 |
+
}
|
| 1182 |
+
|
| 1183 |
+
/* ===== λ°μν μ‘°μ ===== */
|
| 1184 |
+
@media (max-width: 768px) {
|
| 1185 |
+
.header-title h1 {
|
| 1186 |
+
font-size: 2rem !important;
|
| 1187 |
+
text-shadow:
|
| 1188 |
+
3px 3px 0px #FACC15,
|
| 1189 |
+
4px 4px 0px #1F2937 !important;
|
| 1190 |
+
}
|
| 1191 |
+
|
| 1192 |
+
.gr-button-primary,
|
| 1193 |
+
button.primary {
|
| 1194 |
+
padding: 10px 16px !important;
|
| 1195 |
+
font-size: 1rem !important;
|
| 1196 |
+
}
|
| 1197 |
+
}
|
| 1198 |
+
|
| 1199 |
+
/* ===== π¨ νΉμ ν¨κ³Ό - λ°μ§μ ===== */
|
| 1200 |
+
@keyframes sparkle {
|
| 1201 |
+
0%, 100% { opacity: 1; }
|
| 1202 |
+
50% { opacity: 0.7; }
|
| 1203 |
+
}
|
| 1204 |
+
|
| 1205 |
+
.sparkle {
|
| 1206 |
+
animation: sparkle 2s ease-in-out infinite;
|
| 1207 |
+
}
|
| 1208 |
+
"""
|
| 1209 |
+
|
| 1210 |
+
# ============================================================
|
| 1211 |
+
# Gradio UI
|
| 1212 |
+
# ============================================================
|
| 1213 |
+
|
| 1214 |
+
with gr.Blocks(css=css, title="π΅ SOMA Music Studio", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
|
| 1215 |
+
|
| 1216 |
+
# HOME Badge
|
| 1217 |
+
gr.HTML("""
|
| 1218 |
+
<div style="text-align: center; margin: 20px 0 10px 0;">
|
| 1219 |
+
<a href="https://huggingface.co/HeartMuLa" target="_blank" style="text-decoration: none;">
|
| 1220 |
+
<img src="https://img.shields.io/badge/π΅_HeartMuLa-Official-ff6b6b?style=for-the-badge&labelColor=1F2937" alt="HeartMuLa">
|
| 1221 |
+
</a>
|
| 1222 |
+
<a href="https://www.minimax.io" target="_blank" style="text-decoration: none; margin-left: 10px;">
|
| 1223 |
+
<img src="https://img.shields.io/badge/πΉ_MiniMax-Music_2.5-3B82F6?style=for-the-badge&labelColor=1F2937" alt="MiniMax">
|
| 1224 |
+
</a>
|
| 1225 |
+
</div>
|
| 1226 |
+
""")
|
| 1227 |
+
|
| 1228 |
+
# Header
|
| 1229 |
+
gr.Markdown("""
|
| 1230 |
+
# π΅ SOMA MUSIC STUDIO πΆ
|
| 1231 |
+
""", elem_classes="header-title")
|
| 1232 |
+
|
| 1233 |
+
gr.Markdown("""
|
| 1234 |
+
<p class="subtitle-text">π« HeartMuLa + MiniMax Music 2.5 + SOMA Multi-Agent = μ΅κ³ νμ§ AI μμ
μμ± π«</p>
|
| 1235 |
+
<p class="subtitle-text">π€ A Cappella | π· Jazz Duet | πΈ Multi-Style | π¬ Film Score | π΅ K-Pop μ§μ</p>
|
| 1236 |
+
""")
|
| 1237 |
+
|
| 1238 |
+
# API Keys
|
| 1239 |
+
GROQ_KEY = os.environ.get("GROQ_API_KEY", "")
|
| 1240 |
+
MINIMAX_KEY = os.environ.get("MINIMAX_API_KEY", "")
|
| 1241 |
+
|
| 1242 |
+
with gr.Accordion("π API Keys", open=not (GROQ_KEY and MINIMAX_KEY)):
|
| 1243 |
+
with gr.Row():
|
| 1244 |
+
groq_key = gr.Textbox(
|
| 1245 |
+
label="π¦ Groq API Key (κ°μ¬/ν둬ννΈ μμ±μ©)",
|
| 1246 |
+
type="password",
|
| 1247 |
+
value=GROQ_KEY,
|
| 1248 |
+
placeholder="gsk_..." if not GROQ_KEY else "β
Secret λ‘λλ¨",
|
| 1249 |
+
interactive=not bool(GROQ_KEY)
|
| 1250 |
+
)
|
| 1251 |
+
minimax_key = gr.Textbox(
|
| 1252 |
+
label="πΉ MiniMax API Key (μμ
μμ±μ©)",
|
| 1253 |
+
type="password",
|
| 1254 |
+
value=MINIMAX_KEY,
|
| 1255 |
+
placeholder="API Key" if not MINIMAX_KEY else "β
Secret λ‘λλ¨",
|
| 1256 |
+
interactive=not bool(MINIMAX_KEY)
|
| 1257 |
+
)
|
| 1258 |
+
|
| 1259 |
+
with gr.Row(equal_height=False):
|
| 1260 |
+
# ========== μ’μΈ‘: κ°μ¬ μμ± ==========
|
| 1261 |
+
with gr.Column(scale=1, min_width=400):
|
| 1262 |
+
gr.Markdown("## π LYRICS GENERATOR", elem_classes="section-title")
|
| 1263 |
+
|
| 1264 |
+
theme_input = gr.Textbox(
|
| 1265 |
+
label="π― λ
Έλ μ£Όμ ",
|
| 1266 |
+
placeholder="μ: μ΄λ³ ν μ±μ₯, κΏμ ν₯ν λμ , μ¬λμ κ³ λ°±, μ°μ μ ν...",
|
| 1267 |
+
lines=2
|
| 1268 |
+
)
|
| 1269 |
+
|
| 1270 |
+
with gr.Row():
|
| 1271 |
+
lyrics_genre = gr.Dropdown(
|
| 1272 |
+
label="πΈ μ₯λ₯΄",
|
| 1273 |
+
choices=["K-Pop", "Pop", "R&B", "Hip-Hop", "Ballad", "Rock",
|
| 1274 |
+
"EDM", "Trap", "Jazz", "Blues", "Folk", "Disco", "Cinematic"],
|
| 1275 |
+
value="K-Pop"
|
| 1276 |
+
)
|
| 1277 |
+
lyrics_mood = gr.Dropdown(
|
| 1278 |
+
label="π« λΆμκΈ°",
|
| 1279 |
+
choices=["Empowering", "Melancholic", "Joyful", "Romantic",
|
| 1280 |
+
"Aggressive", "Dreamy", "Nostalgic", "Energetic", "Dark",
|
| 1281 |
+
"Peaceful", "Confident", "Intimate"],
|
| 1282 |
+
value="Empowering"
|
| 1283 |
+
)
|
| 1284 |
+
|
| 1285 |
+
with gr.Row():
|
| 1286 |
+
lyrics_language = gr.Dropdown(
|
| 1287 |
+
label="π μΈμ΄",
|
| 1288 |
+
choices=["English", "Korean", "Korean + English", "Japanese"],
|
| 1289 |
+
value="Korean"
|
| 1290 |
+
)
|
| 1291 |
+
lyrics_vocal_type = gr.Dropdown(
|
| 1292 |
+
label="π€ 보컬 νμ
",
|
| 1293 |
+
choices=["Solo Female", "Solo Male", "Female Duet", "Male Duet",
|
| 1294 |
+
"Male-Female Duet", "A Cappella", "Group/Choir"],
|
| 1295 |
+
value="Group/Choir"
|
| 1296 |
+
)
|
| 1297 |
+
|
| 1298 |
+
lyrics_additional = gr.Textbox(
|
| 1299 |
+
label="β¨ μΆκ° μ§μ (μ ν)",
|
| 1300 |
+
placeholder="νΉλ³ μμ²: νλ ΄κ΅¬ κ°μ‘°, λ© ννΈ μΆκ°, νΉμ λ¨μ΄ ν¬ν¨...",
|
| 1301 |
+
lines=1
|
| 1302 |
+
)
|
| 1303 |
+
|
| 1304 |
+
with gr.Row():
|
| 1305 |
+
quick_btn = gr.Button("β‘ QUICK GENERATE", variant="secondary")
|
| 1306 |
+
soma_lyrics_btn = gr.Button("π§ SOMA GENERATE", variant="primary")
|
| 1307 |
+
|
| 1308 |
+
lyrics_status = gr.Textbox(label="π μν", interactive=False, max_lines=1, elem_classes="status-box")
|
| 1309 |
+
|
| 1310 |
+
with gr.Accordion("π SOMA μμ
κ³Όμ ", open=False):
|
| 1311 |
+
with gr.Row():
|
| 1312 |
+
with gr.Column():
|
| 1313 |
+
step1_out = gr.Textbox(label="1οΈβ£ μμ¬κ°", lines=4, interactive=False)
|
| 1314 |
+
step2_out = gr.Textbox(label="2οΈβ£ νλ‘λμ", lines=4, interactive=False)
|
| 1315 |
+
with gr.Column():
|
| 1316 |
+
step3_out = gr.Textbox(label="3οΈβ£ κ°μ± λλ ν°", lines=4, interactive=False)
|
| 1317 |
+
step4_out = gr.Textbox(label="4οΈβ£ μ΅μ’
νΈμ§", lines=4, interactive=False)
|
| 1318 |
+
|
| 1319 |
+
final_lyrics = gr.Textbox(
|
| 1320 |
+
label="βοΈ μ΅μ’
κ°μ¬ (νΈμ§ κ°λ₯)",
|
| 1321 |
+
lines=14,
|
| 1322 |
+
placeholder="μμ±λ κ°μ¬κ° μ¬κΈ° νμλ©λλ€...\n\n[Intro]\n\n[Verse]\n...\n\n[Chorus]\n..."
|
| 1323 |
)
|
| 1324 |
+
|
| 1325 |
+
with gr.Accordion("π HeartMuLa ꡬ쑰 νκ·Έ κ°μ΄λ", open=False):
|
| 1326 |
+
gr.Markdown("""
|
| 1327 |
+
**HeartMuLa νκ·Έ:** `[Intro]` `[Verse]` `[Prechorus]` `[Chorus]` `[Bridge]` `[Interlude]` `[Hook]` `[Outro]` `[Inst]` `[Solo]`
|
| 1328 |
|
| 1329 |
+
β οΈ **μ£Όμ:** `[Pre Chorus]`κ° μλ `[Prechorus]` μ¬μ© (곡백 μμ)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1330 |
|
| 1331 |
+
**μ΅μ ꡬ쑰 μμ:**
|
| 1332 |
+
```
|
| 1333 |
+
[Intro] β [Verse] β [Prechorus] β [Chorus]
|
| 1334 |
+
β [Verse] β [Prechorus] β [Chorus]
|
| 1335 |
+
β [Bridge] β [Chorus] β [Outro]
|
| 1336 |
+
```
|
| 1337 |
|
| 1338 |
+
**νκ·Έλ³ μν :**
|
| 1339 |
+
- `[Chorus]` - κ°μ₯ μ€μ! 2-3ν λ°λ³΅, κΈ°μ΅μ λ¨λ ν
|
| 1340 |
+
- `[Prechorus]` - μ½λ¬μ€ μ ν
μ
λΉλμ
|
| 1341 |
+
- `[Bridge]` - κ°μ μ νμ , μ΅μ’
μ½λ¬μ€ μ λ°°μΉ
|
| 1342 |
+
""")
|
| 1343 |
+
|
| 1344 |
+
# ========== μ°μΈ‘: μμ
μμ± ==========
|
| 1345 |
+
with gr.Column(scale=1, min_width=400):
|
| 1346 |
+
gr.Markdown("## π΅ MUSIC GENERATOR", elem_classes="section-title")
|
| 1347 |
+
|
| 1348 |
+
# μμ ν둬ννΈ μ ν
|
| 1349 |
+
gr.Markdown("### π μμ ν둬ννΈ (ν΄λ¦νλ©΄ μλ μ
λ ₯)")
|
| 1350 |
+
|
| 1351 |
+
example_dropdown = gr.Dropdown(
|
| 1352 |
+
label="π― μμ μ ν",
|
| 1353 |
+
choices=[
|
| 1354 |
+
"π€ A Cappella - μμ 보컬 νλͺ¨λ",
|
| 1355 |
+
"π₯ Group Harmony - νμν ν©μ°½",
|
| 1356 |
+
"π· Jazz Duet - λ¨λ
λμ£",
|
| 1357 |
+
"πΈ Multi-Style - μ€νμΌ μ ν",
|
| 1358 |
+
"π Urban Chill - R&B",
|
| 1359 |
+
"πΉ Jazz Club - λΌμ΄λΈ",
|
| 1360 |
+
"πͺ© Retro Disco - 80λ
λ",
|
| 1361 |
+
"π¬ Film Score - μλ€λ§ν±",
|
| 1362 |
+
"π΅ K-Pop Dance - κ³ μλμ§",
|
| 1363 |
+
"π» Orchestral Ballad - μ
μ₯",
|
| 1364 |
+
"π₯ HeartMuLa Default - κΈ°λ³Έ"
|
| 1365 |
+
],
|
| 1366 |
+
value=None,
|
| 1367 |
+
interactive=True
|
| 1368 |
+
)
|
| 1369 |
+
|
| 1370 |
+
gr.Markdown("### ποΈ ν둬ννΈ μ€μ ")
|
| 1371 |
+
|
| 1372 |
+
base_prompt = gr.Textbox(
|
| 1373 |
+
label="π‘ κΈ°λ³Έ μμ΄λμ΄",
|
| 1374 |
+
placeholder="μνλ μμ
μ€νμΌμ κ°λ¨ν μ€λͺ
νμΈμ...",
|
| 1375 |
+
lines=2
|
| 1376 |
+
)
|
| 1377 |
+
|
| 1378 |
+
with gr.Row():
|
| 1379 |
+
music_genre = gr.Dropdown(
|
| 1380 |
+
label="πΈ μ₯λ₯΄",
|
| 1381 |
+
choices=["K-Pop", "Pop", "R&B/Soul", "Hip-Hop/Trap", "EDM/House",
|
| 1382 |
+
"Rock", "Ballad", "Jazz", "Blues", "Lo-Fi", "Disco",
|
| 1383 |
+
"Cinematic", "Classical"],
|
| 1384 |
+
value="K-Pop"
|
| 1385 |
+
)
|
| 1386 |
+
music_mood = gr.Dropdown(
|
| 1387 |
+
label="π« λΆμκΈ°",
|
| 1388 |
+
choices=["Energetic", "Chill", "Emotional", "Dark", "Uplifting",
|
| 1389 |
+
"Romantic", "Aggressive", "Dreamy", "Confident", "Peaceful",
|
| 1390 |
+
"Nostalgic", "Epic"],
|
| 1391 |
+
value="Energetic"
|
| 1392 |
+
)
|
| 1393 |
+
|
| 1394 |
+
with gr.Row():
|
| 1395 |
+
music_tempo = gr.Dropdown(
|
| 1396 |
+
label="β±οΈ ν
ν¬",
|
| 1397 |
+
choices=["Very Slow (50-70 BPM)", "Slow (70-90 BPM)", "Medium (90-110 BPM)",
|
| 1398 |
+
"Fast (110-130 BPM)", "Very Fast (130-150 BPM)"],
|
| 1399 |
+
value="Medium (90-110 BPM)"
|
| 1400 |
+
)
|
| 1401 |
+
music_vocal = gr.Dropdown(
|
| 1402 |
+
label="π€ 보컬",
|
| 1403 |
+
choices=["Female (Clear)", "Female (Warm)", "Female (Powerful)",
|
| 1404 |
+
"Male (Smooth)", "Male (Deep)", "Male (Raspy)",
|
| 1405 |
+
"Male-Female Duet", "A Cappella", "Group/Choir",
|
| 1406 |
+
"Instrumental"],
|
| 1407 |
+
value="Group/Choir"
|
| 1408 |
+
)
|
| 1409 |
+
|
| 1410 |
+
music_instruments = gr.Textbox(
|
| 1411 |
+
label="πΉ μ
κΈ°/μ¬μ΄λ",
|
| 1412 |
+
placeholder="μ: piano, synthesizer, drums, bass, strings, saxophone...",
|
| 1413 |
+
value="piano,synthesizer,drums,bass,strings"
|
| 1414 |
+
)
|
| 1415 |
+
|
| 1416 |
+
music_reference = gr.Dropdown(
|
| 1417 |
+
label="π― λ νΌλ°μ€ μ€νμΌ",
|
| 1418 |
+
choices=["None", "A Cappella", "Jazz Club", "Urban R&B",
|
| 1419 |
+
"Retro Disco", "Film Score", "K-Pop Dance", "Orchestral Ballad"],
|
| 1420 |
+
value="None"
|
| 1421 |
+
)
|
| 1422 |
+
|
| 1423 |
+
augment_btn = gr.Button("π SOMA AUGMENT", variant="primary", size="lg")
|
| 1424 |
+
|
| 1425 |
+
augmented_prompt = gr.Textbox(
|
| 1426 |
+
label="β¨ μ¦κ°λ ν둬ννΈ (νΈμ§ κ°λ₯)",
|
| 1427 |
+
lines=5,
|
| 1428 |
+
placeholder="SOMAκ° μμ±ν κ³ νμ§ ν둬ννΈ..."
|
| 1429 |
+
)
|
| 1430 |
+
|
| 1431 |
+
style_tags = gr.Textbox(
|
| 1432 |
+
label="π·οΈ μ€νμΌ νκ·Έ (HeartMuLa ν¬λ§·: μ½€λ§ κ΅¬λΆ, 곡백 μμ)",
|
| 1433 |
+
placeholder="piano,happy,pop,upbeat,romantic",
|
| 1434 |
+
lines=1,
|
| 1435 |
+
elem_classes="tag-input"
|
| 1436 |
+
)
|
| 1437 |
+
|
| 1438 |
+
with gr.Accordion("βοΈ μμ± μ€μ ", open=False):
|
| 1439 |
+
model_select = gr.Dropdown(
|
| 1440 |
+
label="π€ λͺ¨λΈ", choices=["music-2.5"], value="music-2.5"
|
| 1441 |
+
)
|
| 1442 |
+
with gr.Row():
|
| 1443 |
+
sample_rate = gr.Dropdown(label="Sample Rate", choices=[44100], value=44100)
|
| 1444 |
+
bitrate = gr.Dropdown(label="Bitrate", choices=[128000, 192000, 256000], value=256000)
|
| 1445 |
+
audio_format = gr.Dropdown(label="Format", choices=["mp3", "wav"], value="mp3")
|
| 1446 |
+
|
| 1447 |
+
generate_music_btn = gr.Button("πΆ GENERATE MUSIC!", variant="primary", size="lg", elem_classes="generate-btn")
|
| 1448 |
+
|
| 1449 |
+
music_status = gr.Textbox(label="π μν", interactive=False, max_lines=1, elem_classes="status-box")
|
| 1450 |
+
music_output = gr.Audio(label="π§ μμ±λ μμ
", type="filepath")
|
| 1451 |
+
|
| 1452 |
+
with gr.Accordion("π API μλ΅", open=False):
|
| 1453 |
+
json_output = gr.Code(label="JSON Response", language="json", lines=6)
|
| 1454 |
+
|
| 1455 |
+
# ========== νλ¨: κ°μ΄λ ==========
|
| 1456 |
+
with gr.Accordion("π HeartMuLa & MiniMax κ°μ΄λ", open=False):
|
| 1457 |
+
gr.Markdown(f"""
|
| 1458 |
+
{HEARTMULA_TAG_GUIDE}
|
| 1459 |
+
|
| 1460 |
+
---
|
| 1461 |
|
| 1462 |
+
{HEARTMULA_LYRICS_STRUCTURE}
|
| 1463 |
+
""")
|
| 1464 |
+
|
| 1465 |
+
# ========== Event Handlers ==========
|
| 1466 |
+
|
| 1467 |
+
# μμ Dropdown μ ν
|
| 1468 |
+
example_dropdown.change(
|
| 1469 |
+
fn=load_example_from_dropdown,
|
| 1470 |
+
inputs=[example_dropdown],
|
| 1471 |
+
outputs=[augmented_prompt, style_tags]
|
| 1472 |
+
)
|
| 1473 |
+
|
| 1474 |
+
# λΉ λ₯Έ κ°μ¬ μμ±
|
| 1475 |
+
quick_btn.click(
|
| 1476 |
+
fn=quick_lyrics,
|
| 1477 |
+
inputs=[groq_key, theme_input, lyrics_genre, lyrics_mood, lyrics_language, lyrics_vocal_type, lyrics_additional],
|
| 1478 |
+
outputs=[final_lyrics]
|
| 1479 |
+
)
|
| 1480 |
+
|
| 1481 |
+
# SOMA κ°μ¬ μμ±
|
| 1482 |
+
soma_lyrics_btn.click(
|
| 1483 |
+
fn=generate_lyrics_soma,
|
| 1484 |
+
inputs=[groq_key, theme_input, lyrics_genre, lyrics_mood, lyrics_language, lyrics_vocal_type, lyrics_additional],
|
| 1485 |
+
outputs=[lyrics_status, step1_out, step2_out, step3_out, step4_out]
|
| 1486 |
+
).then(
|
| 1487 |
+
fn=lambda x: x,
|
| 1488 |
+
inputs=[step4_out],
|
| 1489 |
+
outputs=[final_lyrics]
|
| 1490 |
+
)
|
| 1491 |
+
|
| 1492 |
+
# SOMA ν둬ννΈ μ¦κ°
|
| 1493 |
+
augment_btn.click(
|
| 1494 |
+
fn=augment_prompt_soma,
|
| 1495 |
+
inputs=[groq_key, base_prompt, music_genre, music_mood, music_tempo, music_vocal, music_instruments, music_reference],
|
| 1496 |
+
outputs=[augmented_prompt, style_tags]
|
| 1497 |
+
)
|
| 1498 |
+
|
| 1499 |
+
# μμ
μμ±
|
| 1500 |
+
generate_music_btn.click(
|
| 1501 |
+
fn=generate_music,
|
| 1502 |
+
inputs=[minimax_key, model_select, augmented_prompt, final_lyrics, sample_rate, bitrate, audio_format],
|
| 1503 |
+
outputs=[music_output, music_status, json_output]
|
| 1504 |
+
)
|
| 1505 |
|
| 1506 |
|
| 1507 |
+
if __name__ == "__main__":
|
| 1508 |
+
demo.launch()
|