seawolf2357 commited on
Commit
91bafc2
Β·
verified Β·
1 Parent(s): d629f8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1471 -235
app.py CHANGED
@@ -1,272 +1,1508 @@
1
- import os
2
- import tempfile
3
- import torch
4
  import gradio as gr
5
- from huggingface_hub import hf_hub_download, snapshot_download
6
- import spaces
7
-
8
- # Download models from HuggingFace Hub on startup
9
- def download_models():
10
- """Download all required model files from HuggingFace Hub."""
11
- cache_dir = os.environ.get("HF_HOME", os.path.expanduser("/tmp"))
12
- model_dir = os.path.join(cache_dir, "heartmula_models")
13
-
14
- if not os.path.exists(model_dir):
15
- os.makedirs(model_dir, exist_ok=True)
16
-
17
- # Download HeartMuLaGen (tokenizer and gen_config)
18
- print("Downloading HeartMuLaGen files...")
19
- for filename in ["tokenizer.json", "gen_config.json"]:
20
- hf_hub_download(
21
- repo_id="HeartMuLa/HeartMuLaGen",
22
- filename=filename,
23
- local_dir=model_dir,
24
- )
25
 
26
- # Download HeartMuLa-oss-3B
27
- print("Downloading HeartMuLa-oss-3B...")
28
- snapshot_download(
29
- repo_id="HeartMuLa/HeartMuLa-oss-3B",
30
- local_dir=os.path.join(model_dir, "HeartMuLa-oss-3B"),
31
- )
32
 
33
- # Download HeartCodec-oss
34
- print("Downloading HeartCodec-oss...")
35
- snapshot_download(
36
- repo_id="HeartMuLa/HeartCodec-oss",
37
- local_dir=os.path.join(model_dir, "HeartCodec-oss"),
38
- )
39
 
40
- print("All models downloaded successfully!")
41
- return model_dir
42
-
43
- from heartlib import HeartMuLaGenPipeline
44
-
45
- model_dir = download_models()
46
-
47
- # Determine device and dtype
48
- if torch.cuda.is_available():
49
- device = torch.device("cuda")
50
- dtype = torch.bfloat16
51
- else:
52
- device = torch.device("cpu")
53
- dtype = torch.float32
54
-
55
- print(f"Loading pipeline on {device} with {dtype}...")
56
- pipe = HeartMuLaGenPipeline.from_pretrained(
57
- model_dir,
58
- device=device,
59
- dtype=dtype,
60
- version="3B",
61
- )
62
- print("Pipeline loaded successfully!")
63
-
64
-
65
- @spaces.GPU(duration=130)
66
- def generate_music(
67
- lyrics: str,
68
- tags: str,
69
- max_duration_seconds: int,
70
- temperature: float,
71
- topk: int,
72
- cfg_scale: float,
73
- progress=gr.Progress(track_tqdm=True),
74
- ):
75
- """Generate music from lyrics and tags."""
76
- if not lyrics.strip():
77
- raise gr.Error("Please enter some lyrics!")
78
-
79
- if not tags.strip():
80
- raise gr.Error("Please enter at least one tag!")
81
-
82
- # Create a temporary file for output
83
- with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
84
- output_path = f.name
85
-
86
- max_audio_length_ms = max_duration_seconds * 1000
87
-
88
- with torch.no_grad():
89
- pipe(
90
- {
91
- "lyrics": lyrics,
92
- "tags": tags,
93
- },
94
- max_audio_length_ms=max_audio_length_ms,
95
- save_path=output_path,
96
- topk=topk,
97
- temperature=temperature,
98
- cfg_scale=cfg_scale,
99
- )
100
 
101
- return output_path
 
 
 
 
102
 
 
 
 
 
 
103
 
104
- # Example lyrics
105
- EXAMPLE_LYRICS = """[Intro]
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  [Verse]
108
- The sun creeps in across the floor
109
- I hear the traffic outside the door
110
- The coffee pot begins to hiss
111
- It is another morning just like this
112
 
113
  [Prechorus]
114
- The world keeps spinning round and round
115
- Feet are planted on the ground
116
- I find my rhythm in the sound
117
 
118
  [Chorus]
119
- Every day the light returns
120
- Every day the fire burns
121
- We keep on walking down this street
122
- Moving to the same steady beat
123
- It is the ordinary magic that we meet
124
 
125
  [Verse]
126
- The hours tick deeply into noon
127
- Chasing shadows, chasing the moon
128
- Work is done and the lights go low
129
- Watching the city start to glow
130
 
131
  [Bridge]
132
- It is not always easy, not always bright
133
- Sometimes we wrestle with the night
134
- But we make it to the morning light
135
 
136
  [Chorus]
137
- Every day the light returns
138
- Every day the fire burns
139
- We keep on walking down this street
140
- Moving to the same steady beat
141
 
142
  [Outro]
143
- Just another day
144
- Every single day"""
145
-
146
- EXAMPLE_TAGS = "piano,happy,uplifting,pop"
147
-
148
- # Build the Gradio interface
149
- with gr.Blocks(
150
- title="HeartMuLa Music Generator",
151
- ) as demo:
152
- gr.Markdown(
153
- """
154
- # HeartMuLa Music Generator
155
-
156
- Generate music from lyrics and tags using [HeartMuLa](https://github.com/HeartMuLa/heartlib),
157
- an open-source music foundation model.
158
-
159
- **Instructions:**
160
- 1. Enter your lyrics with structure tags like `[Verse]`, `[Chorus]`, `[Bridge]`, etc.
161
- 2. Add comma-separated tags describing the music style (e.g., `piano,happy,romantic`)
162
- 3. Adjust generation parameters as needed
163
- 4. Click "Generate Music" and wait for your song!
164
-
165
- *Note: Generation can take several minutes depending on the duration.*
166
- """
167
- )
168
 
169
- with gr.Row():
170
- with gr.Column(scale=1):
171
- lyrics_input = gr.Textbox(
172
- label="Lyrics",
173
- placeholder="Enter lyrics with structure tags like [Verse], [Chorus], etc.",
174
- lines=20,
175
- value=EXAMPLE_LYRICS,
176
- )
177
 
178
- tags_input = gr.Textbox(
179
- label="Tags",
180
- placeholder="piano,happy,romantic,synthesizer",
181
- value=EXAMPLE_TAGS,
182
- info="Comma-separated tags describing the music style",
183
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
- with gr.Accordion("Advanced Settings", open=False):
186
- max_duration = gr.Slider(
187
- minimum=30,
188
- maximum=240,
189
- value=120,
190
- step=10,
191
- label="Max Duration (seconds)",
192
- info="Maximum length of generated audio",
193
- )
194
 
195
- temperature = gr.Slider(
196
- minimum=0.1,
197
- maximum=2.0,
198
- value=1.0,
199
- step=0.1,
200
- label="Temperature",
201
- info="Higher = more creative, Lower = more consistent",
202
- )
203
 
204
- topk = gr.Slider(
205
- minimum=1,
206
- maximum=100,
207
- value=50,
208
- step=1,
209
- label="Top-K",
210
- info="Number of top tokens to sample from",
211
- )
212
 
213
- cfg_scale = gr.Slider(
214
- minimum=1.0,
215
- maximum=3.0,
216
- value=1.5,
217
- step=0.1,
218
- label="CFG Scale",
219
- info="Classifier-free guidance scale",
220
- )
221
 
222
- generate_btn = gr.Button("Generate Music", variant="primary", size="lg")
 
 
 
 
 
 
223
 
224
- with gr.Column(scale=1):
225
- audio_output = gr.Audio(
226
- label="Generated Music",
227
- type="filepath",
228
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
- gr.Markdown(
231
- """
232
- ### Tips for Better Results
233
- - Use structured lyrics with section tags
234
- - Be specific with your style tags
235
- - Try different temperature values for variety
236
- - Shorter durations generate faster
237
-
238
- ### Example Tags
239
- - **Instruments:** piano, guitar, drums, synthesizer, violin, bass
240
- - **Mood:** happy, sad, romantic, energetic, calm, melancholic
241
- - **Genre:** pop, rock, jazz, classical, electronic, folk
242
- - **Tempo:** fast, slow, upbeat, relaxed
243
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  )
 
 
 
 
245
 
246
- generate_btn.click(
247
- fn=generate_music,
248
- inputs=[
249
- lyrics_input,
250
- tags_input,
251
- max_duration,
252
- temperature,
253
- topk,
254
- cfg_scale,
255
- ],
256
- outputs=audio_output,
257
- )
258
 
259
- gr.Markdown(
260
- """
261
- ---
262
- **Model:** [HeartMuLa-oss-3B](https://huggingface.co/HeartMuLa/HeartMuLa-oss-3B) |
263
- **Paper:** [arXiv](https://arxiv.org/abs/2601.10547) |
264
- **Code:** [GitHub](https://github.com/HeartMuLa/heartlib)
265
 
266
- *Licensed under Apache 2.0*
267
- """
268
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
 
272
- demo.launch()
 
 
 
 
 
1
  import gradio as gr
2
+ import requests
3
+ import json
4
+ import os
5
+ from groq import Groq
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # ============================================================
8
+ # 🎵 SOMA Music Studio - HeartMuLa Optimized Edition
9
+ # MiniMax Music 2.5 + HeartMuLa Style Guide + Comic Classic Theme
10
+ # ============================================================
 
 
11
 
12
+ # HeartMuLa 권장 구조 태그 (공식 문서 기반)
13
+ STRUCTURE_TAGS = [
14
+ "[Intro]", "[Verse]", "[Prechorus]", "[Chorus]", "[Bridge]",
15
+ "[Interlude]", "[Hook]", "[Outro]", "[Inst]", "[Solo]"
16
+ ]
 
17
 
18
+ # HeartMuLa 스타일 태그 가이드 (콤마 구분, 공백 없음)
19
+ HEARTMULA_TAG_GUIDE = """
20
+ ## 🎼 HeartMuLa Tag Format Guide
21
+
22
+ ### STRUCTURE TAGS (가사 λ‚΄ μ‚¬μš©):
23
+ [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]
24
+
25
+ ### STYLE TAGS FORMAT (콀마 ꡬ뢄, 곡백 μ—†μŒ):
26
+ ```
27
+ piano,happy,wedding,synthesizer,romantic
28
+ guitar,sad,ballad,strings,emotional
29
+ drums,energetic,rock,electric_guitar,powerful
30
+ ```
31
+
32
+ ### RECOMMENDED TAG CATEGORIES:
33
+ **Instruments:** piano,guitar,drums,bass,synthesizer,strings,violin,cello,trumpet,saxophone,flute,harp,organ
34
+ **Mood:** happy,sad,romantic,energetic,calm,melancholic,uplifting,dark,dreamy,nostalgic,powerful,peaceful
35
+ **Genre:** pop,rock,jazz,classical,electronic,folk,blues,r&b,hip_hop,disco,ballad,cinematic
36
+ **Tempo:** fast,slow,moderate,upbeat,relaxed
37
+ **Occasion:** wedding,party,meditation,workout,study,sleep,travel
38
+
39
+ ### EXAMPLE COMBINATIONS:
40
+ - K-Pop Dance: `synthesizer,drums,energetic,pop,upbeat,powerful`
41
+ - Jazz Ballad: `piano,saxophone,romantic,jazz,slow,dreamy`
42
+ - Epic Cinematic: `strings,orchestra,powerful,cinematic,dramatic,epic`
43
+ - Lo-Fi Chill: `piano,guitar,calm,lo_fi,relaxed,dreamy`
44
+ """
45
+
46
+ # MiniMax Music 2.5 핵심 기능 가이드 (강화)
47
+ MINIMAX_MUSIC_GUIDE = """
48
+ ## MiniMax Music 2.5 Core Capabilities:
49
+
50
+ ### 1. DYNAMIC VOCALS - Mastery Over Diverse Singing Styles
51
+ - Human-like vocal timbre with professional singing techniques
52
+ - Precise control over phrasing, rhythm, and breath
53
+ - One voice can switch between multiple styles
54
+ - Supports: Pop, Jazz, Blues, Rock, Folk, Electronic, Urban, Disco
55
+ - Special modes: Male-female duets, A Cappella (pure vocals)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
+ ### 2. CATCHY MELODIES & INSTRUMENT CONTROL
58
+ - Structurally complete songs: Verse β†’ Chorus β†’ Bridge (up to 5 minutes)
59
+ - Memorable, instantly captivating melodies
60
+ - Independent control of individual instruments
61
+ - Instruments: Piano, Guitar, Bass, Drums, Saxophone, Trumpet, Synths, Strings
62
 
63
+ ### 3. PROFESSIONAL-GRADE AUDIO
64
+ - Enhanced vocal track texture
65
+ - Spatial presence of instruments
66
+ - Immersive listening experience
67
+ - Film-grade monologue soundtracks
68
 
69
+ ### 4. PROMPT WRITING BEST PRACTICES (HeartMuLa Optimized)
70
+ - Use specific instrument names
71
+ - Describe vocal emotions precisely
72
+ - Specify singing techniques (breathy, powerful, smooth, raspy)
73
+ - Use tempo (BPM), key when needed
74
+ - Tags should be comma-separated WITHOUT spaces
75
+ """
76
+
77
+ # HeartMuLa 권장 가사 구조 예시
78
+ HEARTMULA_LYRICS_STRUCTURE = """
79
+ ## πŸ“ HeartMuLa Recommended Lyrics Structure
80
+
81
+ ### OPTIMAL FORMAT:
82
+ ```
83
+ [Intro]
84
 
85
  [Verse]
86
+ First verse lyrics here
87
+ Second line of first verse
88
+ Third line continues story
89
+ Fourth line builds emotion
90
 
91
  [Prechorus]
92
+ Building tension here
93
+ Leading to the chorus
 
94
 
95
  [Chorus]
96
+ Main hook and memorable melody
97
+ Most important part of song
98
+ Repeat this section 2-3 times
99
+ Make it singable and catchy
 
100
 
101
  [Verse]
102
+ Second verse develops story
103
+ New information revealed
104
+ Emotional progression continues
105
+ Building toward bridge
106
 
107
  [Bridge]
108
+ Contrast section here
109
+ Different melody or perspective
110
+ Emotional peak moment
111
 
112
  [Chorus]
113
+ Main hook repeated
114
+ With possible variations
115
+ Final emotional release
 
116
 
117
  [Outro]
118
+ Closing the song
119
+ Fading or resolving
120
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
+ ### KEY RULES:
123
+ 1. [Chorus] appears at least 2-3 times
124
+ 2. [Prechorus] builds tension before [Chorus]
125
+ 3. [Bridge] provides contrast before final [Chorus]
126
+ 4. Each [Verse] should progress the story
127
+ 5. [Intro] and [Outro] frame the song
128
+ """
 
129
 
130
+ # 예제 프롬프트 (HeartMuLa + MiniMax 최적화)
131
+ EXAMPLE_PROMPTS = {
132
+ "🎀 A Cappella (μ•„μΉ΄νŽ λΌ)": {
133
+ "prompt": "A cappella arrangement with pure vocal harmonies, no instrumental accompaniment. Features a lead soprano voice with rich layered backing vocals creating lush harmonies. Gentle humming bass line, rhythmic vocal percussion, and ethereal 'ooh' and 'aah' pads. The vocals blend seamlessly with precise tuning and warm reverb, creating a refreshing, meditative atmosphere. 70 BPM, peaceful and soothing mood.",
134
+ "tags": "acappella,vocals,harmony,peaceful,soothing,choir",
135
+ "description": "순수 보컬만으둜 ν’λΆ€ν•œ λ©œλ‘œλ””"
136
+ },
137
+ "πŸ‘₯ Group Harmony (κ·Έλ£Ή ν•˜λͺ¨λ‹ˆ)": {
138
+ "prompt": "Powerful group vocal anthem featuring a lead female voice with layered choir harmonies. Rich unison sections build to explosive harmonic splits in the chorus. Features call-and-response patterns, anthemic 'oh-oh-oh' chants, and soaring group harmonies. Modern pop production with punchy drums, driving bass, synth hooks, and brass stabs. 118 BPM, empowering and triumphant energy.",
139
+ "tags": "choir,harmony,pop,drums,bass,synthesizer,brass,powerful,anthem",
140
+ "description": "νŒŒμ›Œν’€ν•œ κ·Έλ£Ή 보컬 앀썸"
141
+ },
142
+ "🎷 Jazz Duet (재즈 λ“€μ—£)": {
143
+ "prompt": "Intimate jazz duet featuring conversational interplay between a warm male baritone and a silky female alto voice. Dynamic intensity variations with seamless transitions between lead vocals. Accompanied by brushed jazz drums, walking upright bass, and gentle piano comping. Saxophone solo in the bridge. 95 BPM, late-night jazz club atmosphere with warm analog sound.",
144
+ "tags": "jazz,piano,bass,drums,saxophone,romantic,intimate,duet",
145
+ "description": "남녀 보컬의 재즈 λ“€μ—£"
146
+ },
147
+ "🎸 Multi-Style (λ©€ν‹°μŠ€νƒ€μΌ)": {
148
+ "prompt": "Showcase track demonstrating one female voice transitioning through three distinct styles: Starting with energetic Jump Blues featuring powerful belting and brass stabs, transitioning to aggressive Rock with distorted guitars and raspy vocals, finally morphing into sleek Electronic with auto-tuned vocals and pulsing synths. 120 BPM with tempo shifts between sections.",
149
+ "tags": "blues,rock,electronic,guitar,synthesizer,brass,dynamic,powerful",
150
+ "description": "μŠ€νƒ€μΌ μ „ν™˜ μ‡ΌμΌ€μ΄μŠ€"
151
+ },
152
+ "πŸŒƒ Urban Chill (μ–΄λ°˜ μΉ )": {
153
+ "prompt": "Contemporary urban R&B track with a cool, laid-back vibe. Features a smooth male vocal with subtle Auto-Tune enhancement and breathy delivery. Trap-influenced 808 bass, crisp hi-hats with intricate patterns, ambient synth pads, and soft piano chords. Spacious production with heavy reverb and delay. 85 BPM, modern and sophisticated sound.",
154
+ "tags": "r&b,808,synthesizer,piano,chill,urban,modern,smooth",
155
+ "description": "μ–΄λ°˜ R&B λ¬΄λ“œ"
156
+ },
157
+ "🎹 Jazz Club (재즈 클럽)": {
158
+ "prompt": "Live jazz ensemble performance capturing the essence of Blue Note club. Instruments enter in perfect sequence: brushed drums set the groove, walking bass joins in, piano adds sophisticated chord voicings, then saxophone takes the melody. Trumpet and trombone provide punchy brass accents. Extended saxophone solo with bebop-style improvisation. 140 BPM swing feel.",
159
+ "tags": "jazz,piano,bass,drums,saxophone,trumpet,trombone,live,swing",
160
+ "description": "라이브 재즈 클럽"
161
+ },
162
+ "πŸͺ© Retro Disco (레트둜 λ””μŠ€μ½”)": {
163
+ "prompt": "Vibrant disco track channeling the golden age of 80s dance music. Features a powerful female diva vocal with dynamic range and soulful ad-libs. Classic instrumentation: four-on-the-floor kick drum, funky slap bass, rhythmic guitar scratches, lush string arrangements, and bright brass stabs. Warm analog tape saturation. 120 BPM, euphoric and nostalgic energy.",
164
+ "tags": "disco,bass,guitar,strings,brass,drums,retro,funky,dance",
165
+ "description": "80λ…„λŒ€ λ””μŠ€μ½”"
166
+ },
167
+ "🎬 Film Score (μ˜ν™” μŠ€μ½”μ–΄)": {
168
+ "prompt": "Cinematic monologue soundtrack with layered emotional progression. A contemplative male voice delivers poetic narration over evolving orchestral arrangement. Begins with solo piano and soft strings, gradually building with French horn and cello. Atmospheric sound design with ocean waves and distant thunder. 60 BPM, deeply moving and introspective.",
169
+ "tags": "orchestral,piano,strings,cello,horn,cinematic,emotional,dramatic",
170
+ "description": "μ‹œλ„€λ§ˆν‹± μŠ€μ½”μ–΄"
171
+ },
172
+ "🎡 K-Pop Dance (K-Pop λŒ„μŠ€)": {
173
+ "prompt": "High-energy K-Pop dance track with a bright, clear female vocal and polished production. Catchy hook melody that's instantly memorable. Driving beat with punchy kicks, snappy snares, and intricate hi-hat programming. Layered synth hooks, powerful brass hits, and EDM-style buildups to explosive drops. 128 BPM, confident and empowering energy.",
174
+ "tags": "kpop,synthesizer,drums,bass,brass,electronic,energetic,dance,pop",
175
+ "description": "κ³ μ—λ„ˆμ§€ K-Pop"
176
+ },
177
+ "🎻 Orchestral Ballad (μ˜€μΌ€μŠ€νŠΈλΌ λ°œλΌλ“œ)": {
178
+ "prompt": "Sweeping orchestral ballad with an emotional female soprano voice. Begins intimately with solo piano, gradually introducing strings section - first violins, then violas and cellos. French horn provides warm counter-melodies. Builds to a full orchestral climax with timpani rolls and brass fanfares. 65 BPM, epic yet intimate.",
179
+ "tags": "orchestral,piano,violin,cello,horn,strings,ballad,emotional,epic",
180
+ "description": "μ˜€μΌ€μŠ€νŠΈλΌ λ°œλΌλ“œ"
181
+ },
182
+ "πŸ”₯ HeartMuLa Default (κΈ°λ³Έ μ˜ˆμ‹œ)": {
183
+ "prompt": "Uplifting pop song with piano and synthesizer leads, happy and romantic mood. Features a clear female vocal with emotional delivery, supported by gentle drums and warm bass. Catchy melody in the chorus with memorable hooks. Clean production with balanced mix. 110 BPM, wedding-appropriate joyful atmosphere.",
184
+ "tags": "piano,happy,wedding,synthesizer,romantic",
185
+ "description": "HeartMuLa 곡식 μ˜ˆμ‹œ"
186
+ }
187
+ }
188
 
189
+ # SOMA 에이전트 - 가사 생성 (HeartMuLa 최적화)
190
+ LYRICS_AGENTS = {
191
+ "lyricist": f"""You are a master lyricist optimized for HeartMuLa and MiniMax Music generation.
 
 
 
 
 
 
192
 
193
+ {MINIMAX_MUSIC_GUIDE}
 
 
 
 
 
 
 
194
 
195
+ {HEARTMULA_LYRICS_STRUCTURE}
 
 
 
 
 
 
 
196
 
197
+ Your task: Create powerful, memorable lyrics with OPTIMAL HeartMuLa tag placement.
 
 
 
 
 
 
 
198
 
199
+ CRITICAL RULES:
200
+ 1. Use HeartMuLa structure tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]
201
+ 2. [Prechorus] (not [Pre Chorus]) - HeartMuLa format
202
+ 3. Ensure [Chorus] is the most memorable, singable part
203
+ 4. Include [Prechorus] to build tension before [Chorus]
204
+ 5. Add [Bridge] for emotional contrast
205
+ 6. Minimum 6-8 sections for quality generation
206
 
207
+ Write lyrics that create the BEST POSSIBLE foundation for high-quality music generation.""",
208
+
209
+ "producer": f"""You are a music producer specializing in song structure optimization for AI music generation.
210
+
211
+ {HEARTMULA_LYRICS_STRUCTURE}
212
+
213
+ Your task: Analyze and OPTIMIZE the tag structure for MAXIMUM musical impact.
214
+
215
+ CRITICAL OPTIMIZATION RULES:
216
+ 1. Use HeartMuLa tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro]
217
+ 2. Verify the structure follows genre conventions
218
+ 3. Ensure proper tag sequence (Verse→Prechorus→Chorus)
219
+ 4. Check that [Chorus] appears at least 2-3 times
220
+ 5. Verify [Bridge] provides contrast before final chorus
221
+ 6. Balance repetition and variation
222
+ 7. Ensure song length is appropriate (8-12 sections)
223
+
224
+ Output the restructured lyrics with OPTIMAL HeartMuLa tag placement.""",
225
+
226
+ "emotion_director": f"""You are an emotion director for music production.
227
+
228
+ {HEARTMULA_LYRICS_STRUCTURE}
229
+
230
+ Your task: Enhance emotional impact through STRATEGIC tag content.
231
+
232
+ EMOTIONAL MAPPING BY TAG:
233
+ - [Intro]: Intrigue, anticipation
234
+ - [Verse]: Storytelling, building connection
235
+ - [Prechorus]: Rising tension, excitement
236
+ - [Chorus]: Peak emotion, catharsis
237
+ - [Bridge]: Vulnerability, reflection, contrast
238
+ - [Interlude]: Breathing space, transition
239
+ - [Outro]: Resolution, lingering feeling
240
+
241
+ OPTIMIZATION RULES:
242
+ 1. Each [Verse] should progress emotionally
243
+ 2. [Chorus] must deliver the strongest emotional punch
244
+ 3. [Bridge] should offer new emotional perspective
245
+ 4. [Prechorus] should create anticipation for [Chorus]
246
+ 5. Ensure dynamic contrast between sections
247
+
248
+ Enhance the lyrics for MAXIMUM emotional resonance.""",
249
+
250
+ "final_editor": f"""You are the final editor for HeartMuLa/MiniMax Music production.
251
+
252
+ {HEARTMULA_LYRICS_STRUCTURE}
253
+
254
+ Your task: Output PERFECTLY FORMATTED, production-ready lyrics.
255
+
256
+ CRITICAL OUTPUT RULES:
257
+ 1. Output ONLY the actual lyrics with structure tags
258
+ 2. Use HeartMuLa tags EXACTLY:
259
+ [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]
260
+ 3. DO NOT include English translations in parentheses
261
+ 4. DO NOT include explanations, descriptions, or markdown
262
+ 5. DO NOT include lines starting with * or >
263
+ 6. For [Inst] or [Solo] sections, write ONLY the tag
264
+ 7. Ensure MINIMUM 6-8 different sections
265
+ 8. Verify [Chorus] appears at least 2 times
266
+
267
+ CORRECT FORMAT EXAMPLE:
268
+ [Intro]
269
+
270
+ [Verse]
271
+ μƒˆλ²½ μ•ˆκ°œ 속에 μˆ¨μ€ 우리
272
+ μ°¨κ°€μš΄ λ°”λžŒλ„ 우리 λ°œκ±ΈμŒμ— 무릎 κΏ‡μ–΄
273
+
274
+ [Prechorus]
275
+ 손을 작고 빛을 κΊΌλ‚΄
276
+
277
+ [Chorus]
278
+ 우린 λΉ›λ‚˜λŠ” 별이 λ˜μ–΄
279
+ 우리 λͺ©μ†Œλ¦¬λ‘œ 세상을 λ’€μ§‘μ–΄
280
+ Yeah yeah yeah
281
+
282
+ [Verse]
283
+ DNA에 μƒˆκ²¨μ§„ μ°½μ‘°λŠ” λ©ˆμΆ”μ§€ μ•Šμ•„
284
+
285
+ [Prechorus]
286
+ λˆˆλΆ€μ‹  νŒŒλ„λ₯Ό 타고
287
+
288
+ [Chorus]
289
+ 우린 λΉ›λ‚˜λŠ” 별이 λ˜μ–΄
290
+ 우리 λͺ©μ†Œλ¦¬λ‘œ ���상을 λ’€μ§‘μ–΄
291
+
292
+ [Bridge]
293
+ λ¬΄μ§€κ°œκ°€ 흐λ₯΄λŠ” λ°€
294
+ 우리의 κΏˆμ€ 뢈멸
295
+
296
+ [Chorus]
297
+ 우린 λΉ›λ‚˜λŠ” 별이 λ˜μ–΄
298
+ μƒˆλ²½μ„ κΉ¨μ›Œ
299
+
300
+ [Outro]
301
+ λ‚΄κ°€ λ§Œλ“  λ…Έλž˜ μ˜μ›νžˆ νƒ€μ˜€λ₯Έλ‹€
302
+
303
+ OUTPUT ONLY CLEAN LYRICS WITH OPTIMAL TAG STRUCTURE."""
304
+ }
305
+
306
+ # SOMA 에이전트 - 프롬프트 증강 (HeartMuLa 최적화)
307
+ PROMPT_AGENTS = {
308
+ "genre_specialist": f"""You are a music genre specialist for HeartMuLa and MiniMax Music.
309
+
310
+ {MINIMAX_MUSIC_GUIDE}
311
+
312
+ Analyze the input and identify:
313
+ - Main genre and sub-genres
314
+ - Era influences and regional styles
315
+ - Specific production techniques for the genre
316
+ - Instrument combinations that work best
317
+
318
+ Output detailed genre characteristics optimized for music generation.""",
319
+
320
+ "sound_designer": """You are a sound designer for HeartMuLa and MiniMax Music.
321
+
322
+ Define the complete sonic palette:
323
+ - Specific instruments: Piano, Guitar (acoustic/electric), Bass (upright/electric/808), Drums (acoustic/electronic/brushed), Synths (pad/lead/bass), Brass (saxophone/trumpet/trombone), Strings (violin/viola/cello)
324
+ - Drum patterns: four-on-the-floor, trap hi-hats, brushed jazz, etc.
325
+ - Bass characteristics: walking bass, 808 sub-bass, slap bass, etc.
326
+ - Atmospheric elements: reverb type, delay, spatial width
327
+ - Production style: analog warmth, modern crisp, lo-fi, etc.
328
+
329
+ Be extremely specific - both models can control individual instruments.""",
330
+
331
+ "vocal_director": """You are a vocal director for HeartMuLa and MiniMax Music.
332
+
333
+ Both models excel at:
334
+ - Human-like vocal timbre
335
+ - Multiple singing styles from one voice
336
+ - Male-female duets with conversational interplay
337
+ - A Cappella with layered harmonies
338
+ - Group/Choir with rich harmonic layers
339
+
340
+ Define:
341
+ - Voice type: male/female, age range, tone (warm/bright/husky/clear)
342
+ - Singing technique: belting, falsetto, breathy, raspy, smooth
343
+ - Vocal processing: reverb, delay, layers
344
+ - Delivery style: confident, vulnerable, aggressive, intimate
345
+ - For duets: describe each voice and their interaction
346
+ - For A Cappella: describe harmony parts
347
+ - For Group/Choir: describe layered vocals, unison sections""",
348
+
349
+ "tag_generator": f"""You are a style tag generator for HeartMuLa.
350
+
351
+ {HEARTMULA_TAG_GUIDE}
352
+
353
+ Your task: Generate OPTIMIZED comma-separated tags (NO SPACES between tags).
354
+
355
+ RULES:
356
+ 1. Tags must be comma-separated WITHOUT spaces: tag1,tag2,tag3
357
+ 2. Use lowercase with underscores for multi-word: electric_guitar, hip_hop
358
+ 3. Include 5-8 tags covering: instrument, mood, genre, tempo
359
+ 4. Be specific: "piano" not "keyboard", "808" not "bass"
360
+ 5. Match the musical style described
361
+
362
+ OUTPUT FORMAT (example):
363
+ piano,synthesizer,happy,pop,upbeat,romantic,energetic
364
+
365
+ Output ONLY the comma-separated tags, nothing else.""",
366
+
367
+ "prompt_synthesizer": f"""You are the final prompt synthesizer for HeartMuLa/MiniMax Music.
368
+
369
+ {MINIMAX_MUSIC_GUIDE}
370
+
371
+ Combine all inputs into ONE cohesive production prompt:
372
+ - 150-200 words in English
373
+ - Include: genre, specific BPM, instruments with details, vocal characteristics, mood, production techniques
374
+ - Be extremely specific and detailed
375
+
376
+ EXAMPLE OUTPUTS:
377
+
378
+ "A cappella arrangement with pure vocal harmonies, no instrumental accompaniment. Features a lead soprano voice with rich layered backing vocals creating lush harmonies. 70 BPM, peaceful and soothing mood."
379
+
380
+ "Intimate jazz duet featuring conversational interplay between a warm male baritone and a silky female alto voice. Accompanied by brushed jazz drums, walking upright bass, and gentle piano comping. 95 BPM, late-night jazz club atmosphere."
381
+
382
+ "High-energy K-Pop dance track with a bright, clear female vocal. Catchy hook melody, driving beat with punchy kicks and intricate hi-hat programming. Layered synth hooks and EDM-style buildups. 128 BPM, confident energy."
383
+
384
+ Output ONLY the final prompt paragraph, nothing else."""
385
+ }
386
+
387
+
388
def call_groq(api_key: str, system: str, user_prompt: str, context: str = "") -> str:
    """Send one chat-completion request to Groq and return the reply text.

    Args:
        api_key: Groq API key used to construct the client.
        system: System prompt for the request.
        user_prompt: The task for this request.
        context: Optional earlier output. When non-empty it is prepended to
            the user message as "Previous work" followed by the task.

    Returns:
        The content of the first choice's message. On any failure (client
        error, empty or malformed response) a string starting with
        ``"Error: "`` is returned instead; this function never raises.
    """
    try:
        client = Groq(api_key=api_key)

        # Build the single user message once; with context the task is
        # appended after the previous work.
        if context:
            user_content = f"Previous work:\n{context}\n\nTask: {user_prompt}"
        else:
            user_content = user_prompt
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": user_content},
        ]

        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            temperature=0.9,
            max_completion_tokens=4096,
            top_p=1,
            stream=False,
        )

        # Validate the response shape step by step so each failure mode
        # yields its own readable message.
        if completion is None:
            return "Error: API 응답이 없습니다."
        choices = getattr(completion, "choices", None)
        if not choices:
            return "Error: API 응답에 choices가 없습니다."
        message = choices[0].message
        if message is None:
            return "Error: API 응답에 message가 없습니다."
        if message.content is None:
            return "Error: API 응답 content가 비어있습니다."

        return message.content

    except Exception as e:
        # Deliberate catch-all: failures are reported as an "Error: ..."
        # string rather than raised.
        return f"Error: {str(e)}"
421
+
422
+
423
def clean_lyrics(text: str) -> str:
    """Post-process generated lyrics into the HeartMuLa lyric format.

    Processing, line by line:
      * blank lines are kept (runs of 3+ newlines are collapsed to 2 at the end);
      * a line that is only a bold-wrapped structure tag (``**[Chorus]**``)
        is reduced to the bare tag so the section marker is not lost;
      * markdown/annotation lines are dropped: lines starting with ``*``,
        ``>``, ``---``, ``###``, an en-dash bullet, or lines that are a
        single parenthetical;
      * a trailing parenthetical starting with a Latin letter is removed;
      * inline ``**bold**`` markers are unwrapped;
      * tag aliases are normalized: Pre-Chorus/Build-Up -> ``[Prechorus]``,
        Post-Chorus -> ``[Chorus]``, Break/Transition -> ``[Interlude]``.

    Args:
        text: Raw lyrics text. ``None`` yields ``""``; non-string values are
            converted with ``str()`` and then cleaned like any other input.

    Returns:
        The cleaned lyrics, stripped of leading/trailing whitespace.
    """
    import re  # function-scoped, as in the original block

    if text is None:
        return ""
    if not isinstance(text, str):
        text = str(text)
    if not text.strip():
        return ""

    # Compiled once per call instead of being rebuilt for every line.
    # (A whole-line "**...**" rule is unnecessary: "^\s*\*" already covers it.)
    skip_patterns = tuple(re.compile(p) for p in (
        r'^\s*\*',           # bullets / lines opening with emphasis
        r'^\s*>',            # block quotes
        r'^\s*---',          # horizontal rules
        r'^\s*###',          # markdown headings
        r'^\(\s*.*\s*\)$',   # a line that is only a parenthetical
        r'^\s*–\s*\*',       # en-dash bullet followed by emphasis
    ))
    bold_tag = re.compile(r'^\s*\*\*\s*(\[[^\]]+\])\s*\*\*\s*$')
    trailing_paren = re.compile(r'\s*\([A-Za-z].*?\)\s*$')
    inline_bold = re.compile(r'\*\*(.*?)\*\*')
    tag_aliases = (
        (re.compile(r'\[Pre.?Chorus\]', re.IGNORECASE), '[Prechorus]'),
        # Post-chorus is folded into [Chorus] (original note: HeartMuLa has no PostChorus tag).
        (re.compile(r'\[Post.?Chorus\]', re.IGNORECASE), '[Chorus]'),
        (re.compile(r'\[Build.?Up\]', re.IGNORECASE), '[Prechorus]'),
        (re.compile(r'\[Break\]', re.IGNORECASE), '[Interlude]'),
        (re.compile(r'\[Transition\]', re.IGNORECASE), '[Interlude]'),
    )

    cleaned_lines = []
    for line in text.split('\n'):
        if not line.strip():
            cleaned_lines.append('')
            continue

        # Rescue a bold-wrapped structure tag before the "*" rule drops it.
        tag_match = bold_tag.match(line)
        if tag_match:
            line = tag_match.group(1)

        if any(pattern.match(line) for pattern in skip_patterns):
            continue

        line = trailing_paren.sub('', line)
        line = inline_bold.sub(r'\1', line)
        for pattern, replacement in tag_aliases:
            line = pattern.sub(replacement, line)

        if line.strip():
            cleaned_lines.append(line.strip())

    result = '\n'.join(cleaned_lines)
    result = re.sub(r'\n{3,}', '\n\n', result)

    return result.strip()
478
+
479
+
480
def clean_tags(tags: str) -> str:
    """Normalize a style-tag string to HeartMuLa format.

    The result is lower-case, comma separated, contains no whitespace and no
    duplicates; the first occurrence of each tag keeps its position.

    Commas and any run of whitespace (spaces, tabs, newlines) are all
    treated as separators, so model output that puts one tag per line is
    handled the same way as a comma-separated list.

    Args:
        tags: Raw tag text. Falsy input (``None`` or ``""``) yields ``""``.

    Returns:
        The normalized tag string, e.g. ``"piano,happy,pop"``.
    """
    if not tags:
        return ""

    # Turning commas into spaces and splitting on whitespace covers every
    # separator mix in one pass and never produces empty tokens.
    tokens = tags.lower().replace(',', ' ').split()

    # dict.fromkeys de-duplicates while preserving first-seen order.
    return ','.join(dict.fromkeys(tokens))
500
+
501
+
502
def generate_lyrics_soma(
    api_key: str, theme: str, genre: str, mood: str,
    language: str, vocal_type: str, additional: str, progress=gr.Progress()
):
    """Generate lyrics with the four-stage SOMA agent pipeline.

    The stages run in order -- lyricist, producer, emotion director, final
    editor -- each through ``call_groq`` with the matching system prompt
    from ``LYRICS_AGENTS``. From the second stage on, the previous stage's
    output is passed as the fourth positional argument of ``call_groq``
    (presumably as context -- confirm against ``call_groq``). The final
    editor output is run through ``clean_lyrics``.

    Args:
        api_key: Groq API key; blank or missing aborts immediately.
        theme: Song theme; blank or missing aborts immediately.
        genre: Genre name inserted into the prompts.
        mood: Mood name inserted into the prompts.
        language: Lyrics language inserted into the prompt.
        vocal_type: Vocal arrangement; "cappella", "duet", "group" or
            "choir" in it (case-insensitive) adds an extra instruction.
            ``None`` is tolerated and adds no extra instruction.
        additional: Optional free-form instruction.
        progress: Gradio progress tracker.

    Returns:
        A 5-tuple ``(status, draft, structured, emotional, final)``. When a
        stage fails (its output starts with ``"Error:"``), ``status``
        carries the error and only the outputs of earlier stages are
        filled; the rest are ``""``.
    """
    if not api_key or not api_key.strip():
        return "❌ Groq API Key 필요", "", "", "", ""
    if not theme or not theme.strip():
        return "❌ 주제를 입력하세요", "", "", "", ""

    # Guarded so a missing vocal type cannot raise before the try block.
    vocal = (vocal_type or "").lower()
    additional_line = f'- Additional: {additional}' if additional else ''
    a_cappella_hint = (
        "For A Cappella: Include harmony sections with 'ooh', 'aah', humming."
        if "cappella" in vocal else ""
    )
    duet_hint = "For Duet: Mark vocal exchanges clearly." if "duet" in vocal else ""
    group_hint = (
        "For Group/Choir: Include anthemic chants, call-and-response patterns."
        if "group" in vocal or "choir" in vocal else ""
    )

    base_prompt = f"""Create PROFESSIONAL lyrics optimized for HeartMuLa/MiniMax Music:
- Theme: {theme}
- Genre: {genre}
- Mood: {mood}
- Language: {language}
- Vocal Type: {vocal_type}
{additional_line}

CRITICAL - USE HeartMuLa STRUCTURE TAGS:
Available: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]

REQUIRED STRUCTURE (minimum):
1. [Intro] - Set the mood
2. [Verse] x2-3 - Tell the story
3. [Prechorus] - Build tension (note: Prechorus, not Pre Chorus)
4. [Chorus] x2-3 - Main hook (MOST important!)
5. [Bridge] - Emotional contrast
6. [Outro] - Conclusion

{a_cappella_hint}
{duet_hint}
{group_hint}

Create lyrics with PERFECT HeartMuLa structure!"""

    try:
        progress(0.2, desc="🎤 작사가 - 초안 작성...")
        draft = call_groq(api_key, LYRICS_AGENTS["lyricist"], base_prompt)
        if draft.startswith("Error:"):
            return f"❌ 작사가 오류: {draft}", "", "", "", ""

        progress(0.4, desc="🎹 프로듀서 - 구조 최적화...")
        structured = call_groq(api_key, LYRICS_AGENTS["producer"],
                               f"Optimize structure for {genre} {vocal_type}. Use HeartMuLa tags.", draft)
        if structured.startswith("Error:"):
            return f"❌ 프로듀서 오류: {structured}", draft, "", "", ""

        progress(0.6, desc="💫 감성 디렉터 - 감정 강화...")
        emotional = call_groq(api_key, LYRICS_AGENTS["emotion_director"],
                              f"Enhance emotional impact for {mood}.", structured)
        if emotional.startswith("Error:"):
            return f"❌ 감성 디렉터 오류: {emotional}", draft, structured, "", ""

        progress(0.8, desc="✨ 최종 편집 - 품질 검증...")
        final = call_groq(api_key, LYRICS_AGENTS["final_editor"],
                          "Output ONLY clean lyrics with HeartMuLa tags. No translations, no markdown.", emotional)
        if final.startswith("Error:"):
            return f"❌ 최종 편집 오류: {final}", draft, structured, emotional, ""

        final_cleaned = clean_lyrics(final)

        progress(1.0, desc="✅ 완료!")
        return "✅ 가사 생성 완료!", draft, structured, emotional, final_cleaned

    except Exception as e:
        # UI boundary: report the failure in the status field instead of raising.
        return f"❌ 예외 발생: {str(e)}", "", "", "", ""
568
 
569
+
570
def quick_lyrics(api_key: str, theme: str, genre: str, mood: str, language: str, vocal_type: str, additional: str):
    """Generate HeartMuLa-structured lyrics with a single model call.

    Builds one user prompt from the arguments, sends it through
    ``call_groq`` with a songwriter system prompt that embeds
    ``HEARTMULA_LYRICS_STRUCTURE``, and post-processes the answer with
    ``clean_lyrics``.

    Args:
        api_key: Groq API key; blank or missing returns an error message.
        theme: Song theme; blank or missing returns an error message.
        genre: Genre name inserted into the prompt.
        mood: Mood name inserted into the prompt.
        language: Lyrics language inserted into the prompt.
        vocal_type: Vocal arrangement inserted into the prompt.
        additional: Optional free-form instruction; omitted when falsy.

    Returns:
        The cleaned lyrics, or a message starting with "❌" when input is
        missing, the model call reports an error, or an exception occurs.
    """
    if not api_key or not api_key.strip():
        return "❌ API Key가 필요합니다"
    if not theme or not theme.strip():
        return "❌ 주제를 입력하세요"

    special_line = f'- Special: {additional}' if additional else ''
    prompt = f"""Create PROFESSIONAL song lyrics for HeartMuLa/MiniMax Music:
- Theme: {theme}
- Genre: {genre}
- Mood: {mood}
- Language: {language}
- Vocal: {vocal_type}
{special_line}

USE HeartMuLa STRUCTURE (minimum 8-10 sections):
[Intro] → [Verse] → [Prechorus] → [Chorus] → [Verse] → [Prechorus] → [Chorus] → [Bridge] → [Chorus] → [Outro]

Available tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo]

REQUIREMENTS:
- [Chorus] must appear AT LEAST 2-3 times
- Use [Prechorus] (not Pre Chorus) before each [Chorus]
- Add [Bridge] before final [Chorus]

OUTPUT ONLY: Structure tags + lyrics. NO translations, NO explanations."""

    try:
        system_prompt = f"""You are a professional songwriter for HeartMuLa Music.
Create lyrics with PERFECT HeartMuLa structure tag placement.
{HEARTMULA_LYRICS_STRUCTURE}
Output ONLY clean lyrics with optimal tags."""
        result = call_groq(api_key, system_prompt, prompt)

        if result.startswith("Error:"):
            return f"❌ 가사 생성 실패: {result}"

        return clean_lyrics(result)
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"❌ 예외 발생: {str(e)}"
609
+
610
+
611
def generate_tags(api_key: str, genre: str, mood: str, instruments: str, tempo: str):
    """Generate HeartMuLa style tags for the given musical attributes.

    Asks the tag-generator agent (``PROMPT_AGENTS["tag_generator"]``) via
    ``call_groq`` for 5-8 tags and normalizes the answer with
    ``clean_tags``. This is best-effort: any failure yields a fixed
    fallback tag string rather than an error.

    Args:
        api_key: Groq API key; blank or missing returns the fallback.
        genre: Genre name inserted into the prompt.
        mood: Mood name inserted into the prompt.
        instruments: Instrument list inserted into the prompt.
        tempo: Tempo description inserted into the prompt.

    Returns:
        A comma-separated tag string without spaces.
    """
    fallback_tags = "piano,happy,pop"

    if not api_key or not api_key.strip():
        return fallback_tags

    prompt = f"""Generate HeartMuLa style tags:
- Genre: {genre}
- Mood: {mood}
- Instruments: {instruments}
- Tempo: {tempo}

OUTPUT FORMAT: comma-separated tags WITHOUT spaces
Example: piano,synthesizer,happy,pop,upbeat,romantic

Generate 5-8 relevant tags:"""

    try:
        result = call_groq(api_key, PROMPT_AGENTS["tag_generator"], prompt)
        if result.startswith("Error:"):
            return fallback_tags
        return clean_tags(result)
    except Exception:
        # Deliberate best-effort fallback. `Exception` (not a bare except)
        # so KeyboardInterrupt/SystemExit still propagate.
        return fallback_tags
634
+
635
+
636
def augment_prompt_soma(
    api_key: str, user_prompt: str, genre: str, mood: str,
    tempo: str, vocal_type: str, instruments: str, reference_style: str, progress=gr.Progress()
):
    """Expand a short music idea into a full production prompt plus style tags.

    Runs the SOMA prompt agents in sequence through ``call_groq`` -- genre
    specialist, sound designer, vocal director, tag generator and prompt
    synthesizer -- using the system prompts in ``PROMPT_AGENTS``. The sound
    designer and vocal director receive the previous agent's output as the
    fourth positional argument of ``call_groq`` (presumably as context --
    confirm against ``call_groq``).

    Args:
        api_key: Groq API key; blank or missing aborts immediately.
        user_prompt: The user's base idea; blank or missing aborts.
        genre: Genre name.
        mood: Mood name.
        tempo: Tempo description.
        vocal_type: Vocal style.
        instruments: Instrument list.
        reference_style: Reference style name.
        progress: Gradio progress tracker.

    Returns:
        A 2-tuple ``(prompt_or_error, tags)``. On success the first item is
        the synthesized prompt and the second the cleaned tag string (a
        fixed fallback if tag generation reported an error). On failure the
        first item is a message starting with "❌" and the second is ``""``.
    """
    if not api_key or not api_key.strip():
        return "❌ Groq API Key 필요", ""
    if not user_prompt or not user_prompt.strip():
        return "❌ 기본 프롬프트를 입력하세요", ""

    base_info = f"""User's base idea: {user_prompt}
Genre: {genre}
Mood: {mood}
Tempo: {tempo}
Vocal Type: {vocal_type}
Instruments: {instruments}
Reference Style: {reference_style}"""

    try:
        progress(0.2, desc="🎸 장르 분석중...")
        genre_analysis = call_groq(api_key, PROMPT_AGENTS["genre_specialist"], base_info)
        if genre_analysis.startswith("Error:"):
            return f"❌ 장르 분석 실패: {genre_analysis}", ""

        progress(0.4, desc="🎛️ 사운드 설계중...")
        sound_design = call_groq(api_key, PROMPT_AGENTS["sound_designer"],
                                 f"Design sounds for:\n{base_info}", genre_analysis)
        if sound_design.startswith("Error:"):
            return f"❌ 사운드 설계 실패: {sound_design}", ""

        progress(0.55, desc="🎤 보컬 설정중...")
        vocal_design = call_groq(api_key, PROMPT_AGENTS["vocal_director"],
                                 f"Define vocals for:\n{base_info}", sound_design)
        if vocal_design.startswith("Error:"):
            return f"❌ 보컬 설정 실패: {vocal_design}", ""

        progress(0.7, desc="🏷️ 태그 생성중...")
        tags = call_groq(api_key, PROMPT_AGENTS["tag_generator"],
                         f"Generate tags for: {genre}, {mood}, {instruments}, {tempo}")
        # Tag generation is non-fatal: fall back to a fixed tag set on error.
        tags_cleaned = clean_tags(tags) if not tags.startswith("Error:") else "piano,happy,pop"

        progress(0.85, desc="✨ 프롬프트 생성중...")
        final_prompt = call_groq(
            api_key,
            PROMPT_AGENTS["prompt_synthesizer"],
            f"""Synthesize into ONE music production prompt (150-200 words):
Base: {user_prompt}
Genre Analysis: {genre_analysis}
Sound Design: {sound_design}
Vocal Design: {vocal_design}
Reference Style: {reference_style}

Output ONLY the final prompt paragraph in English."""
        )

        if final_prompt.startswith("Error:"):
            return f"❌ 프롬프트 합성 실패: {final_prompt}", ""

        progress(1.0, desc="✅ 완료!")
        return final_prompt.strip(), tags_cleaned

    except Exception as e:
        # UI boundary: report the failure in the prompt field instead of raising.
        return f"❌ 예외 발생: {str(e)}", ""
699
+
700
+
701
def generate_music(api_key: str, model: str, prompt: str, lyrics: str,
                   sample_rate: int, bitrate: int, audio_format: str):
    """Generate music through the MiniMax music-generation HTTP API.

    Posts the prompt (and lyrics, when non-blank) to the MiniMax endpoint,
    decodes the hex-encoded audio in the response and writes it to a file.

    Args:
        api_key: MiniMax API key; blank or missing aborts immediately.
        model: Model identifier sent as ``model``.
        prompt: Music description; blank or missing aborts immediately.
        lyrics: Optional lyrics; sent only when non-blank.
        sample_rate: Value sent as ``audio_setting.sample_rate``.
        bitrate: Value sent as ``audio_setting.bitrate``.
        audio_format: Value sent as ``audio_setting.format``; also used as
            the saved file's extension (``"mp3"`` when falsy).

    Returns:
        A 3-tuple ``(audio_path, status_message, response_json)``.
        ``audio_path`` is ``None`` unless a file was written.
        ``response_json`` is the pretty-printed response with the audio hex
        payload abbreviated; it is ``""`` when no response was parsed (for a
        JSON parse failure it is the first 500 characters of the raw body).
    """
    if not api_key or not api_key.strip():
        return None, "❌ MiniMax API Key 필요", ""
    if not prompt or not prompt.strip():
        return None, "❌ 프롬프트 필요", ""

    url = "https://api.minimax.io/v1/music_generation"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

    payload = {
        "model": model,
        "prompt": prompt,
        "audio_setting": {
            "sample_rate": sample_rate,
            "bitrate": bitrate,
            "format": audio_format,
        },
    }
    if lyrics and lyrics.strip():
        payload["lyrics"] = lyrics

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=600)

        try:
            result = response.json()
        except Exception as json_err:
            return None, f"❌ JSON 파싱 실패: {str(json_err)}", response.text[:500] if response.text else ""

        if result is None:
            return None, "❌ API 응답이 비어있습니다.", ""

        # Copy for display only, with the (very long) audio hex abbreviated.
        result_for_display = result.copy()
        display_data = result_for_display.get("data")
        if isinstance(display_data, dict):
            data_copy = display_data.copy()
            audio_field = data_copy.get("audio")
            if isinstance(audio_field, str) and len(audio_field) > 100:
                data_copy["audio"] = f"[HEX DATA - {len(audio_field)} chars]"
            result_for_display["data"] = data_copy

        json_output = json.dumps(result_for_display, ensure_ascii=False, indent=2)

        # `or {}` also covers an explicit JSON null, which `.get(key, {})`
        # would pass through as None and then crash on, losing json_output.
        base_resp = result.get("base_resp") or {}
        status_code = base_resp.get("status_code", -1)
        status_msg = base_resp.get("status_msg", "")

        if status_code != 0:
            return None, f"❌ API 오류: {status_msg} (code: {status_code})", json_output

        data = result.get("data") or {}
        if not data:
            return None, "❌ 응답에 data가 없습니다.", json_output

        audio_hex = data.get("audio")
        audio_status = data.get("status")

        # Any status other than 2 is reported as still generating.
        if audio_status != 2:
            return None, f"⏳ 생성 중... (status: {audio_status})", json_output

        if not audio_hex:
            return None, "❌ 응답에 audio 데이터가 없습니다.", json_output

        try:
            import tempfile
            import time
            import uuid

            audio_bytes = bytes.fromhex(audio_hex)
            file_ext = audio_format if audio_format else "mp3"
            timestamp = int(time.time())
            # The random suffix keeps requests that finish within the same
            # second from overwriting each other's output file.
            filename = f"minimax_music_{timestamp}_{uuid.uuid4().hex[:8]}.{file_ext}"

            # Try progressively less preferred locations until one is writable.
            save_paths = [
                os.path.join(tempfile.gettempdir(), filename),
                os.path.join(os.getcwd(), filename),
                filename,
            ]

            saved_path = None
            for path in save_paths:
                try:
                    with open(path, "wb") as f:
                        f.write(audio_bytes)
                    saved_path = path
                    break
                except Exception:
                    continue

            if not saved_path:
                return None, "❌ 파일 저장 실패", json_output

            extra_info = result.get("extra_info") or {}
            duration_ms = extra_info.get("music_duration", 0)
            duration_sec = duration_ms / 1000 if duration_ms else 0
            file_size_kb = len(audio_bytes) / 1024

            return saved_path, f"✅ 음악 생성 완료! ({duration_sec:.1f}초, {file_size_kb:.0f}KB)", json_output

        except ValueError as hex_err:
            return None, f"❌ HEX 디코딩 실패: {str(hex_err)}", json_output
        except Exception as save_err:
            return None, f"❌ 파일 저장 실패: {str(save_err)}", json_output

    except requests.exceptions.Timeout:
        return None, "❌ 요청 시간 초과 (10분)", ""
    except requests.exceptions.ConnectionError:
        return None, "❌ 연결 오류", ""
    except Exception as e:
        # UI boundary: report the failure in the status field instead of raising.
        return None, f"❌ 예외 발생: {str(e)}", ""
814
+
815
+
816
+ def load_example_from_dropdown(selection):
817
+ """Dropdown μ„ νƒμ‹œ 예제 ν”„λ‘¬ν”„νŠΈ 및 νƒœκ·Έ λ‘œλ“œ"""
818
+ if not selection:
819
+ return "", ""
820
+
821
+ mapping = {
822
+ "🎀 A Cappella - 순수 보컬 ν•˜λͺ¨λ‹ˆ": "🎀 A Cappella (μ•„μΉ΄νŽ λΌ)",
823
+ "πŸ‘₯ Group Harmony - νŒŒμ›Œν’€ ν•©μ°½": "πŸ‘₯ Group Harmony (κ·Έλ£Ή ν•˜λͺ¨λ‹ˆ)",
824
+ "🎷 Jazz Duet - 남녀 λ“€μ—£": "🎷 Jazz Duet (재즈 λ“€μ—£)",
825
+ "🎸 Multi-Style - μŠ€νƒ€μΌ μ „ν™˜": "🎸 Multi-Style (λ©€ν‹°μŠ€νƒ€μΌ)",
826
+ "πŸŒƒ Urban Chill - R&B": "πŸŒƒ Urban Chill (μ–΄λ°˜ μΉ )",
827
+ "🎹 Jazz Club - 라이브": "🎹 Jazz Club (재즈 클럽)",
828
+ "πŸͺ© Retro Disco - 80λ…„λŒ€": "πŸͺ© Retro Disco (레트둜 λ””μŠ€μ½”)",
829
+ "🎬 Film Score - μ‹œλ„€λ§ˆν‹±": "🎬 Film Score (μ˜ν™” μŠ€μ½”μ–΄)",
830
+ "🎡 K-Pop Dance - κ³ μ—λ„ˆμ§€": "🎡 K-Pop Dance (K-Pop λŒ„μŠ€)",
831
+ "🎻 Orchestral Ballad - μ›…μž₯": "🎻 Orchestral Ballad (μ˜€μΌ€μŠ€νŠΈλΌ λ°œλΌλ“œ)",
832
+ "πŸ”₯ HeartMuLa Default - κΈ°λ³Έ": "πŸ”₯ HeartMuLa Default (κΈ°λ³Έ μ˜ˆμ‹œ)"
833
+ }
834
+
835
+ key = mapping.get(selection)
836
+ if key and key in EXAMPLE_PROMPTS:
837
+ return EXAMPLE_PROMPTS[key]["prompt"], EXAMPLE_PROMPTS[key]["tags"]
838
+ return "", ""
839
+
840
+
841
+ # ============================================================
842
+ # 🎨 Comic Classic Theme - Toon Playground (Document 2 기반)
843
+ # ============================================================
844
+
845
+ css = """
846
+ /* ===== 🎨 Google Fonts Import ===== */
847
+ @import url('https://fonts.googleapis.com/css2?family=Bangers&family=Comic+Neue:wght@400;700&display=swap');
848
+
849
+ /* ===== 🎨 Comic Classic λ°°κ²½ - λΉˆν‹°μ§€ 페이퍼 + λ„νŠΈ νŒ¨ν„΄ ===== */
850
+ .gradio-container {
851
+ background-color: #FEF9C3 !important;
852
+ background-image:
853
+ radial-gradient(#1F2937 1px, transparent 1px) !important;
854
+ background-size: 20px 20px !important;
855
+ min-height: 100vh !important;
856
+ font-family: 'Comic Neue', cursive, sans-serif !important;
857
+ }
858
+
859
+ /* ===== ν—ˆκΉ…νŽ˜μ΄μŠ€ 상단 μš”μ†Œ μˆ¨κΉ€ ===== */
860
+ .huggingface-space-header,
861
+ #space-header,
862
+ .space-header,
863
+ [class*="space-header"],
864
+ .svelte-1ed2p3z,
865
+ .space-header-badge,
866
+ .header-badge {
867
+ display: none !important;
868
+ }
869
+
870
+ /* ===== Footer μ™„μ „ μˆ¨κΉ€ ===== */
871
+ footer,
872
+ .footer,
873
+ .gradio-container footer,
874
+ .built-with {
875
+ display: none !important;
876
+ }
877
+
878
+ /* ===== 메인 μ»¨ν…Œμ΄λ„ˆ ===== */
879
+ #col-container {
880
+ max-width: 1400px;
881
+ margin: 0 auto;
882
+ }
883
+
884
+ /* ===== 🎨 헀더 타이틀 - μ½”λ―Ή μŠ€νƒ€μΌ ===== */
885
+ .header-title h1 {
886
+ font-family: 'Bangers', cursive !important;
887
+ color: #1F2937 !important;
888
+ font-size: 3.2rem !important;
889
+ font-weight: 400 !important;
890
+ text-align: center !important;
891
+ margin-bottom: 0.5rem !important;
892
+ text-shadow:
893
+ 4px 4px 0px #FACC15,
894
+ 6px 6px 0px #1F2937 !important;
895
+ letter-spacing: 3px !important;
896
+ -webkit-text-stroke: 2px #1F2937 !important;
897
+ }
898
+
899
+ /* ===== 🎨 μ„œλΈŒνƒ€μ΄ν‹€ ===== */
900
+ .subtitle-text {
901
+ text-align: center !important;
902
+ font-family: 'Comic Neue', cursive !important;
903
+ font-size: 1.1rem !important;
904
+ color: #1F2937 !important;
905
+ margin-bottom: 1.5rem !important;
906
+ font-weight: 700 !important;
907
+ }
908
+
909
+ /* ===== 🎨 μ„Ήμ…˜ 타이틀 ===== */
910
+ .section-title {
911
+ font-family: 'Bangers', cursive !important;
912
+ color: #1F2937 !important;
913
+ font-size: 1.8rem !important;
914
+ border-bottom: 4px solid #3B82F6 !important;
915
+ padding-bottom: 8px !important;
916
+ margin-bottom: 16px !important;
917
+ text-shadow: 2px 2px 0px #FACC15 !important;
918
+ }
919
+
920
+ /* ===== 🎨 μΉ΄λ“œ/νŒ¨λ„ - λ§Œν™” ν”„λ ˆμž„ μŠ€νƒ€μΌ ===== */
921
+ .gr-panel,
922
+ .gr-box,
923
+ .gr-form,
924
+ .block,
925
+ .gr-group {
926
+ background: #FFFFFF !important;
927
+ border: 3px solid #1F2937 !important;
928
+ border-radius: 8px !important;
929
+ box-shadow: 6px 6px 0px #1F2937 !important;
930
+ transition: all 0.2s ease !important;
931
+ }
932
+
933
+ .gr-panel:hover,
934
+ .block:hover {
935
+ transform: translate(-2px, -2px) !important;
936
+ box-shadow: 8px 8px 0px #1F2937 !important;
937
+ }
938
+
939
+ /* ===== 🎨 μž…λ ₯ ν•„λ“œ (Textbox) ===== */
940
+ textarea,
941
+ input[type="text"],
942
+ input[type="number"],
943
+ input[type="password"] {
944
+ background: #FFFFFF !important;
945
+ border: 3px solid #1F2937 !important;
946
+ border-radius: 8px !important;
947
+ color: #1F2937 !important;
948
+ font-family: 'Comic Neue', cursive !important;
949
+ font-size: 1rem !important;
950
+ font-weight: 700 !important;
951
+ transition: all 0.2s ease !important;
952
+ }
953
+
954
+ textarea:focus,
955
+ input[type="text"]:focus,
956
+ input[type="number"]:focus,
957
+ input[type="password"]:focus {
958
+ border-color: #3B82F6 !important;
959
+ box-shadow: 4px 4px 0px #3B82F6 !important;
960
+ outline: none !important;
961
+ }
962
+
963
+ textarea::placeholder {
964
+ color: #9CA3AF !important;
965
+ font-weight: 400 !important;
966
+ }
967
+
968
+ /* ===== 🎨 Primary λ²„νŠΌ - μ½”λ―Ή 블루 ===== */
969
+ .gr-button-primary,
970
+ button.primary,
971
+ .gr-button.primary {
972
+ background: #3B82F6 !important;
973
+ border: 3px solid #1F2937 !important;
974
+ border-radius: 8px !important;
975
+ color: #FFFFFF !important;
976
+ font-family: 'Bangers', cursive !important;
977
+ font-weight: 400 !important;
978
+ font-size: 1.2rem !important;
979
+ letter-spacing: 2px !important;
980
+ padding: 12px 24px !important;
981
+ box-shadow: 5px 5px 0px #1F2937 !important;
982
+ transition: all 0.1s ease !important;
983
+ text-shadow: 1px 1px 0px #1F2937 !important;
984
+ }
985
+
986
+ .gr-button-primary:hover,
987
+ button.primary:hover,
988
+ .gr-button.primary:hover {
989
+ background: #2563EB !important;
990
+ transform: translate(-2px, -2px) !important;
991
+ box-shadow: 7px 7px 0px #1F2937 !important;
992
+ }
993
+
994
+ .gr-button-primary:active,
995
+ button.primary:active,
996
+ .gr-button.primary:active {
997
+ transform: translate(3px, 3px) !important;
998
+ box-shadow: 2px 2px 0px #1F2937 !important;
999
+ }
1000
+
1001
+ /* ===== 🎨 Secondary λ²„νŠΌ - μ½”λ―Ή λ ˆλ“œ ===== */
1002
+ .gr-button-secondary,
1003
+ button.secondary {
1004
+ background: #EF4444 !important;
1005
+ border: 3px solid #1F2937 !important;
1006
+ border-radius: 8px !important;
1007
+ color: #FFFFFF !important;
1008
+ font-family: 'Bangers', cursive !important;
1009
+ font-weight: 400 !important;
1010
+ font-size: 1.1rem !important;
1011
+ letter-spacing: 1px !important;
1012
+ box-shadow: 4px 4px 0px #1F2937 !important;
1013
+ transition: all 0.1s ease !important;
1014
+ text-shadow: 1px 1px 0px #1F2937 !important;
1015
+ }
1016
+
1017
+ .gr-button-secondary:hover,
1018
+ button.secondary:hover {
1019
+ background: #DC2626 !important;
1020
+ transform: translate(-2px, -2px) !important;
1021
+ box-shadow: 6px 6px 0px #1F2937 !important;
1022
+ }
1023
+
1024
+ /* ===== 🎨 Generate λ²„νŠΌ - μ½”λ―Ή κ·Έλ¦° ===== */
1025
+ .generate-btn {
1026
+ background: #10B981 !important;
1027
+ border: 3px solid #1F2937 !important;
1028
+ border-radius: 8px !important;
1029
+ color: #FFFFFF !important;
1030
+ font-family: 'Bangers', cursive !important;
1031
+ font-weight: 400 !important;
1032
+ font-size: 1.3rem !important;
1033
+ letter-spacing: 2px !important;
1034
+ box-shadow: 5px 5px 0px #1F2937 !important;
1035
+ text-shadow: 1px 1px 0px #1F2937 !important;
1036
+ }
1037
+
1038
+ .generate-btn:hover {
1039
+ background: #059669 !important;
1040
+ transform: translate(-2px, -2px) !important;
1041
+ box-shadow: 7px 7px 0px #1F2937 !important;
1042
+ }
1043
+
1044
+ /* ===== 🎨 μ•„μ½”λ””μ–Έ - 말풍선 μŠ€νƒ€μΌ ===== */
1045
+ .gr-accordion {
1046
+ background: #FACC15 !important;
1047
+ border: 3px solid #1F2937 !important;
1048
+ border-radius: 8px !important;
1049
+ box-shadow: 4px 4px 0px #1F2937 !important;
1050
+ }
1051
+
1052
+ .gr-accordion-header {
1053
+ color: #1F2937 !important;
1054
+ font-family: 'Comic Neue', cursive !important;
1055
+ font-weight: 700 !important;
1056
+ font-size: 1.1rem !important;
1057
+ }
1058
+
1059
+ /* ===== 🎨 Dropdown ===== */
1060
+ .gr-dropdown,
1061
+ select {
1062
+ background: #FFFFFF !important;
1063
+ border: 3px solid #1F2937 !important;
1064
+ border-radius: 8px !important;
1065
+ color: #1F2937 !important;
1066
+ font-family: 'Comic Neue', cursive !important;
1067
+ font-weight: 700 !important;
1068
+ }
1069
+
1070
+ /* ===== 🎨 라벨 μŠ€νƒ€μΌ ===== */
1071
+ label,
1072
+ .gr-input-label,
1073
+ .gr-block-label {
1074
+ color: #1F2937 !important;
1075
+ font-family: 'Comic Neue', cursive !important;
1076
+ font-weight: 700 !important;
1077
+ font-size: 1rem !important;
1078
+ }
1079
+
1080
+ /* ===== 🎨 μ˜€λ””μ˜€ ν”Œλ ˆμ΄μ–΄ ===== */
1081
+ .gr-audio,
1082
+ audio {
1083
+ border: 4px solid #1F2937 !important;
1084
+ border-radius: 8px !important;
1085
+ box-shadow: 6px 6px 0px #1F2937 !important;
1086
+ }
1087
+
1088
+ /* ===== 🎨 μ½”λ“œ 블둝 ===== */
1089
+ .gr-code,
1090
+ pre,
1091
+ code {
1092
+ background: #1F2937 !important;
1093
+ color: #10B981 !important;
1094
+ font-family: 'Courier New', monospace !important;
1095
+ border: 3px solid #10B981 !important;
1096
+ border-radius: 8px !important;
1097
+ box-shadow: 4px 4px 0px #10B981 !important;
1098
+ }
1099
+
1100
+ /* ===== 🎨 λ§ˆν¬λ‹€μš΄ ===== */
1101
+ .gr-markdown {
1102
+ font-family: 'Comic Neue', cursive !important;
1103
+ color: #1F2937 !important;
1104
+ }
1105
+
1106
+ .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
1107
+ font-family: 'Bangers', cursive !important;
1108
+ color: #1F2937 !important;
1109
+ text-shadow: 2px 2px 0px #FACC15 !important;
1110
+ }
1111
+
1112
+ /* ===== 🎨 νƒ­ μŠ€νƒ€μΌ ===== */
1113
+ .gr-tab-nav {
1114
+ background: #FACC15 !important;
1115
+ border: 3px solid #1F2937 !important;
1116
+ border-radius: 8px 8px 0 0 !important;
1117
+ }
1118
+
1119
+ .gr-tab-nav button {
1120
+ font-family: 'Comic Neue', cursive !important;
1121
+ font-weight: 700 !important;
1122
+ color: #1F2937 !important;
1123
+ }
1124
+
1125
+ .gr-tab-nav button.selected {
1126
+ background: #3B82F6 !important;
1127
+ color: #FFFFFF !important;
1128
+ }
1129
+
1130
+ /* ===== 🎨 μƒνƒœ ν‘œμ‹œ ===== */
1131
+ .status-box textarea {
1132
+ background: #1F2937 !important;
1133
+ color: #10B981 !important;
1134
+ font-family: 'Courier New', monospace !important;
1135
+ font-size: 0.9rem !important;
1136
+ border: 3px solid #10B981 !important;
1137
+ border-radius: 8px !important;
1138
+ }
1139
+
1140
+ /* ===== 🎨 μŠ€ν¬λ‘€λ°” - μ½”λ―Ή μŠ€νƒ€μΌ ===== */
1141
+ ::-webkit-scrollbar {
1142
+ width: 12px;
1143
+ height: 12px;
1144
+ }
1145
+
1146
+ ::-webkit-scrollbar-track {
1147
+ background: #FEF9C3;
1148
+ border: 2px solid #1F2937;
1149
+ }
1150
+
1151
+ ::-webkit-scrollbar-thumb {
1152
+ background: #3B82F6;
1153
+ border: 2px solid #1F2937;
1154
+ border-radius: 0px;
1155
+ }
1156
+
1157
+ ::-webkit-scrollbar-thumb:hover {
1158
+ background: #EF4444;
1159
+ }
1160
+
1161
+ /* ===== 🎨 선택 ν•˜μ΄λΌμ΄νŠΈ ===== */
1162
+ ::selection {
1163
+ background: #FACC15;
1164
+ color: #1F2937;
1165
+ }
1166
+
1167
+ /* ===== 🎨 Row/Column 간격 ===== */
1168
+ .gr-row {
1169
+ gap: 1.5rem !important;
1170
+ }
1171
+
1172
+ .gr-column {
1173
+ gap: 1rem !important;
1174
+ }
1175
+
1176
+ /* ===== 🎨 νƒœκ·Έ μž…λ ₯ νŠΉλ³„ μŠ€νƒ€μΌ ===== */
1177
+ .tag-input textarea {
1178
+ background: #FEF3C7 !important;
1179
+ border: 3px dashed #F59E0B !important;
1180
+ font-family: 'Courier New', monospace !important;
1181
+ }
1182
+
1183
+ /* ===== λ°˜μ‘ν˜• μ‘°μ • ===== */
1184
+ @media (max-width: 768px) {
1185
+ .header-title h1 {
1186
+ font-size: 2rem !important;
1187
+ text-shadow:
1188
+ 3px 3px 0px #FACC15,
1189
+ 4px 4px 0px #1F2937 !important;
1190
+ }
1191
+
1192
+ .gr-button-primary,
1193
+ button.primary {
1194
+ padding: 10px 16px !important;
1195
+ font-size: 1rem !important;
1196
+ }
1197
+ }
1198
+
1199
+ /* ===== 🎨 특수 효과 - λ°˜μ§μž„ ===== */
1200
+ @keyframes sparkle {
1201
+ 0%, 100% { opacity: 1; }
1202
+ 50% { opacity: 0.7; }
1203
+ }
1204
+
1205
+ .sparkle {
1206
+ animation: sparkle 2s ease-in-out infinite;
1207
+ }
1208
+ """
1209
+
1210
+ # ============================================================
1211
+ # Gradio UI
1212
+ # ============================================================
1213
+
1214
+ with gr.Blocks(css=css, title="🎡 SOMA Music Studio", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
1215
+
1216
+ # HOME Badge
1217
+ gr.HTML("""
1218
+ <div style="text-align: center; margin: 20px 0 10px 0;">
1219
+ <a href="https://huggingface.co/HeartMuLa" target="_blank" style="text-decoration: none;">
1220
+ <img src="https://img.shields.io/badge/🎡_HeartMuLa-Official-ff6b6b?style=for-the-badge&labelColor=1F2937" alt="HeartMuLa">
1221
+ </a>
1222
+ <a href="https://www.minimax.io" target="_blank" style="text-decoration: none; margin-left: 10px;">
1223
+ <img src="https://img.shields.io/badge/🎹_MiniMax-Music_2.5-3B82F6?style=for-the-badge&labelColor=1F2937" alt="MiniMax">
1224
+ </a>
1225
+ </div>
1226
+ """)
1227
+
1228
+ # Header
1229
+ gr.Markdown("""
1230
+ # 🎡 SOMA MUSIC STUDIO 🎢
1231
+ """, elem_classes="header-title")
1232
+
1233
+ gr.Markdown("""
1234
+ <p class="subtitle-text">πŸ’« HeartMuLa + MiniMax Music 2.5 + SOMA Multi-Agent = 졜고 ν’ˆμ§ˆ AI μŒμ•… 생성 πŸ’«</p>
1235
+ <p class="subtitle-text">🎀 A Cappella | 🎷 Jazz Duet | 🎸 Multi-Style | 🎬 Film Score | 🎡 K-Pop 지원</p>
1236
+ """)
1237
+
1238
+ # API Keys
1239
+ GROQ_KEY = os.environ.get("GROQ_API_KEY", "")
1240
+ MINIMAX_KEY = os.environ.get("MINIMAX_API_KEY", "")
1241
+
1242
+ with gr.Accordion("πŸ”‘ API Keys", open=not (GROQ_KEY and MINIMAX_KEY)):
1243
+ with gr.Row():
1244
+ groq_key = gr.Textbox(
1245
+ label="πŸ¦™ Groq API Key (가사/ν”„λ‘¬ν”„νŠΈ μƒμ„±μš©)",
1246
+ type="password",
1247
+ value=GROQ_KEY,
1248
+ placeholder="gsk_..." if not GROQ_KEY else "βœ… Secret λ‘œλ“œλ¨",
1249
+ interactive=not bool(GROQ_KEY)
1250
+ )
1251
+ minimax_key = gr.Textbox(
1252
+ label="🎹 MiniMax API Key (μŒμ•… μƒμ„±μš©)",
1253
+ type="password",
1254
+ value=MINIMAX_KEY,
1255
+ placeholder="API Key" if not MINIMAX_KEY else "βœ… Secret λ‘œλ“œλ¨",
1256
+ interactive=not bool(MINIMAX_KEY)
1257
+ )
1258
+
1259
+ with gr.Row(equal_height=False):
1260
+ # ========== 쒌츑: 가사 생성 ==========
1261
+ with gr.Column(scale=1, min_width=400):
1262
+ gr.Markdown("## πŸ“ LYRICS GENERATOR", elem_classes="section-title")
1263
+
1264
+ theme_input = gr.Textbox(
1265
+ label="🎯 λ…Έλž˜ 주제",
1266
+ placeholder="예: 이별 ν›„ μ„±μž₯, κΏˆμ„ ν–₯ν•œ 도전, μ‚¬λž‘μ˜ κ³ λ°±, μš°μ •μ˜ 힘...",
1267
+ lines=2
1268
+ )
1269
+
1270
+ with gr.Row():
1271
+ lyrics_genre = gr.Dropdown(
1272
+ label="🎸 μž₯λ₯΄",
1273
+ choices=["K-Pop", "Pop", "R&B", "Hip-Hop", "Ballad", "Rock",
1274
+ "EDM", "Trap", "Jazz", "Blues", "Folk", "Disco", "Cinematic"],
1275
+ value="K-Pop"
1276
+ )
1277
+ lyrics_mood = gr.Dropdown(
1278
+ label="πŸ’« λΆ„μœ„κΈ°",
1279
+ choices=["Empowering", "Melancholic", "Joyful", "Romantic",
1280
+ "Aggressive", "Dreamy", "Nostalgic", "Energetic", "Dark",
1281
+ "Peaceful", "Confident", "Intimate"],
1282
+ value="Empowering"
1283
+ )
1284
+
1285
+ with gr.Row():
1286
+ lyrics_language = gr.Dropdown(
1287
+ label="🌍 μ–Έμ–΄",
1288
+ choices=["English", "Korean", "Korean + English", "Japanese"],
1289
+ value="Korean"
1290
+ )
1291
+ lyrics_vocal_type = gr.Dropdown(
1292
+ label="🎀 보컬 νƒ€μž…",
1293
+ choices=["Solo Female", "Solo Male", "Female Duet", "Male Duet",
1294
+ "Male-Female Duet", "A Cappella", "Group/Choir"],
1295
+ value="Group/Choir"
1296
+ )
1297
+
1298
+ lyrics_additional = gr.Textbox(
1299
+ label="✨ μΆ”κ°€ μ§€μ‹œ (선택)",
1300
+ placeholder="νŠΉλ³„ μš”μ²­: 후렴ꡬ κ°•μ‘°, 랩 파트 μΆ”κ°€, νŠΉμ • 단어 포함...",
1301
+ lines=1
1302
+ )
1303
+
1304
+ with gr.Row():
1305
+ quick_btn = gr.Button("⚑ QUICK GENERATE", variant="secondary")
1306
+ soma_lyrics_btn = gr.Button("🧠 SOMA GENERATE", variant="primary")
1307
+
1308
+ lyrics_status = gr.Textbox(label="πŸ“Š μƒνƒœ", interactive=False, max_lines=1, elem_classes="status-box")
1309
+
1310
+ with gr.Accordion("πŸ” SOMA μž‘μ—… κ³Όμ •", open=False):
1311
+ with gr.Row():
1312
+ with gr.Column():
1313
+ step1_out = gr.Textbox(label="1️⃣ μž‘μ‚¬κ°€", lines=4, interactive=False)
1314
+ step2_out = gr.Textbox(label="2️⃣ ν”„λ‘œλ“€μ„œ", lines=4, interactive=False)
1315
+ with gr.Column():
1316
+ step3_out = gr.Textbox(label="3️⃣ 감성 λ””λ ‰ν„°", lines=4, interactive=False)
1317
+ step4_out = gr.Textbox(label="4️⃣ μ΅œμ’… νŽΈμ§‘", lines=4, interactive=False)
1318
+
1319
+ final_lyrics = gr.Textbox(
1320
+ label="✏️ μ΅œμ’… 가사 (νŽΈμ§‘ κ°€λŠ₯)",
1321
+ lines=14,
1322
+ placeholder="μƒμ„±λœ 가사가 μ—¬κΈ° ν‘œμ‹œλ©λ‹ˆλ‹€...\n\n[Intro]\n\n[Verse]\n...\n\n[Chorus]\n..."
1323
  )
1324
+
1325
+ with gr.Accordion("πŸ“‹ HeartMuLa ꡬ쑰 νƒœκ·Έ κ°€μ΄λ“œ", open=False):
1326
+ gr.Markdown("""
1327
+ **HeartMuLa νƒœκ·Έ:** `[Intro]` `[Verse]` `[Prechorus]` `[Chorus]` `[Bridge]` `[Interlude]` `[Hook]` `[Outro]` `[Inst]` `[Solo]`
1328
 
1329
+ ⚠️ **주의:** `[Pre Chorus]`κ°€ μ•„λ‹Œ `[Prechorus]` μ‚¬μš© (곡백 μ—†μŒ)
 
 
 
 
 
 
 
 
 
 
 
1330
 
1331
+ **졜적 ꡬ쑰 μ˜ˆμ‹œ:**
1332
+ ```
1333
+ [Intro] β†’ [Verse] β†’ [Prechorus] β†’ [Chorus]
1334
+ β†’ [Verse] β†’ [Prechorus] β†’ [Chorus]
1335
+ β†’ [Bridge] β†’ [Chorus] β†’ [Outro]
1336
+ ```
1337
 
1338
+ **νƒœκ·Έλ³„ μ—­ν• :**
1339
+ - `[Chorus]` - κ°€μž₯ μ€‘μš”! 2-3회 반볡, 기얡에 λ‚¨λŠ” ν›…
1340
+ - `[Prechorus]` - μ½”λŸ¬μŠ€ μ „ ν…μ…˜ λΉŒλ“œμ—…
1341
+ - `[Bridge]` - 감정 μ „ν™˜μ , μ΅œμ’… μ½”λŸ¬μŠ€ μ „ 배치
1342
+ """)
1343
+
1344
+ # ========== 우츑: μŒμ•… 생성 ==========
1345
+ with gr.Column(scale=1, min_width=400):
1346
+ gr.Markdown("## 🎡 MUSIC GENERATOR", elem_classes="section-title")
1347
+
1348
+ # 예제 ν”„λ‘¬ν”„νŠΈ 선택
1349
+ gr.Markdown("### πŸ“š 예제 ν”„λ‘¬ν”„νŠΈ (ν΄λ¦­ν•˜λ©΄ μžλ™ μž…λ ₯)")
1350
+
1351
+ example_dropdown = gr.Dropdown(
1352
+ label="🎯 예제 선택",
1353
+ choices=[
1354
+ "🎀 A Cappella - 순수 보컬 ν•˜λͺ¨λ‹ˆ",
1355
+ "πŸ‘₯ Group Harmony - νŒŒμ›Œν’€ ν•©μ°½",
1356
+ "🎷 Jazz Duet - 남녀 λ“€μ—£",
1357
+ "🎸 Multi-Style - μŠ€νƒ€μΌ μ „ν™˜",
1358
+ "πŸŒƒ Urban Chill - R&B",
1359
+ "🎹 Jazz Club - 라이브",
1360
+ "πŸͺ© Retro Disco - 80λ…„λŒ€",
1361
+ "🎬 Film Score - μ‹œλ„€λ§ˆν‹±",
1362
+ "🎡 K-Pop Dance - κ³ μ—λ„ˆμ§€",
1363
+ "🎻 Orchestral Ballad - μ›…μž₯",
1364
+ "πŸ”₯ HeartMuLa Default - κΈ°λ³Έ"
1365
+ ],
1366
+ value=None,
1367
+ interactive=True
1368
+ )
1369
+
1370
+ gr.Markdown("### πŸŽ›οΈ ν”„λ‘¬ν”„νŠΈ μ„€μ •")
1371
+
1372
+ base_prompt = gr.Textbox(
1373
+ label="πŸ’‘ κΈ°λ³Έ 아이디어",
1374
+ placeholder="μ›ν•˜λŠ” μŒμ•… μŠ€νƒ€μΌμ„ κ°„λ‹¨νžˆ μ„€λͺ…ν•˜μ„Έμš”...",
1375
+ lines=2
1376
+ )
1377
+
1378
+ with gr.Row():
1379
+ music_genre = gr.Dropdown(
1380
+ label="🎸 μž₯λ₯΄",
1381
+ choices=["K-Pop", "Pop", "R&B/Soul", "Hip-Hop/Trap", "EDM/House",
1382
+ "Rock", "Ballad", "Jazz", "Blues", "Lo-Fi", "Disco",
1383
+ "Cinematic", "Classical"],
1384
+ value="K-Pop"
1385
+ )
1386
+ music_mood = gr.Dropdown(
1387
+ label="πŸ’« λΆ„μœ„κΈ°",
1388
+ choices=["Energetic", "Chill", "Emotional", "Dark", "Uplifting",
1389
+ "Romantic", "Aggressive", "Dreamy", "Confident", "Peaceful",
1390
+ "Nostalgic", "Epic"],
1391
+ value="Energetic"
1392
+ )
1393
+
1394
+ with gr.Row():
1395
+ music_tempo = gr.Dropdown(
1396
+ label="⏱️ ν…œν¬",
1397
+ choices=["Very Slow (50-70 BPM)", "Slow (70-90 BPM)", "Medium (90-110 BPM)",
1398
+ "Fast (110-130 BPM)", "Very Fast (130-150 BPM)"],
1399
+ value="Medium (90-110 BPM)"
1400
+ )
1401
+ music_vocal = gr.Dropdown(
1402
+ label="🎀 보컬",
1403
+ choices=["Female (Clear)", "Female (Warm)", "Female (Powerful)",
1404
+ "Male (Smooth)", "Male (Deep)", "Male (Raspy)",
1405
+ "Male-Female Duet", "A Cappella", "Group/Choir",
1406
+ "Instrumental"],
1407
+ value="Group/Choir"
1408
+ )
1409
+
1410
+ music_instruments = gr.Textbox(
1411
+ label="🎹 μ•…κΈ°/μ‚¬μš΄λ“œ",
1412
+ placeholder="예: piano, synthesizer, drums, bass, strings, saxophone...",
1413
+ value="piano,synthesizer,drums,bass,strings"
1414
+ )
1415
+
1416
+ music_reference = gr.Dropdown(
1417
+ label="🎯 레퍼런슀 μŠ€νƒ€μΌ",
1418
+ choices=["None", "A Cappella", "Jazz Club", "Urban R&B",
1419
+ "Retro Disco", "Film Score", "K-Pop Dance", "Orchestral Ballad"],
1420
+ value="None"
1421
+ )
1422
+
1423
+ augment_btn = gr.Button("πŸš€ SOMA AUGMENT", variant="primary", size="lg")
1424
+
1425
+ augmented_prompt = gr.Textbox(
1426
+ label="✨ μ¦κ°•λœ ν”„λ‘¬ν”„νŠΈ (νŽΈμ§‘ κ°€λŠ₯)",
1427
+ lines=5,
1428
+ placeholder="SOMAκ°€ μƒμ„±ν•œ κ³ ν’ˆμ§ˆ ν”„λ‘¬ν”„νŠΈ..."
1429
+ )
1430
+
1431
+ style_tags = gr.Textbox(
1432
+ label="🏷️ μŠ€νƒ€μΌ νƒœκ·Έ (HeartMuLa 포맷: 콀마 ꡬ뢄, 곡백 μ—†μŒ)",
1433
+ placeholder="piano,happy,pop,upbeat,romantic",
1434
+ lines=1,
1435
+ elem_classes="tag-input"
1436
+ )
1437
+
1438
+ with gr.Accordion("βš™οΈ 생성 μ„€μ •", open=False):
1439
+ model_select = gr.Dropdown(
1440
+ label="πŸ€– λͺ¨λΈ", choices=["music-2.5"], value="music-2.5"
1441
+ )
1442
+ with gr.Row():
1443
+ sample_rate = gr.Dropdown(label="Sample Rate", choices=[44100], value=44100)
1444
+ bitrate = gr.Dropdown(label="Bitrate", choices=[128000, 192000, 256000], value=256000)
1445
+ audio_format = gr.Dropdown(label="Format", choices=["mp3", "wav"], value="mp3")
1446
+
1447
+ generate_music_btn = gr.Button("🎢 GENERATE MUSIC!", variant="primary", size="lg", elem_classes="generate-btn")
1448
+
1449
+ music_status = gr.Textbox(label="πŸ“Š μƒνƒœ", interactive=False, max_lines=1, elem_classes="status-box")
1450
+ music_output = gr.Audio(label="🎧 μƒμ„±λœ μŒμ•…", type="filepath")
1451
+
1452
+ with gr.Accordion("πŸ“‹ API 응닡", open=False):
1453
+ json_output = gr.Code(label="JSON Response", language="json", lines=6)
1454
+
1455
+ # ========== Bottom: guide ==========
1456
+ with gr.Accordion("πŸ“– HeartMuLa & MiniMax κ°€μ΄λ“œ", open=False):
1457
+ gr.Markdown(f"""
1458
+ {HEARTMULA_TAG_GUIDE}
1459
+
1460
+ ---
1461
 
1462
+ {HEARTMULA_LYRICS_STRUCTURE}
1463
+ """)
1464
+
1465
+ # ========== Event Handlers ==========
1466
+
1467
+ # 예제 Dropdown 선택
1468
+ example_dropdown.change(
1469
+ fn=load_example_from_dropdown,
1470
+ inputs=[example_dropdown],
1471
+ outputs=[augmented_prompt, style_tags]
1472
+ )
1473
+
1474
+ # Quick lyrics generation: quick_lyrics writes its result directly to final_lyrics
1475
+ quick_btn.click(
1476
+ fn=quick_lyrics,
1477
+ inputs=[groq_key, theme_input, lyrics_genre, lyrics_mood, lyrics_language, lyrics_vocal_type, lyrics_additional],
1478
+ outputs=[final_lyrics]
1479
+ )
1480
+
1481
+ # SOMA 가사 생성
1482
+ soma_lyrics_btn.click(
1483
+ fn=generate_lyrics_soma,
1484
+ inputs=[groq_key, theme_input, lyrics_genre, lyrics_mood, lyrics_language, lyrics_vocal_type, lyrics_additional],
1485
+ outputs=[lyrics_status, step1_out, step2_out, step3_out, step4_out]
1486
+ ).then(
1487
+ fn=lambda x: x,
1488
+ inputs=[step4_out],
1489
+ outputs=[final_lyrics]
1490
+ )
1491
+
1492
+ # SOMA prompt augmentation: fills augmented_prompt and style_tags
1493
+ augment_btn.click(
1494
+ fn=augment_prompt_soma,
1495
+ inputs=[groq_key, base_prompt, music_genre, music_mood, music_tempo, music_vocal, music_instruments, music_reference],
1496
+ outputs=[augmented_prompt, style_tags]
1497
+ )
1498
+
1499
+ # μŒμ•… 생성
1500
+ generate_music_btn.click(
1501
+ fn=generate_music,
1502
+ inputs=[minimax_key, model_select, augmented_prompt, final_lyrics, sample_rate, bitrate, audio_format],
1503
+ outputs=[music_output, music_status, json_output]
1504
+ )
1505
 
1506
 
1507
+ if __name__ == "__main__":  # call demo.launch() only when run as a script, not on import
1508
+ demo.launch()