import os
import tempfile
import torch
import gradio as gr
from huggingface_hub import hf_hub_download, snapshot_download
import spaces
import requests
import json
from groq import Groq

# ============================================================
# SOMA Music Studio - HeartMuLa Edition
# HeartMuLa-oss-3B + SOMA Multi-Agent + Comic Classic Theme
# ============================================================

# ============================================================
# HeartMuLa model download and loading
# ============================================================


def download_models():
    """Fetch the HeartMuLa artifacts from the Hugging Face Hub.

    Everything is stored under ``<HF_HOME>/heartmula_models``; ``/tmp`` is
    used as the base directory when ``HF_HOME`` is not set.

    Returns:
        Path of the directory that holds ``tokenizer.json``,
        ``gen_config.json`` and the ``HeartMuLa-oss-3B`` / ``HeartCodec-oss``
        snapshot sub-directories.
    """
    base_dir = os.environ.get("HF_HOME", os.path.expanduser("/tmp"))
    model_dir = os.path.join(base_dir, "heartmula_models")
    if not os.path.exists(model_dir):
        os.makedirs(model_dir, exist_ok=True)

    # NOTE(review): the file's indentation was lost; the downloads below are
    # assumed to run on every start-up, not only when the directory is new.

    # Tokenizer and generation config live in the HeartMuLaGen repo.
    print("Downloading HeartMuLaGen files...")
    for config_file in ("tokenizer.json", "gen_config.json"):
        hf_hub_download(
            repo_id="HeartMuLa/HeartMuLaGen",
            filename=config_file,
            local_dir=model_dir,
        )

    # Full snapshots, each into a sub-directory named after its repo.
    for repo_name in ("HeartMuLa-oss-3B", "HeartCodec-oss"):
        print(f"Downloading {repo_name}...")
        snapshot_download(
            repo_id=f"HeartMuLa/{repo_name}",
            local_dir=os.path.join(model_dir, repo_name),
        )

    print("All models downloaded successfully!")
    return model_dir


from heartlib import HeartMuLaGenPipeline

# Download the weights and build the pipeline once, at import time.
model_dir = download_models()

# bfloat16 on CUDA, float32 on CPU.
device, dtype = (
    (torch.device("cuda"), torch.bfloat16)
    if torch.cuda.is_available()
    else (torch.device("cpu"), torch.float32)
)

print(f"Loading HeartMuLa pipeline on {device} with {dtype}...")
pipe = HeartMuLaGenPipeline.from_pretrained(
    model_dir,
    device=device,
    dtype=dtype,
    version="3B",
)
print("HeartMuLa Pipeline loaded successfully!")
============================================================ # ๐ต HeartMuLa ๊ฐ์ด๋ ๋ฐ ์ค์ # ============================================================ # HeartMuLa ๊ถ์ฅ ๊ตฌ์กฐ ํ๊ทธ (๊ณต์ ๋ฌธ์ ๊ธฐ๋ฐ) STRUCTURE_TAGS = [ "[Intro]", "[Verse]", "[Prechorus]", "[Chorus]", "[Bridge]", "[Interlude]", "[Hook]", "[Outro]", "[Inst]", "[Solo]" ] # HeartMuLa ์คํ์ผ ํ๊ทธ ๊ฐ์ด๋ (๋ฐ๋ชจ ํ์ด์ง ๊ธฐ๋ฐ) HEARTMULA_TAG_GUIDE = """ ## ๐ผ HeartMuLa ์ ๋ ฅ ํ์ ### โ ๏ธ ์ค์: HeartMuLa๋ 2๊ฐ์ง ์ ๋ ฅ๋ง ๋ฐ์ต๋๋ค! 1. **๊ฐ์ฌ (Lyrics)** - ๊ตฌ์กฐ ํ๊ทธ ํฌํจ 2. **์คํ์ผ ํ๊ทธ (Tags)** - ์ฝค๋ง ๊ตฌ๋ถ, ๊ณต๋ฐฑ ์์ โ ๋ณด์ปฌ ํ์ (๋์ฃ, ๊ทธ๋ฃน ๋ฑ)์ ์ง์ํ์ง ์์ต๋๋ค! ### ๐ ๊ตฌ์กฐ ํ๊ทธ (๊ฐ์ฌ ๋ด ์ฌ์ฉ): `[Intro]`, `[Verse]`, `[Prechorus]`, `[Chorus]`, `[Bridge]`, `[Outro]` ### ๐ท๏ธ ์คํ์ผ ํ๊ทธ ํ์ (์ฝค๋ง ๊ตฌ๋ถ, ๊ณต๋ฐฑ ์์): ``` piano,happy,wedding,synthesizer,romantic energetic,drums,driving,rock,electric guitar soft,Sad,Ballad,Longing ``` ### ๐น ํ๊ทธ ์นดํ ๊ณ ๋ฆฌ (์ค์ ๋ฐ๋ชจ ๊ธฐ๋ฐ): **์ ๊ธฐ (Instruments):** Piano, Keyboard, synthesizer, drum machine, electric guitar, acoustic guitar, drums, Strings, bass **์ฅ๋ฅด (Genre):** pop, R&B, rock, Ballad, electronic, acoustic **๋ถ์๊ธฐ/๊ฐ์ (Mood/Emotion):** warm, reflection, energetic, soft, Sad, Regret, Longing, meditation, faith, peaceful, Romantic, emotional, hope, hopeful, powerful, epic, driving, happy, uplifting, dreamy **์ํฉ (Situation):** Cafe, wedding, Walking, self-discovery ### ๐ ์ง์ ์ธ์ด: ๐บ๐ธ English | ๐ฐ๐ท Korean | ๐จ๐ณ Chinese | ๐ฏ๐ต Japanese | ๐ช๐ธ Spanish """ # HeartMuLa ๊ถ์ฅ ๊ฐ์ฌ ๊ตฌ์กฐ HEARTMULA_LYRICS_STRUCTURE = """ ## ๐ HeartMuLa Recommended Lyrics Structure ### OPTIMAL FORMAT: ``` [Intro] [Verse] First verse lyrics here Second line of first verse [Prechorus] Building tension here Leading to the chorus [Chorus] Main hook and memorable melody Most important part of song [Verse] Second verse develops story [Bridge] Contrast section here Different melody or perspective [Chorus] Main hook repeated [Outro] Closing the song ``` ### KEY RULES: 1. 
[Chorus] appears at least 2-3 times 2. [Prechorus] builds tension before [Chorus] 3. [Bridge] provides contrast before final [Chorus] 4. Use [Prechorus] NOT [Pre Chorus] (no space!) """ # ์์ ๊ฐ์ฌ EXAMPLE_LYRICS = """[Intro] [Verse] The sun creeps in across the floor I hear the traffic outside the door The coffee pot begins to hiss It is another morning just like this [Prechorus] The world keeps spinning round and round Feet are planted on the ground I find my rhythm in the sound [Chorus] Every day the light returns Every day the fire burns We keep on walking down this street Moving to the same steady beat It is the ordinary magic that we meet [Verse] The hours tick deeply into noon Chasing shadows, chasing the moon Work is done and the lights go low Watching the city start to glow [Bridge] It is not always easy, not always bright Sometimes we wrestle with the night But we make it to the morning light [Chorus] Every day the light returns Every day the fire burns We keep on walking down this street Moving to the same steady beat [Outro] Just another day Every single day""" EXAMPLE_TAGS = "piano,happy,uplifting,pop" # ์์ ํ๋กฌํํธ (ํ๊ทธ ์ธํธ) - HeartMuLa ๋ฐ๋ชจ ๊ธฐ๋ฐ EXAMPLE_TAG_PRESETS = { "โ Cafe Warm Pop": "warm,reflection,pop,Cafe", "๐น R&B Soft Regret": "R&B,Keyboard,Regret,drum machine,electric guitar,synthesizer,soft", "โก Electronic Energetic": "energetic,electronic,synthesizer,drum machine,self-discovery", "๐ข Sad Ballad Longing": "soft,Sad,Ballad,Longing", "๐ง Meditation Peaceful": "meditation,faith,acoustic,peaceful", "๐ Wedding Romantic": "wedding,Piano,Strings,acoustic guitar,pop,Romantic", "๐ Emotional Piano": "Sad,Regret,electric guitar,drums,Piano,emotional", "๐ถ Hopeful Acoustic": "Walking,acoustic guitar,Strings,hope,hopeful", "๐ธ Epic Electronic": "synthesizer,drum machine,powerful,epic,Keyboard,electronic", "๐ฅ Driving Rock": "energetic,drums,driving,rock,electric guitar", "๐ต Happy Uplifting": "piano,happy,uplifting,pop", "๐ Soft Dreamy": 
"soft,dreamy,piano,strings,peaceful" } # ============================================================ # ๐ง SOMA ์์ด์ ํธ ์์คํ - ๊ฐ์ฌ ์์ฑ # ============================================================ LYRICS_AGENTS = { "lyricist": f"""You are a master lyricist optimized for HeartMuLa music generation. {HEARTMULA_LYRICS_STRUCTURE} Your task: Create powerful, memorable lyrics with OPTIMAL HeartMuLa tag placement. CRITICAL RULES: 1. Use HeartMuLa structure tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo] 2. [Prechorus] (not [Pre Chorus]) - HeartMuLa format, NO SPACE! 3. Ensure [Chorus] is the most memorable, singable part 4. Include [Prechorus] to build tension before [Chorus] 5. Add [Bridge] for emotional contrast 6. Minimum 6-8 sections for quality generation 7. โ ๏ธ CRITICAL: Write lyrics in the EXACT language specified by the user! - If Korean is specified, write ALL lyrics in Korean (ํ๊ตญ์ด)! - Do NOT translate or write in English unless explicitly asked! Write lyrics that create the BEST POSSIBLE foundation for high-quality music generation.""", "producer": f"""You are a music producer specializing in song structure optimization for HeartMuLa. {HEARTMULA_LYRICS_STRUCTURE} Your task: Analyze and OPTIMIZE the tag structure for MAXIMUM musical impact. CRITICAL OPTIMIZATION RULES: 1. Use HeartMuLa tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro] 2. IMPORTANT: Use [Prechorus] NOT [Pre Chorus] - no space! 3. Verify the structure follows genre conventions 4. Ensure proper tag sequence (VerseโPrechorusโChorus) 5. Check that [Chorus] appears at least 2-3 times 6. Verify [Bridge] provides contrast before final chorus Output the restructured lyrics with OPTIMAL HeartMuLa tag placement.""", "emotion_director": f"""You are an emotion director for music production. Your task: Enhance emotional impact through STRATEGIC tag content. 
EMOTIONAL MAPPING BY TAG: - [Intro]: Intrigue, anticipation - [Verse]: Storytelling, building connection - [Prechorus]: Rising tension, excitement - [Chorus]: Peak emotion, catharsis - [Bridge]: Vulnerability, reflection, contrast - [Outro]: Resolution, lingering feeling OPTIMIZATION RULES: 1. Each [Verse] should progress emotionally 2. [Chorus] must deliver the strongest emotional punch 3. [Bridge] should offer new emotional perspective 4. [Prechorus] should create anticipation for [Chorus] Enhance the lyrics for MAXIMUM emotional resonance.""", "final_editor": f"""You are the final editor for HeartMuLa music production. Your task: Output PERFECTLY FORMATTED, production-ready lyrics. CRITICAL OUTPUT RULES: 1. Output ONLY the actual lyrics with structure tags 2. Use HeartMuLa tags EXACTLY: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro], [Inst], [Solo] 3. IMPORTANT: [Prechorus] NOT [Pre Chorus] - no space! 4. DO NOT include English translations in parentheses 5. DO NOT include explanations, descriptions, or markdown 6. DO NOT include lines starting with * or > 7. For [Inst] or [Solo] sections, write ONLY the tag 8. Ensure MINIMUM 6-8 different sections 9. Verify [Chorus] appears at least 2 times 10. PRESERVE THE ORIGINAL LANGUAGE - if lyrics are in Korean, output in Korean! OUTPUT ONLY CLEAN LYRICS WITH OPTIMAL TAG STRUCTURE IN THE ORIGINAL LANGUAGE.""" } # ํ๊ทธ ์์ฑ ์์ด์ ํธ - HeartMuLa ๋ฐ๋ชจ ํ๊ทธ ๊ธฐ๋ฐ TAG_AGENT = f"""You are a style tag generator for HeartMuLa music generation. HeartMuLa uses comma-separated tags WITHOUT spaces. 
def call_groq(api_key: str, system: str, user_prompt: str, context: str = "") -> str:
    """Send one chat-completion request to Groq and return the reply text.

    Args:
        api_key: Key used to construct the ``Groq`` client.
        system: System prompt for the call.
        user_prompt: The task for this call.
        context: Optional earlier output. When non-empty it is sent ahead of
            the task as ``"Previous work:\\n...\\n\\nTask: ..."``.

    Returns:
        The reply content. This function never raises: every failure
        (including an empty or malformed response) is returned as a string
        starting with ``"Error:"``, which callers test with ``startswith``.
    """
    if context:
        user_content = f"Previous work:\n{context}\n\nTask: {user_prompt}"
    else:
        user_content = user_prompt
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user_content},
    ]

    try:
        client = Groq(api_key=api_key)
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            temperature=0.9,
            max_completion_tokens=4096,
            top_p=1,
            stream=False,
        )
        if completion and completion.choices and completion.choices[0].message:
            return completion.choices[0].message.content or "Error: Empty response"
        return "Error: Invalid response"
    except Exception as e:
        return f"Error: {str(e)}"


def clean_lyrics(text: str) -> str:
    """Post-process generated lyrics into plain, tag-normalised text.

    Per line:
      * lines that start with ``*``, ``>``, ``---`` or ``###`` are dropped;
      * a trailing parenthetical that starts with an ASCII letter is removed
        (only the last group on the line, so earlier parentheses survive);
      * ``**bold**`` markers are unwrapped;
      * ``[Pre Chorus]``/``[Pre-Chorus]`` and ``[Build Up]`` become
        ``[Prechorus]``, ``[Post Chorus]`` becomes ``[Chorus]`` and
        ``[Break]`` becomes ``[Interlude]`` (case-insensitive).

    Blank lines are kept, but runs of three or more newlines collapse to two.

    Args:
        text: Raw lyrics; ``None``, non-strings and empty strings are accepted.

    Returns:
        The cleaned lyrics, or ``""`` when *text* is empty or not a string.
    """
    import re  # function-scope: ``re`` is not imported at module level

    if not text or not isinstance(text, str):
        return ""

    # Compiled once per call instead of being rebuilt for every line.
    # ``^\s*\*`` already covers whole-line ``**...**`` headings.
    skip_line = re.compile(r'^\s*(?:\*|>|---|###)')
    # ``[^()]*`` keeps the match inside the final parenthesised group; the
    # previous ``.*?`` started at the first "(" and swallowed the rest of
    # the line.
    trailing_paren = re.compile(r'\s*\([A-Za-z][^()]*\)\s*$')
    bold = re.compile(r'\*\*(.*?)\*\*')
    tag_rewrites = (
        (re.compile(r'\[Pre.?Chorus\]', re.IGNORECASE), '[Prechorus]'),
        (re.compile(r'\[Post.?Chorus\]', re.IGNORECASE), '[Chorus]'),
        (re.compile(r'\[Build.?Up\]', re.IGNORECASE), '[Prechorus]'),
        (re.compile(r'\[Break\]', re.IGNORECASE), '[Interlude]'),
    )

    cleaned_lines = []
    for line in text.split('\n'):
        if not line.strip():
            cleaned_lines.append('')
            continue
        if skip_line.match(line):
            continue

        line = trailing_paren.sub('', line)
        line = bold.sub(r'\1', line)
        for pattern, replacement in tag_rewrites:
            line = pattern.sub(replacement, line)

        # A line can become empty after cleaning; drop it in that case.
        if line.strip():
            cleaned_lines.append(line.strip())

    result = re.sub(r'\n{3,}', '\n\n', '\n'.join(cleaned_lines))
    return result.strip()


def clean_tags(tags: str) -> str:
    """Normalise a style-tag string to lowercase, comma-separated form.

    Each comma-separated entry is lower-cased, stripped, and its inner
    whitespace runs are replaced by a single underscore. Empty entries and
    duplicates are removed, keeping first-seen order.

    Args:
        tags: Raw tag text, e.g. typed by the user or produced by the model.

    Returns:
        The normalised tag string, or ``"piano,happy,pop"`` when nothing
        usable remains.
    """
    default = "piano,happy,pop"
    if not tags:
        return default

    # Split on commas *before* touching spaces. The earlier chain of
    # ``replace`` calls only fixed one space next to a comma, so input such
    # as "piano ,  happy" produced the tag "_happy".
    # NOTE(review): HEARTMULA_TAG_GUIDE in this file lists multi-word and
    # capitalised tags ("electric guitar", "R&B"); confirm the model really
    # expects the lower-cased, underscored form produced here.
    normalised = []
    for entry in tags.lower().split(','):
        tag = "_".join(entry.split())
        if tag:
            normalised.append(tag)

    unique_tags = list(dict.fromkeys(normalised))
    return ','.join(unique_tags) if unique_tags else default
# NOTE(review): the non-ASCII literals in these functions are reproduced as
# they appear in the file; they look mis-encoded - confirm the file encoding.


def _language_hints(language: str, hints: dict) -> str:
    """Return the hint for *language* in its slot, leaving other slots empty.

    Slots are joined with single spaces, which yields the same text as
    writing one inline conditional per language separated by spaces.
    """
    return " ".join(text if language == name else "" for name, text in hints.items())


# NOTE(review): the body only calls call_groq/clean_lyrics; confirm that the
# @spaces.GPU decorator is actually needed on this function.
@spaces.GPU(duration=120)
def generate_lyrics_soma(
    api_key: str,
    theme: str,
    genre: str,
    mood: str,
    language: str,
    additional: str,
    progress=gr.Progress()
):
    """Generate lyrics by running the four LYRICS_AGENTS in sequence.

    Stages: lyricist -> producer -> emotion_director -> final_editor. Each
    stage receives the previous stage's output as context, and the last
    output is passed through ``clean_lyrics``.

    Args:
        api_key: Groq API key (surrounding whitespace is ignored).
        theme: Song theme; required.
        genre: Genre name inserted into the prompts.
        mood: Mood name inserted into the prompts.
        language: Language the lyrics must be written in.
        additional: Optional extra instructions.
        progress: Gradio progress callback.

    Returns:
        A 5-tuple ``(status, draft, structured, emotional, final)``. When a
        stage fails, the status carries the error, the outputs produced so
        far are returned, and the remaining slots are empty strings.
    """
    if not api_key or not api_key.strip():
        return ("โ Groq API Key ํ์", "", "", "", "")
    if not theme or not theme.strip():
        return ("โ ์ฃผ์ ๋ฅผ ์ ๋ ฅํ์ธ์", "", "", "", "")

    # The key was validated stripped but sent as typed; a pasted key with a
    # trailing space/newline would fail authentication.
    api_key = api_key.strip()

    extra_line = f'- Additional: {additional}' if additional else ''
    hints = _language_hints(language, {
        "Korean": "- ๋ฐ๋์ ํ๊ตญ์ด๋ก ๊ฐ์ฌ๋ฅผ ์์ฑํ์ธ์! ํ๊ธ๋ก๋ง ์์ฑ!",
        "Chinese": "- ่ฏท็จไธญๆๅๆๆๆญ่ฏ๏ผๅฟ ้กปๅ จ้จ็จไธญๆ๏ผ",
        "Japanese": "- ๆฅๆฌ่ชใงๆญ่ฉใๆธใใฆใใ ใใ๏ผๅ จใฆๆฅๆฌ่ชใง๏ผ",
        "Spanish": "- ยกEscribe toda la letra en espaรฑol! ยกSolo en espaรฑol!",
    })
    base_prompt = (
        "Create PROFESSIONAL lyrics optimized for HeartMuLa: "
        f"- Theme: {theme} - Genre: {genre} - Mood: {mood} "
        f"- Language: {language} โ CRITICAL: You MUST write ALL lyrics in {language} language! "
        f"{extra_line} โ ๏ธ LANGUAGE REQUIREMENT: Write the ENTIRE lyrics in {language}! "
        f"{hints} "
        "USE HeartMuLa STRUCTURE TAGS: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Interlude], [Hook], [Outro] "
        "IMPORTANT: Use [Prechorus] NOT [Pre Chorus] - no space! "
        "REQUIRED STRUCTURE: 1. [Intro] - Set the mood 2. [Verse] x2-3 - Tell the story "
        "3. [Prechorus] - Build tension 4. [Chorus] x2-3 - Main hook (MOST important!) "
        "5. [Bridge] - Emotional contrast 6. [Outro] - Conclusion "
        f"REMEMBER: ALL lyrics must be in {language}!"
    )

    lang_instruction = {
        "Korean": "ํ๊ตญ์ด๋ก๋ง ์ถ๋ ฅํ์ธ์!",
        "Chinese": "็จไธญๆ่พๅบ๏ผ",
        "Japanese": "ๆฅๆฌ่ชใงๅบๅใใฆใใ ใใ๏ผ",
        "Spanish": "ยกEscribe en espaรฑol!"
    }.get(language, "")

    # (progress fraction, progress label, LYRICS_AGENTS key, task prompt)
    stages = (
        (0.2, "๐ค ์์ฌ๊ฐ - ์ด์ ์์ฑ...", "lyricist", base_prompt),
        (0.4, "๐น ํ๋ก๋์ - ๊ตฌ์กฐ ์ต์ ํ...", "producer",
         f"Optimize for {genre}. Use [Prechorus] not [Pre Chorus]! KEEP ALL LYRICS IN {language}!"),
        (0.6, "๐ซ ๊ฐ์ฑ ๋๋ ํฐ - ๊ฐ์ ๊ฐํ...", "emotion_director",
         f"Enhance for {mood}. KEEP ALL LYRICS IN {language}!"),
        (0.8, "โจ ์ต์ข ํธ์ง...", "final_editor",
         f"Output ONLY clean lyrics in {language}. Use [Prechorus] not [Pre Chorus]! {lang_instruction}"),
    )

    try:
        outputs = []
        for fraction, label, agent, task in stages:
            progress(fraction, desc=label)
            previous = outputs[-1] if outputs else ""
            reply = call_groq(api_key, LYRICS_AGENTS[agent], task, previous)
            if reply.startswith("Error:"):
                # Keep what earlier stages produced; blank the rest.
                padding = [""] * (len(stages) - len(outputs))
                return (f"โ {reply}", *outputs, *padding)
            outputs.append(reply)

        draft, structured, emotional, final = outputs
        final_cleaned = clean_lyrics(final)
        progress(1.0, desc="โ ์๋ฃ!")
        return ("โ ๊ฐ์ฌ ์์ฑ ์๋ฃ!", draft, structured, emotional, final_cleaned)
    except Exception as e:
        return (f"โ ์์ธ: {str(e)}", "", "", "", "")


# NOTE(review): same question as above - this function only calls
# call_groq/clean_lyrics; confirm @spaces.GPU is needed.
@spaces.GPU(duration=60)
def quick_lyrics(api_key: str, theme: str, genre: str, mood: str, language: str, additional: str):
    """Generate lyrics with a single model call.

    Args:
        api_key: Groq API key (surrounding whitespace is ignored).
        theme: Song theme; required.
        genre: Genre name inserted into the prompt.
        mood: Mood name inserted into the prompt.
        language: Language the lyrics must be written in.
        additional: Optional extra instructions.

    Returns:
        The cleaned lyrics on success; otherwise a short error message
        string (validation failure, model error or unexpected exception).
    """
    if not api_key or not api_key.strip():
        return "โ API Key ํ์"
    if not theme or not theme.strip():
        return "โ ์ฃผ์ ๋ฅผ ์ ๋ ฅํ์ธ์"

    # Send the key without stray whitespace from copy/paste.
    api_key = api_key.strip()

    extra_line = f'- Special: {additional}' if additional else ''
    hints = _language_hints(language, {
        "Korean": "๋ฐ๋์ ํ๊ตญ์ด๋ก ์์ฑํ์ธ์! ํ๊ธ ๊ฐ์ฌ๋ง!",
        "Chinese": "่ฏท็จไธญๆๅๆญ่ฏ๏ผ",
        "Japanese": "ๆฅๆฌ่ชใงๆธใใฆใใ ใใ๏ผ",
        "Spanish": "ยกEscribe en espaรฑol!",
    })
    prompt = (
        "Create song lyrics for HeartMuLa: "
        f"- Theme: {theme} - Genre: {genre} - Mood: {mood} "
        f"- Language: {language} โ CRITICAL: Write ALL lyrics in {language}! "
        f"{extra_line} โ ๏ธ LANGUAGE: You MUST write the ENTIRE lyrics in {language}! "
        f"{hints} "
        "USE STRUCTURE (8-10 sections): "
        "[Intro] โ [Verse] โ [Prechorus] โ [Chorus] โ [Verse] โ [Prechorus] โ [Chorus] โ [Bridge] โ [Chorus] โ [Outro] "
        "IMPORTANT: Use [Prechorus] NOT [Pre Chorus]! "
        f"OUTPUT ONLY lyrics with tags in {language}. NO explanations."
    )
    system = f"Professional songwriter for HeartMuLa. {HEARTMULA_LYRICS_STRUCTURE} Output ONLY clean lyrics."

    try:
        result = call_groq(api_key, system, prompt)
        if result.startswith("Error:"):
            return f"โ {result}"
        return clean_lyrics(result)
    except Exception as e:
        return f"โ {str(e)}"
# NOTE(review): this function only calls call_groq/clean_tags; confirm that
# the @spaces.GPU decorator is actually needed here.
@spaces.GPU(duration=60)
def generate_tags_ai(api_key: str, genre: str, mood: str, current_tags: str):
    """Ask the tag agent for HeartMuLa style tags.

    Args:
        api_key: Groq API key (surrounding whitespace is ignored).
        genre: Genre name inserted into the prompt.
        mood: Mood name inserted into the prompt.
        current_tags: Tags currently in the UI, passed to the model as a hint.

    Returns:
        The model's tags normalised by ``clean_tags``, or the fallback
        ``"piano,happy,pop"`` when the key is missing or the call fails.
    """
    fallback = "piano,happy,pop"
    if not api_key or not api_key.strip():
        return fallback

    prompt = (
        "Generate HeartMuLa style tags based on: "
        f"- Genre: {genre} - Mood: {mood} - Current hint: {current_tags} "
        "Use ONLY these tag categories (from HeartMuLa demo): "
        "- Instruments: Piano, Keyboard, synthesizer, drum machine, electric guitar, acoustic guitar, drums, Strings, bass "
        "- Genre: pop, R&B, rock, Ballad, electronic, acoustic "
        "- Mood: warm, reflection, energetic, soft, Sad, Regret, Longing, meditation, faith, peaceful, Romantic, "
        "emotional, hope, hopeful, powerful, epic, driving, happy, uplifting, dreamy "
        "- Situation: Cafe, wedding, Walking, self-discovery "
        "Output 4-6 comma-separated tags WITHOUT spaces between tags. "
        "Example: piano,happy,uplifting,pop,Romantic"
    )

    try:
        result = call_groq(api_key.strip(), TAG_AGENT, prompt)
        if result.startswith("Error:"):
            return fallback
        return clean_tags(result)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        return fallback


@spaces.GPU(duration=180)
def generate_music_heartmula(
    lyrics: str,
    tags: str,
    max_duration_seconds: int,
    temperature: float,
    topk: int,
    cfg_scale: float,
    progress=gr.Progress(track_tqdm=True),
):
    """Render lyrics and style tags to an MP3 with the module-level ``pipe``.

    Args:
        lyrics: Lyrics text; required.
        tags: Style tags; required, normalised with ``clean_tags`` first.
        max_duration_seconds: Upper bound for the audio length, in seconds.
        temperature: Forwarded to the pipeline.
        topk: Forwarded to the pipeline (coerced to ``int``).
        cfg_scale: Forwarded to the pipeline.
        progress: Gradio progress tracker.

    Returns:
        ``(path, status)`` where *path* is the generated file, or ``None``
        with an error status when generation fails.

    Raises:
        gr.Error: If *lyrics* or *tags* is empty or whitespace only.
    """
    if not lyrics or not lyrics.strip():
        raise gr.Error("โ ๏ธ ๊ฐ์ฌ๋ฅผ ์ ๋ ฅํ์ธ์!")
    if not tags or not tags.strip():
        raise gr.Error("โ ๏ธ ํ๊ทธ๋ฅผ ์ ๋ ฅํ์ธ์!")

    tags = clean_tags(tags)

    # UI sliders may deliver floats; coerce before any file is created so a
    # bad value cannot leave a stray temp file behind.
    max_audio_length_ms = int(max_duration_seconds * 1000)
    top_k = int(topk)

    # Reserve a unique output path; the pipeline writes to it by name.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        output_path = f.name

    try:
        with torch.no_grad():
            pipe(
                {
                    "lyrics": lyrics,
                    "tags": tags,
                },
                max_audio_length_ms=max_audio_length_ms,
                save_path=output_path,
                topk=top_k,
                temperature=temperature,
                cfg_scale=cfg_scale,
            )
        return output_path, f"โ ์์ ์์ฑ ์๋ฃ! ({max_duration_seconds}์ด, ํ๊ทธ: {tags})"
    except Exception as e:
        # delete=False means nothing else removes the placeholder; without
        # this every failed run left a file in the temp directory.
        try:
            os.remove(output_path)
        except OSError:
            pass
        return None, f"โ ์์ฑ ์คํจ: {str(e)}"


def load_tag_preset(preset_name):
    """Return the tag string for *preset_name*, or ``""`` if it is unknown."""
    return EXAMPLE_TAG_PRESETS.get(preset_name, "")
border-radius: 8px !important; color: #FFFFFF !important; font-family: 'Bangers', cursive !important; font-size: 1.2rem !important; letter-spacing: 2px !important; box-shadow: 5px 5px 0px #1F2937 !important; text-shadow: 1px 1px 0px #1F2937 !important; } .gr-button-primary:hover { background: #2563EB !important; } .gr-button-secondary, button.secondary { background: #EF4444 !important; border: 3px solid #1F2937 !important; color: #FFFFFF !important; font-family: 'Bangers', cursive !important; box-shadow: 4px 4px 0px #1F2937 !important; } .generate-btn { background: #10B981 !important; border: 3px solid #1F2937 !important; color: #FFFFFF !important; font-family: 'Bangers', cursive !important; font-size: 1.4rem !important; box-shadow: 5px 5px 0px #1F2937 !important; } .generate-btn:hover { background: #059669 !important; } .gr-accordion { background: #FACC15 !important; border: 3px solid #1F2937 !important; box-shadow: 4px 4px 0px #1F2937 !important; } .tag-input textarea { background: #FEF3C7 !important; border: 3px dashed #F59E0B !important; font-family: 'Courier New', monospace !important; } .status-box textarea { background: #1F2937 !important; color: #10B981 !important; font-family: 'Courier New', monospace !important; border: 3px solid #10B981 !important; } label { color: #1F2937 !important; font-family: 'Comic Neue', cursive !important; font-weight: 700 !important; } ::-webkit-scrollbar { width: 12px; } ::-webkit-scrollbar-track { background: #FEF9C3; border: 2px solid #1F2937; } ::-webkit-scrollbar-thumb { background: #3B82F6; border: 2px solid #1F2937; } ::selection { background: #FACC15; color: #1F2937; } """ # ============================================================ # ๐จ Gradio UI # ============================================================ with gr.Blocks(css=css, title="๐ต SOMA Music Studio - HeartMuLa") as demo: # Header - HOME Badge gr.HTML("""
""") gr.Markdown("# ๐ต SOMA MUSIC STUDIO ๐ถ", elem_classes="header-title") gr.Markdown("""๐ซ HeartMuLa-oss-3B + SOMA Multi-Agent = ์ต๊ณ ํ์ง AI ์์ ์์ฑ ๐ซ
""") # API Key with gr.Accordion("๐ Groq API Key (๊ฐ์ฌ ์์ฑ์ฉ)", open=False): GROQ_KEY = os.environ.get("GROQ_API_KEY", "") groq_key = gr.Textbox( label="Groq API Key", type="password", value=GROQ_KEY, placeholder="gsk_..." if not GROQ_KEY else "โ Secret ๋ก๋๋จ", interactive=not bool(GROQ_KEY) ) with gr.Row(equal_height=False): # ========== ์ข์ธก: ๊ฐ์ฌ ์์ฑ ========== with gr.Column(scale=1, min_width=400): gr.Markdown("## ๐ LYRICS GENERATOR", elem_classes="section-title") theme_input = gr.Textbox( label="๐ฏ ๋ ธ๋ ์ฃผ์ ", placeholder="์: ์ด๋ณ ํ ์ฑ์ฅ, ๊ฟ์ ํฅํ ๋์ , ์ฌ๋์ ๊ณ ๋ฐฑ...", lines=2 ) with gr.Row(): lyrics_genre = gr.Dropdown( label="๐ธ ์ฅ๋ฅด", choices=["K-Pop", "Pop", "R&B", "Hip-Hop", "Ballad", "Rock", "EDM", "Jazz", "Folk", "Disco", "Cinematic"], value="K-Pop" ) lyrics_mood = gr.Dropdown( label="๐ซ ๋ถ์๊ธฐ", choices=["Empowering", "Melancholic", "Joyful", "Romantic", "Energetic", "Dreamy", "Nostalgic", "Peaceful", "Confident"], value="Empowering" ) with gr.Row(): lyrics_language = gr.Dropdown( label="๐ ์ธ์ด (Language)", choices=["English", "Korean", "Chinese", "Japanese", "Spanish"], value="English", info="HeartMuLa ๊ณต์ ์ง์ 5๊ฐ ์ธ์ด" ) lyrics_additional = gr.Textbox( label="โจ ์ถ๊ฐ ์ง์ (์ ํ)", placeholder="ํน๋ณ ์์ฒญ: ํน์ ๊ฐ์ , ์คํ์ผ ๋ฑ...", lines=1 ) with gr.Row(): quick_btn = gr.Button("โก QUICK", variant="secondary") soma_lyrics_btn = gr.Button("๐ง SOMA GENERATE", variant="primary") lyrics_status = gr.Textbox(label="๐ ์ํ", interactive=False, max_lines=1, elem_classes="status-box") with gr.Accordion("๐ SOMA ์์ ๊ณผ์ ", open=False): step1_out = gr.Textbox(label="1๏ธโฃ ์์ฌ๊ฐ", lines=3, interactive=False) step2_out = gr.Textbox(label="2๏ธโฃ ํ๋ก๋์", lines=3, interactive=False) step3_out = gr.Textbox(label="3๏ธโฃ ๊ฐ์ฑ", lines=3, interactive=False) step4_out = gr.Textbox(label="4๏ธโฃ ์ต์ข ", lines=3, interactive=False) final_lyrics = gr.Textbox( label="โ๏ธ ๊ฐ์ฌ (ํธ์ง ๊ฐ๋ฅ)", lines=16, value=EXAMPLE_LYRICS, placeholder="๊ฐ์ฌ๊ฐ ์ฌ๊ธฐ ํ์๋ฉ๋๋ค..." 
) # ========== ์ฐ์ธก: ์์ ์์ฑ ========== with gr.Column(scale=1, min_width=400): gr.Markdown("## ๐ต MUSIC GENERATOR", elem_classes="section-title") # ํ๊ทธ ํ๋ฆฌ์ gr.Markdown("### ๐ท๏ธ ํ๊ทธ ํ๋ฆฌ์ (ํด๋ฆญํ๋ฉด ์๋ ์ ๋ ฅ)") tag_preset = gr.Dropdown( label="ํ๋ฆฌ์ ์ ํ", choices=list(EXAMPLE_TAG_PRESETS.keys()), value=None ) tags_input = gr.Textbox( label="๐น ์คํ์ผ ํ๊ทธ (์ฝค๋ง ๊ตฌ๋ถ, ๊ณต๋ฐฑ ์์)", value=EXAMPLE_TAGS, placeholder="piano,happy,uplifting,pop", lines=1, elem_classes="tag-input" ) with gr.Row(): ai_tag_btn = gr.Button("๐ค AI ํ๊ทธ ์์ฑ", variant="secondary", size="sm") with gr.Accordion("โ๏ธ ์์ฑ ์ค์ ", open=True): max_duration = gr.Slider( minimum=30, maximum=240, value=120, step=10, label="โฑ๏ธ ์ต๋ ๊ธธ์ด (์ด)", info="30~240์ด" ) with gr.Row(): temperature = gr.Slider( minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="๐ก๏ธ Temperature", info="๋์์๋ก ์ฐฝ์์ " ) topk = gr.Slider( minimum=1, maximum=100, value=50, step=1, label="๐ฏ Top-K" ) cfg_scale = gr.Slider( minimum=1.0, maximum=3.0, value=1.5, step=0.1, label="๐ CFG Scale", info="Classifier-free guidance" ) generate_btn = gr.Button( "๐ถ GENERATE MUSIC!", variant="primary", size="lg", elem_classes="generate-btn" ) music_status = gr.Textbox(label="๐ ์ํ", interactive=False, max_lines=1, elem_classes="status-box") audio_output = gr.Audio(label="๐ง ์์ฑ๋ ์์ ", type="filepath") gr.Markdown(""" ### ๐ก Tips - **๊ตฌ์กฐ ํ๊ทธ:** [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Outro] - **์คํ์ผ ํ๊ทธ:** piano,happy,uplifting,pop (์ฝค๋ง ๊ตฌ๋ถ, ๊ณต๋ฐฑ ์์!) - **์ง์ ์ธ์ด:** ๐บ๐ธ EN | ๐ฐ๐ท KO | ๐จ๐ณ ZH | ๐ฏ๐ต JA | ๐ช๐ธ ES - โ ๏ธ **๋ณด์ปฌ ํ์ (๋์ฃ, ๊ทธ๋ฃน ๋ฑ)์ ์ง์ํ์ง ์์ต๋๋ค!** - **Temperature:** ๋์ผ๋ฉด ์ฐฝ์์ , ๋ฎ์ผ๋ฉด ์ผ๊ด์ฑ """) # ๊ฐ์ด๋ with gr.Accordion("๐ HeartMuLa ๊ฐ์ด๋", open=False): gr.Markdown(f""" ## โ ๏ธ HeartMuLa ์ ๋ ฅ ํ์ (์ค์!) HeartMuLa๋ **2๊ฐ์ง ์ ๋ ฅ๋ง** ๋ฐ์ต๋๋ค: 1. **๊ฐ์ฌ (Lyrics)** - ๊ตฌ์กฐ ํ๊ทธ ํฌํจ 2. 
**์คํ์ผ ํ๊ทธ (Tags)** - ์ฝค๋ง ๊ตฌ๋ถ โ **์ง์ํ์ง ์๋ ๊ฒ:** - ๋ณด์ปฌ ํ์ ์ ํ (๋์ฃ, ๊ทธ๋ฃน, ์ฝ๋ฌ์ค ๋ฑ) - ๋ ํผ๋ฐ์ค ์ค๋์ค (์์ง ๋ฏธ์ง์) - BPM/ํ ํฌ ์ง์ ์ง์ --- ## ๐ ์ง์ ์ธ์ด (5๊ฐ) | ์ธ์ด | ์ฝ๋ | ์ํ | |------|------|------| | ๐บ๐ธ English | EN | โ | | ๐ฐ๐ท Korean | KO | โ | | ๐จ๐ณ Chinese | ZH | โ | | ๐ฏ๐ต Japanese | JA | โ | | ๐ช๐ธ Spanish | ES | โ | --- {HEARTMULA_TAG_GUIDE} --- {HEARTMULA_LYRICS_STRUCTURE} --- **Model:** [HeartMuLa-oss-3B](https://huggingface.co/HeartMuLa/HeartMuLa-oss-3B) | **Paper:** [arXiv](https://arxiv.org/abs/2601.10547) | **Code:** [GitHub](https://github.com/HeartMuLa/heartlib) *Licensed under CC BY-NC 4.0 (Non-Commercial)* """) # ========== Event Handlers ========== # ํ๊ทธ ํ๋ฆฌ์ ๋ก๋ tag_preset.change( fn=load_tag_preset, inputs=[tag_preset], outputs=[tags_input] ) # AI ํ๊ทธ ์์ฑ ai_tag_btn.click( fn=generate_tags_ai, inputs=[groq_key, lyrics_genre, lyrics_mood, tags_input], outputs=[tags_input] ) # ๋น ๋ฅธ ๊ฐ์ฌ ์์ฑ quick_btn.click( fn=quick_lyrics, inputs=[groq_key, theme_input, lyrics_genre, lyrics_mood, lyrics_language, lyrics_additional], outputs=[final_lyrics] ) # SOMA ๊ฐ์ฌ ์์ฑ soma_lyrics_btn.click( fn=generate_lyrics_soma, inputs=[groq_key, theme_input, lyrics_genre, lyrics_mood, lyrics_language, lyrics_additional], outputs=[lyrics_status, step1_out, step2_out, step3_out, step4_out] ).then( fn=lambda x: x, inputs=[step4_out], outputs=[final_lyrics] ) # ์์ ์์ฑ (HeartMuLa) generate_btn.click( fn=generate_music_heartmula, inputs=[final_lyrics, tags_input, max_duration, temperature, topk, cfg_scale], outputs=[audio_output, music_status] ) if __name__ == "__main__": demo.launch()