# ==============================================================================
# Original app.py (for diff reference):
# ==============================================================================
# import gradio as gr
#
# def greet(name):
#     return "Hello " + name + "!!"
#
# demo = gr.Interface(fn=greet, inputs="text", outputs="text")
# demo.launch()
# ==============================================================================

# Standard-library imports. The `src` directory is put on sys.path further down (right after `_gpu_guard`) so that `heartlib` can be imported.
import sys
import os
import uuid
import subprocess
import tempfile
import wave
from pathlib import Path
from typing import Dict, Any, Iterator, Optional, Tuple, TYPE_CHECKING

import numpy as np

# ZeroGPU requirement: `spaces` has to be imported ahead of any CUDA-related
# package (e.g. torch). When the import fails for any reason, fall back to None.
try:
    import spaces
except Exception:
    spaces = None

# Truthy only when `spaces` imported successfully AND SPACE_ID is set (non-empty).
IS_SPACE = spaces is not None and os.environ.get("SPACE_ID")

# Value later handed to `spaces.GPU(duration=...)` by `_gpu_guard`.
# An explicit GPU_MAX_DURATION always wins; otherwise the default is 600 on a
# Space with AOTI enabled (ENABLE_AOTI unset counts as enabled) and 100 elsewhere.
_gpu_duration_env = os.environ.get("GPU_MAX_DURATION")
if _gpu_duration_env is not None:
    GPU_MAX_DURATION = int(_gpu_duration_env)
else:
    aoti_env = os.environ.get("ENABLE_AOTI")
    enable_aoti_default = aoti_env is None or aoti_env.strip().lower() in ("1", "true", "yes", "y", "on")
    GPU_MAX_DURATION = 600 if IS_SPACE and enable_aoti_default else 100


def _env_bool(name: str) -> Optional[bool]:
    """Read environment variable *name* as a tri-state flag.

    Returns:
        ``None`` when the variable is unset; otherwise ``True`` if its trimmed,
        lowercased value is one of ``1/true/yes/y/on`` and ``False`` for
        anything else.
    """
    raw = os.environ.get(name)
    return None if raw is None else raw.strip().lower() in ("1", "true", "yes", "y", "on")


# Default for the `keep_model_loaded` option: an explicit
# KEEP_MODEL_LOADED_DEFAULT env value wins, otherwise it is on everywhere
# except on a Space.
_default_keep_model_loaded_env = _env_bool("KEEP_MODEL_LOADED_DEFAULT")
DEFAULT_KEEP_MODEL_LOADED = (
    not IS_SPACE
    if _default_keep_model_loaded_env is None
    else _default_keep_model_loaded_env
)


def _gpu_guard(fn):
    """Decorate *fn* with ``spaces.GPU`` when the `spaces` package is available.

    Without `spaces` the function is returned untouched. With it, a positive
    GPU_MAX_DURATION is forwarded as ``duration``; if that call signature is
    rejected with TypeError, the plain ``spaces.GPU(fn)`` form is used instead.
    """
    if spaces is None:
        return fn
    if GPU_MAX_DURATION <= 0:
        # No positive duration configured: use the decorator's own default.
        return spaces.GPU(fn)
    try:
        wrapped = spaces.GPU(fn, duration=GPU_MAX_DURATION)
    except TypeError:
        wrapped = spaces.GPU(fn)
    return wrapped
# Put the `src` directory next to this file at the front of sys.path so the
# `heartlib` package can be imported (it is imported lazily further down).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

# HF Spaces: disable SSR to avoid Node proxy/port issues.
os.environ.setdefault("GRADIO_SSR_MODE", "0")
# Mitigate CUDA memory fragmentation on small GPUs.
# setdefault() leaves any value already present in the environment untouched;
# both spellings of the allocator variable are populated.
os.environ.setdefault("PYTORCH_ALLOC_CONF", "expandable_segments:True")
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

if TYPE_CHECKING:
    from heartlib import HeartMuLaGenPipeline
import gradio as gr
import re
try:
    from google import genai  # google-genai package
    from google.genai import types
except Exception:
    genai = None
    types = None
from openai import OpenAI
from transformers import BitsAndBytesConfig

# Global pipelines (managed lazily)

# Default model path (HF Spaces + local override via env)
MODEL_PATH = os.environ.get("MODEL_PATH", "./ckpt")

# LLM API Presets
# Each entry is consumed by `generate_lyrics`:
#   name          - display name used in progress messages
#   api_type      - "gemini" (google-genai client) or "openai" (OpenAI-compatible client)
#   default_model - model used when no custom model name is supplied
#   env_key       - environment variable consulted for the API key when none is typed in
#                   (None: the key must be entered by the user)
#   base_url      - endpoint override for the OpenAI client (None: client default,
#                   unless a custom base URL is supplied)
LLM_PRESETS = {
    "gemini": {
        "name": "Google Gemini",
        "api_type": "gemini",
        "default_model": "gemini-2.0-flash-lite",
        "env_key": "GEMINI_API_KEY",
        "base_url": None,
    },
    "openai": {
        "name": "OpenAI",
        "api_type": "openai",
        "default_model": "gpt-4o-mini",
        "env_key": "OPENAI_API_KEY",
        "base_url": None,
    },
    "deepseek": {
        "name": "DeepSeek",
        "api_type": "openai",
        "default_model": "deepseek-chat",
        "env_key": "DEEPSEEK_API_KEY",
        "base_url": "https://api.deepseek.com",
    },
    "custom": {
        "name": "Custom OpenAI-Compatible",
        "api_type": "openai",
        "default_model": "custom-model",
        "env_key": None,
        "base_url": None,
    }
}

# Default example from assets
# Pre-filled into the "Lyrics" textbox of the UI. Section markers such as
# [Verse] / [Chorus] sit on their own lines, with a blank line between sections.
EXAMPLE_LYRICS = """[Intro]

[Verse]
The sun creeps in across the floor
I hear the traffic outside the door
The coffee pot begins to hiss
It is another morning just like this

[Prechorus]
The world keeps spinning round and round
Feet are planted on the ground
I find my rhythm in the sound

[Chorus]
Every day the light returns
Every day the fire burns
We keep on walking down this street
Moving to the same steady beat
It is the ordinary magic that we meet

[Verse]
The hours tick deeply into noon
Chasing shadows,chasing the moon
Work is done and the lights go low
Watching the city start to glow

[Bridge]
It is not always easy,not always bright
Sometimes we wrestle with the night
But we make it to the morning light

[Chorus]
Every day the light returns
Every day the fire burns
We keep on walking down this street
Moving to the same steady beat

[Outro]
Just another day
Every single day"""

# Pre-filled into the comma-separated "Selected Tags" textbox of the UI.
EXAMPLE_TAGS = "piano,happy"

# Tag categories for selection
# Maps a category label to the choices offered in that category's CheckboxGroup
# in the UI. Some tags appear in more than one category (e.g. "Warm",
# "Reflection"); `update_tag_string` de-duplicates the combined selection.
TAG_DATA = {
    "Gender": [
        "Male", "Female"
    ],
    "Genre": [
        "Pop", "Folk", "Ballad", "Electronic", "Rock", "Acoustic", "R&B",
        "Indie", "Dance", "Indie Pop", "J-Pop", "Hip-Hop", "Country",
        "Latin", "Alternative", "Christian", "Cantopop", "Gospel", "Soul",
        "Mandopop"
    ],
    "Instrument": [
        "Drums", "Piano", "Guitar", "Strings", "Synthesizer", "Bass",
        "Acoustic Guitar", "Keyboard", "Electronic Drums", "Vocals",
        "Drum Machine", "Electric Guitar", "Percussion", "Beat",
        "Orchestra", "Saxophone", "Accordion", "Voice", "String", "Vocal"
    ],
    "Mood": [
        "Melancholy", "Romantic", "Energetic", "Hopeful", "Dreamy",
        "Relaxed", "Sad", "Calm", "Cheerful", "Reflective", "Emotional",
        "Joyful", "Sentimental", "Uplifting", "Warm", "Peaceful", "Upbeat",
        "Gentle", "Nostalgic", "Epic"
    ],
    "Scene": [
        "Driving", "Road Trip", "Cafe", "Relaxing", "Wedding", "Meditation",
        "Workout", "Walking", "Alone", "Travel", "Reflection", "Rainy Day",
        "Night", "Church", "Coffee Shop", "Gym", "Gaming", "Study",
        "Dating", "Date"
    ],
    "Singer Timbre": [
        "Soft", "Clear", "Warm", "Gentle", "Smooth", "Sweet", "Emotional",
        "Mellow", "Powerful", "Youthful", "Bright", "Rough", "Raspy",
        "Melodic", "Deep", "Soulful", "Strong", "Energetic", "Breathy",
        "Passionate"
    ],
    "Topic": [
        "Love", "Relationship", "Hope", "Longing", "Loss", "Heartbreak",
        "Memory", "Reflection", "Life", "Faith", "Regret", "Freedom",
        "Breakup", "Nature", "Loneliness", "Dreams", "Nostalgia", "Romance",
        "Friendship", "Youth"
    ]
}

# Directory for generated audio files. HEARTMULA_DATA_DIR overrides the
# default location inside the system temp directory.
DATA_DIR = Path(os.environ.get("HEARTMULA_DATA_DIR", os.path.join(tempfile.gettempdir(), "heartmula_stream")))
DATA_DIR.mkdir(parents=True, exist_ok=True)
print(f"DATA_DIR = {DATA_DIR}")

# Clear ZeroGPU offload cache to avoid disk-full errors.
# Entries are removed through the filesystem API instead of a shell command
# line, so names containing spaces or shell metacharacters cannot be
# misinterpreted. Like `rm -rf`, individual failures are ignored.
import shutil

offload_path = "/data-nvme/zerogpu-offload"
try:
    if os.path.exists(offload_path):
        for name in os.listdir(offload_path):
            _offload_entry = os.path.join(offload_path, name)
            if os.path.isdir(_offload_entry) and not os.path.islink(_offload_entry):
                shutil.rmtree(_offload_entry, ignore_errors=True)
            else:
                # Regular files and symlinks (the link itself, never its target).
                try:
                    os.remove(_offload_entry)
                except OSError:
                    pass
except Exception as e:
    print(f"WARN: failed to clear ZeroGPU offload cache: {e}")

# Runtime tunables read from the environment. The int()/float() conversions run
# at import time, so a malformed value raises ValueError on startup.
GRADIO_QUEUE_MAX_SIZE = int(os.environ.get("GRADIO_QUEUE_MAX_SIZE", "24"))
GRADIO_DEFAULT_CONCURRENCY = int(os.environ.get("GRADIO_DEFAULT_CONCURRENCY", "1"))
GPU_CONCURRENCY_LIMIT = int(os.environ.get("GRADIO_GPU_CONCURRENCY", "1"))
# NOTE: the fallbacks of DEFAULT_DURATION_SEC, DEFAULT_QUANT_MODE,
# DEFAULT_OFFLOAD_MODE, DEFAULT_GENERATION_MODE and DEFAULT_SPEED_SUBMODE are
# provisional: after PRESET_CONFIGS is defined, each one whose environment
# variable is unset is replaced by the value from the DEFAULT_PRESET entry.
DEFAULT_DURATION_SEC = int(os.environ.get("DEFAULT_DURATION_SEC", "60" if IS_SPACE else "180"))
DEFAULT_QUANT_MODE = os.environ.get("DEFAULT_QUANT_MODE", "4bit" if IS_SPACE else "none")
DEFAULT_OFFLOAD_MODE = os.environ.get("DEFAULT_OFFLOAD_MODE", "aggressive" if IS_SPACE else "auto")
DEFAULT_GENERATION_MODE = os.environ.get(
    "DEFAULT_GENERATION_MODE",
    "Accelerated" if IS_SPACE else "Original (No Acceleration)",
)
DEFAULT_SPEED_SUBMODE = os.environ.get("DEFAULT_SPEED_SUBMODE", "Standard")
DEFAULT_PRESET = os.environ.get("DEFAULT_PRESET", "ZeroGPU AOTI FP8" if IS_SPACE else "Balanced")
# Length in seconds of one streamed audio block.
MODEL_DETOKENIZE_INTERVAL_SEC = float(os.environ.get("MODEL_DETOKENIZE_INTERVAL_SEC", "29.76"))
# Sample rate reported alongside every streamed chunk.
AUDIO_SAMPLE_RATE = int(os.environ.get("AUDIO_SAMPLE_RATE", "48000"))
# Duration of one frame in milliseconds (presumably the codec frame length — TODO confirm).
FRAME_MS = 80.0
# One block expressed in frames and in samples; both are clamped to at least 1.
BLOCK_FRAMES = max(1, int(round((MODEL_DETOKENIZE_INTERVAL_SEC * 1000.0) / FRAME_MS)))
BLOCK_SAMPLES = max(1, int(round(MODEL_DETOKENIZE_INTERVAL_SEC * AUDIO_SAMPLE_RATE)))
# Buffering knobs used by `stream_generate` to decide when playback starts.
MIN_BUFFER_BLOCKS = int(os.environ.get("MIN_BUFFER_BLOCKS", "1"))
PREFETCH_BLOCKS = int(os.environ.get("PREFETCH_BLOCKS", "2"))
MAX_QUEUE_BLOCKS = int(os.environ.get("MAX_QUEUE_BLOCKS", "4"))
# Streaming is always allowed outside a Space; on a Space it requires a truthy
# ALLOW_STREAMING_ZERO_GPU (unset yields None, which counts as disabled).
ALLOW_STREAMING_ZERO_GPU = _env_bool("ALLOW_STREAMING_ZERO_GPU")
STREAMING_ALLOWED = (not IS_SPACE) or bool(ALLOW_STREAMING_ZERO_GPU)

# Named bundles of generation settings. Every preset carries the same keys:
# duration, quant_mode, offload_mode, keep_model_loaded, temperature, topk,
# cfg_scale, generation_mode and speed_submode.
PRESET_CONFIGS = {
    "ZeroGPU Safe": {
        "duration": 60,
        "quant_mode": "4bit",
        "offload_mode": "aggressive",
        "keep_model_loaded": False,
        "temperature": 1.0,
        "topk": 50,
        "cfg_scale": 1.5,
        "generation_mode": "Accelerated",
        "speed_submode": "Standard",
    },
    "ZeroGPU AOTI FP8": {
        "duration": 60,
        "quant_mode": "fp8",
        "offload_mode": "aggressive",
        "keep_model_loaded": False,
        "temperature": 1.0,
        "topk": 50,
        "cfg_scale": 1.5,
        "generation_mode": "Accelerated",
        "speed_submode": "Standard",
    },
    "Balanced": {
        "duration": 120,
        "quant_mode": "4bit" if IS_SPACE else "none",
        "offload_mode": "auto",
        "keep_model_loaded": DEFAULT_KEEP_MODEL_LOADED,
        "temperature": 1.0,
        "topk": 50,
        "cfg_scale": 1.5,
        "generation_mode": "Accelerated",
        "speed_submode": "Standard",
    },
    "Quality": {
        "duration": 180,
        "quant_mode": "none",
        "offload_mode": "auto",
        "keep_model_loaded": DEFAULT_KEEP_MODEL_LOADED,
        "temperature": 0.9,
        "topk": 50,
        "cfg_scale": 2.0,
        "generation_mode": "Original (No Acceleration)",
        "speed_submode": "Standard",
    },
}
# An unknown DEFAULT_PRESET name silently falls back to "Balanced".
if DEFAULT_PRESET not in PRESET_CONFIGS:
    DEFAULT_PRESET = "Balanced"
_default_preset_config = PRESET_CONFIGS[DEFAULT_PRESET]
# Precedence for each DEFAULT_* value: an explicitly set environment variable
# wins; otherwise the value comes from the selected preset (replacing the
# provisional fallback assigned earlier).
if "DEFAULT_DURATION_SEC" not in os.environ:
    DEFAULT_DURATION_SEC = _default_preset_config["duration"]
if "DEFAULT_QUANT_MODE" not in os.environ:
    DEFAULT_QUANT_MODE = _default_preset_config["quant_mode"]
if "DEFAULT_OFFLOAD_MODE" not in os.environ:
    DEFAULT_OFFLOAD_MODE = _default_preset_config["offload_mode"]
if "DEFAULT_GENERATION_MODE" not in os.environ:
    DEFAULT_GENERATION_MODE = _default_preset_config["generation_mode"]
if "DEFAULT_SPEED_SUBMODE" not in os.environ:
    DEFAULT_SPEED_SUBMODE = _default_preset_config["speed_submode"]


class ModelManager:
    """Create and cache HeartMuLa generation pipelines for one checkpoint directory.

    Pipelines are cached per ``(version, codec_version, quant_mode)`` so that
    repeated requests with identical settings reuse the same pipeline object.
    """

    def __init__(self, model_path: str, use_deepspeed_override: Optional[bool] = None):
        """Pick device/dtype and resolve the DeepSpeed setting.

        Args:
            model_path: Checkpoint directory handed to ``from_pretrained``.
            use_deepspeed_override: Explicit DeepSpeed on/off switch; ``None``
                defers to the USE_DEEPSPEED_INFERENCE environment variable.
        """
        # torch and heartlib are imported here rather than at module import time.
        import torch
        from heartlib import HeartMuLaGenPipeline

        cuda_ok = torch.cuda.is_available()

        self.model_path = model_path
        self.device = torch.device("cuda" if cuda_ok else "cpu")
        self.dtype = torch.bfloat16 if cuda_ok else torch.float32
        self._gen_pipes: Dict[Tuple[str, str, str], "HeartMuLaGenPipeline"] = {}
        if use_deepspeed_override is not None:
            self.use_deepspeed = use_deepspeed_override
        else:
            self.use_deepspeed = os.getenv("USE_DEEPSPEED_INFERENCE", "0").lower() in ("1", "true", "yes")
        # Depends on use_deepspeed and dtype, so it is built after both are set.
        self.ds_inference_config = self._make_ds_inference_config()
        self._HeartMuLaGenPipeline = HeartMuLaGenPipeline

    def _make_ds_inference_config(self) -> Dict[str, Any]:
        """Return the DeepSpeed inference settings, or ``{}`` when DeepSpeed is off."""
        if not self.use_deepspeed:
            return {}
        truthy = ("1", "true", "yes")
        return {
            # DEEPSPEED_TP_SIZE takes precedence over WORLD_SIZE; default is 1.
            "mp_size": int(os.getenv("DEEPSPEED_TP_SIZE", os.getenv("WORLD_SIZE", "1"))),
            "dtype": self.dtype,
            "replace_method": os.getenv("DEEPSPEED_REPLACE_METHOD", "auto"),
            "replace_with_kernel_inject": os.getenv("DEEPSPEED_KERNEL_INJECT", "1").lower() in truthy,
        }

    def _make_bnb_config(self, quant_mode: str) -> Optional[BitsAndBytesConfig]:
        """Translate *quant_mode* into a bitsandbytes config.

        Returns ``None`` for "none" and "fp8" (fp8 is requested from the
        pipeline through a separate flag).

        Raises:
            gr.Error: For any mode other than "none" on a non-CUDA device, or
                for an unrecognised mode.
        """
        import torch

        if quant_mode == "none":
            return None
        if self.device.type != "cuda":
            raise gr.Error("Quantization requires CUDA.")
        if quant_mode == "fp8":
            return None
        if quant_mode == "8bit":
            return BitsAndBytesConfig(load_in_8bit=True)
        if quant_mode != "4bit":
            raise gr.Error(f"Unknown quant mode: {quant_mode}")

        # 4-bit: nf4 by default, fp4 on devices reporting compute capability
        # major >= 10. A failing capability query keeps the default.
        four_bit_type = "nf4"
        try:
            major, _ = torch.cuda.get_device_capability()
            if major >= 10:
                four_bit_type = "fp4"
        except Exception:
            pass
        return BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type=four_bit_type,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )

    def get_gen_pipeline(self, version: str, codec_version: str, quant_mode: str) -> "HeartMuLaGenPipeline":
        """Return the cached pipeline for these settings, creating it on first use."""
        cache_key = (version, codec_version, quant_mode)
        try:
            return self._gen_pipes[cache_key]
        except KeyError:
            pass

        bnb_config = self._make_bnb_config(quant_mode)
        pipeline = self._HeartMuLaGenPipeline.from_pretrained(
            self.model_path,
            device=self.device,
            dtype=self.dtype,
            version=version,
            codec_version=codec_version,
            bnb_config=bnb_config,
            torchao_quantize=quant_mode == "fp8",
            lazy_load=True,
            use_deepspeed=self.use_deepspeed,
            ds_inference_config=self.ds_inference_config,
        )
        self._gen_pipes[cache_key] = pipeline
        return self._gen_pipes[cache_key]


# Process-wide cache of ModelManager instances, keyed by "accelerated" or
# "original"; populated on demand by `get_model_manager`.
model_managers: Dict[str, ModelManager] = {}


def get_model_manager(use_acceleration: bool) -> ModelManager:
    """Return the shared ModelManager for the requested mode, creating it once.

    On first use the checkpoint directory is created and the models are
    downloaded if they are missing. The non-accelerated manager has DeepSpeed
    forced off; the accelerated one defers to the environment.
    """
    cache_key = "accelerated" if use_acceleration else "original"
    if cache_key in model_managers:
        return model_managers[cache_key]

    os.makedirs(MODEL_PATH, exist_ok=True)
    download_models_if_needed(MODEL_PATH)
    deepspeed_override = None if use_acceleration else False
    model_managers[cache_key] = ModelManager(MODEL_PATH, use_deepspeed_override=deepspeed_override)
    return model_managers[cache_key]


def update_tag_string(*args):
    """
    Merge the selections of all tag categories into one comma-separated string.

    args: one value per CheckboxGroup; a list contributes all of its items,
    any other truthy value is taken as a single tag, empty values are skipped.
    Duplicates are dropped, keeping the first occurrence's position.
    """
    picked = []
    for group in args:
        if not group:
            continue
        if isinstance(group, list):
            picked += group
        else:
            picked.append(group)
    # dict.fromkeys keeps insertion order, giving an order-preserving de-dup.
    return ",".join(dict.fromkeys(picked))


def process_lyrics_correct(content):
    """
    Normalise lyrics text into the format aligned with training data.

    Steps, in order:
    1. Lowercase everything (this includes bracketed structure tags).
    2. Remove bracketed timestamps such as [00:12] or [00:12.34].
    3. Strip leading/trailing whitespace from every line.
    4. Drop empty lines at the very start and end.
    5. Collapse runs of 3 or more newlines into exactly 2.
    """
    lowered = content.lower()
    without_timestamps = re.sub(r'\[[^\]]*\d{1,2}:\d{2}[^\]]*\]', '', lowered)

    trimmed_lines = (raw.strip() for raw in without_timestamps.split('\n'))

    # Every line is already stripped, so trimming newline characters from the
    # joined text removes exactly the leading and trailing empty lines.
    joined = '\n'.join(trimmed_lines).strip('\n')

    return re.sub(r'\n{3,}', '\n\n', joined)


def save_audio_to_wav(sample_rate: int, audio_np: np.ndarray, output_dir: Path) -> str:
    """Write float audio to a uniquely named mono 16-bit PCM WAV file.

    Non-finite samples are replaced by 0 and the signal is clipped to
    [-1.0, 1.0] before scaling (the same sanitising `_normalize_stream_chunk`
    applies), so out-of-range input saturates instead of overflowing the
    int16 conversion.

    Args:
        sample_rate: Frame rate written to the WAV header.
        audio_np: Samples on a nominal [-1.0, 1.0] scale; written as one channel.
        output_dir: Destination directory; created if missing.

    Returns:
        Path of the written file as a string.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    wav_path = output_dir / f"{uuid.uuid4()}.wav"

    samples = np.nan_to_num(audio_np, nan=0.0, posinf=0.0, neginf=0.0)
    samples = np.clip(samples, -1.0, 1.0)
    # WAV PCM data is little-endian; "<i2" makes that explicit on any platform.
    audio_int16 = (samples * 32767).astype("<i2")

    with wave.open(str(wav_path), "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(audio_int16.tobytes())
    return str(wav_path)


def convert_wav_to_mp3(wav_path: str, output_dir: Path) -> str:
    """Transcode *wav_path* to a uniquely named MP3 inside *output_dir* using ffmpeg.

    ffmpeg's output is captured rather than printed.

    Returns:
        Path of the MP3 file as a string.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    target = output_dir / f"{uuid.uuid4()}.mp3"
    ffmpeg_args = ["ffmpeg", "-y", "-i", wav_path]
    ffmpeg_args += ["-codec:a", "libmp3lame", "-qscale:a", "2"]
    ffmpeg_args.append(str(target))
    subprocess.run(ffmpeg_args, check=True, capture_output=True)
    return str(target)


def check_models_exist(ckpt_dir):
    """Return True only if every required checkpoint entry exists under *ckpt_dir*."""
    required_entries = (
        "gen_config.json",
        "tokenizer.json",
        "HeartCodec-oss",
        "HeartMuLa-oss-3B",
    )
    return all(
        os.path.exists(os.path.join(ckpt_dir, entry))
        for entry in required_entries
    )


def download_models_if_needed(ckpt_dir):
    """Fetch the HeartMuLa checkpoints from ModelScope unless they already exist.

    Nothing is downloaded when `check_models_exist` reports a complete
    checkpoint directory. Otherwise three snapshots are downloaded in order:
    config/tokenizer into *ckpt_dir* itself, then the two model repositories
    into their own sub-directories. Progress is reported with print().
    """
    heavy_rule = "=" * 50
    light_rule = "━" * 50

    if check_models_exist(ckpt_dir):
        print(heavy_rule)
        print(f"✓ Checkpoints found in {ckpt_dir}")
        print("✓ Skipping download")
        print(heavy_rule)
        return

    print(heavy_rule)
    print("⬇  Starting model download from ModelScope")
    print(heavy_rule)
    print("")

    # Imported only once a download is actually required.
    from modelscope import snapshot_download

    # (announcement, repository id, sub-directory or None for ckpt_dir, completion message)
    download_steps = (
        (
            "📦 [1/3] Downloading HeartMuLaGen config and tokenizer...",
            'HeartMuLa/HeartMuLaGen',
            None,
            "✓ HeartMuLaGen download completed",
        ),
        (
            "📦 [2/3] Downloading HeartMuLa-oss-3B model...",
            'HeartMuLa/HeartMuLa-oss-3B',
            'HeartMuLa-oss-3B',
            "✓ HeartMuLa-oss-3B download completed",
        ),
        (
            "📦 [3/3] Downloading HeartCodec-oss model...",
            'HeartMuLa/HeartCodec-oss',
            'HeartCodec-oss',
            "✓ HeartCodec-oss download completed",
        ),
    )
    for announcement, repo_id, subdir, completion in download_steps:
        print(light_rule)
        print(announcement)
        print(light_rule)
        destination = ckpt_dir if subdir is None else os.path.join(ckpt_dir, subdir)
        snapshot_download(repo_id, local_dir=destination)
        print(completion)
        print("")

    print(heavy_rule)
    print("✓ All models downloaded successfully!")
    print(heavy_rule)
    print("")


def load_pipeline(model_path, version, codec_version, quant_mode, use_acceleration: bool):
    """Return the (lazily created) HeartMuLa pipeline for the given settings.

    Note: *model_path* only appears in the log line; the manager returned by
    `get_model_manager` is built from the module-level MODEL_PATH.
    """
    mgr = get_model_manager(use_acceleration)
    device = mgr.device
    print(f"Using model from {model_path} on {device}...")
    return mgr.get_gen_pipeline(version, codec_version, quant_mode)


def _validate_generation_inputs(lyrics: str, tags: str) -> None:
    """Raise ``gr.Error`` if lyrics or tags are empty or whitespace-only.

    Lyrics are checked first, so their message takes priority when both are blank.
    """
    checks = (
        (lyrics, "Please enter lyrics"),
        (tags, "Please enter tags"),
    )
    for text, message in checks:
        if text.strip() == "":
            raise gr.Error(message)


def generate(
    lyrics,
    tags,
    cfg_scale,
    duration_sec,
    temperature,
    topk,
    version,
    codec_version,
    quant_mode,
    output_format,
    keep_model_loaded,
    offload_mode,
    backend,
    use_acceleration,
):
    """Generate music and return the path of the resulting audio file.

    The pipeline writes a WAV file into DATA_DIR; when *output_format* is
    "mp3" the WAV is additionally transcoded and the MP3 path is returned
    instead.

    Args:
        lyrics, tags: Text inputs; both must be non-blank.
        cfg_scale, temperature, topk: Sampling options forwarded to the pipeline.
        duration_sec: Requested length in seconds (converted to milliseconds).
        version, codec_version, quant_mode: Select which pipeline is loaded.
        output_format: "mp3" to transcode; any other value returns the WAV.
        keep_model_loaded, offload_mode: Forwarded to the pipeline.
        backend: "exllama_v2" is rejected as not implemented.
        use_acceleration: Selects the accelerated or the original model manager.

    Raises:
        gr.Error: For invalid input, an unsupported backend, or any failure
            during generation. Errors that already are ``gr.Error`` keep their
            own message; everything else is reported as "Generation error: ...".
    """
    import torch
    _validate_generation_inputs(lyrics, tags)

    max_audio_length_ms = int(duration_sec * 1000)

    try:
        if backend == "exllama_v2":
            raise gr.Error("ExLlamaV2 backend is not implemented yet.")

        pipe = load_pipeline(MODEL_PATH, version, codec_version, quant_mode, use_acceleration)
        output_path = os.path.join(DATA_DIR, f"gen_{uuid.uuid4().hex}.wav")

        with torch.inference_mode():
            pipe(
                {
                    "lyrics": lyrics,
                    "tags": tags,
                },
                max_audio_length_ms=max_audio_length_ms,
                save_path=output_path,
                topk=topk,
                temperature=temperature,
                cfg_scale=cfg_scale,
                keep_model_loaded=keep_model_loaded,
                offload_mode=offload_mode,
            )
        # Purely informational: a failing stat must not fail the request.
        try:
            file_size = os.path.getsize(output_path)
            print(f"Generated file: {output_path} ({file_size} bytes)")
        except Exception as e:
            print(f"WARN: failed to stat output file {output_path}: {e}")

        if output_format == "mp3":
            mp3_path = convert_wav_to_mp3(output_path, DATA_DIR)
            return mp3_path

        return output_path

    except gr.Error:
        # Already a user-facing error (e.g. unsupported backend or quant mode):
        # pass it through unchanged instead of wrapping it a second time.
        raise
    except Exception as e:
        # Chain the original exception so the server log keeps the real traceback.
        raise gr.Error(f"Generation error: {str(e)}") from e
    finally:
        if not keep_model_loaded and torch.cuda.is_available():
            torch.cuda.empty_cache()


@_gpu_guard
def generate_original(
    lyrics,
    tags,
    cfg_scale,
    duration_sec,
    temperature,
    topk,
    version,
    codec_version,
    quant_mode,
    output_format,
    keep_model_loaded,
    offload_mode,
    backend,
):
    """GPU-guarded entry point: run `generate` with acceleration switched off."""
    return generate(
        lyrics=lyrics,
        tags=tags,
        cfg_scale=cfg_scale,
        duration_sec=duration_sec,
        temperature=temperature,
        topk=topk,
        version=version,
        codec_version=codec_version,
        quant_mode=quant_mode,
        output_format=output_format,
        keep_model_loaded=keep_model_loaded,
        offload_mode=offload_mode,
        backend=backend,
        use_acceleration=False,
    )


@_gpu_guard
def generate_accelerated(
    lyrics,
    tags,
    cfg_scale,
    duration_sec,
    temperature,
    topk,
    version,
    codec_version,
    quant_mode,
    output_format,
    keep_model_loaded,
    offload_mode,
    backend,
):
    """GPU-guarded entry point: run `generate` with acceleration switched on."""
    return generate(
        lyrics=lyrics,
        tags=tags,
        cfg_scale=cfg_scale,
        duration_sec=duration_sec,
        temperature=temperature,
        topk=topk,
        version=version,
        codec_version=codec_version,
        quant_mode=quant_mode,
        output_format=output_format,
        keep_model_loaded=keep_model_loaded,
        offload_mode=offload_mode,
        backend=backend,
        use_acceleration=True,
    )


def _normalize_stream_chunk(chunk: np.ndarray) -> np.ndarray:
    """Sanitise an audio chunk: NaN and ±inf become 0.0, then values are clipped to [-1.0, 1.0]."""
    finite = np.nan_to_num(chunk, nan=0.0, posinf=0.0, neginf=0.0)
    bounded = np.clip(finite, -1.0, 1.0)
    return bounded


def generate_music_streaming(
    lyrics,
    tags,
    cfg_scale,
    duration_sec,
    temperature,
    topk,
    version,
    codec_version,
    quant_mode,
    keep_model_loaded,
    offload_mode,
    backend,
    use_acceleration,
) -> Iterator[Tuple[int, np.ndarray]]:
    """Stream generated audio as ``(AUDIO_SAMPLE_RATE, samples)`` pairs.

    This is a generator, so input validation and pipeline loading only happen
    once the first item is requested. Each chunk coming from ``pipe.stream`` is
    squeezed to one dimension when it is 2-D, moved to the CPU, converted to
    NumPy and sanitised with `_normalize_stream_chunk`.

    Raises:
        gr.Error: For blank lyrics/tags or the unsupported "exllama_v2" backend.
    """
    _validate_generation_inputs(lyrics, tags)
    if backend == "exllama_v2":
        raise gr.Error("ExLlamaV2 backend is not implemented yet.")

    pipeline = load_pipeline(MODEL_PATH, version, codec_version, quant_mode, use_acceleration)
    request = {"lyrics": lyrics, "tags": tags}
    stream_options = dict(
        max_audio_length_ms=int(duration_sec * 1000),
        temperature=temperature,
        topk=topk,
        cfg_scale=cfg_scale,
        chunk_frames=BLOCK_FRAMES,
        keep_model_loaded=keep_model_loaded,
        offload_mode=offload_mode,
    )

    for audio in pipeline.stream(request, **stream_options):
        flat = audio.squeeze(0) if audio.dim() == 2 else audio
        samples = _normalize_stream_chunk(flat.cpu().numpy())
        print(f"stream chunk: samples={samples.shape[0]} sr={AUDIO_SAMPLE_RATE}")
        yield AUDIO_SAMPLE_RATE, samples


def stream_generate(
    lyrics,
    tags,
    cfg_scale,
    duration_sec,
    temperature,
    topk,
    version,
    codec_version,
    quant_mode,
    keep_model_loaded,
    offload_mode,
    backend,
    use_acceleration,
):
    """Yield ``(sample_rate, float32 block)`` pairs with a start-up buffer.

    Blocks from `generate_music_streaming` are held back until
    ``max(MIN_BUFFER_BLOCKS, PREFETCH_BLOCKS)`` of them (capped by
    MAX_QUEUE_BLOCKS) are buffered; after that every block is forwarded as
    soon as it arrives. If generation ends before the threshold is reached,
    the buffered blocks are flushed so that short songs are not lost.

    Raises:
        gr.Error: Input/backend errors keep their own message; any other
            failure is reported as "Streaming error: ...".
    """
    from collections import deque

    try:
        _validate_generation_inputs(lyrics, tags)
        start_threshold = max(MIN_BUFFER_BLOCKS, PREFETCH_BLOCKS)
        pending = deque()
        started = False
        last_sr = None
        print("block stream start:", f"block_sec={MODEL_DETOKENIZE_INTERVAL_SEC}", f"duration_sec={duration_sec}")
        for sr, chunk_np in generate_music_streaming(
            lyrics=lyrics,
            tags=tags,
            cfg_scale=cfg_scale,
            duration_sec=duration_sec,
            temperature=temperature,
            topk=topk,
            version=version,
            codec_version=codec_version,
            quant_mode=quant_mode,
            keep_model_loaded=keep_model_loaded,
            offload_mode=offload_mode,
            backend=backend,
            use_acceleration=use_acceleration,
        ):
            last_sr = sr
            pending.append(chunk_np.astype("float32", copy=False))
            if not started:
                # Keep buffering until enough blocks are queued to start playback.
                if len(pending) < start_threshold and len(pending) < MAX_QUEUE_BLOCKS:
                    continue
                started = True
                print(f"block stream start playback: buffered_blocks={len(pending)}")
            while pending:
                block = pending.popleft()
                print(f"block stream yield: samples={block.shape[0]}")
                yield sr, block

        # The source finished before the start threshold was reached: emit what
        # was buffered instead of silently dropping it.
        while pending:
            block = pending.popleft()
            print(f"block stream yield: samples={block.shape[0]}")
            yield last_sr, block
    except gr.Error:
        # Already user-facing; do not wrap it in a second "Streaming error".
        raise
    except Exception as e:
        raise gr.Error(f"Streaming error: {str(e)}") from e


@_gpu_guard
def stream_generate_accelerated(
    lyrics,
    tags,
    cfg_scale,
    duration_sec,
    temperature,
    topk,
    version,
    codec_version,
    quant_mode,
    keep_model_loaded,
    offload_mode,
    backend,
):
    """GPU-guarded streaming entry point with acceleration switched on.

    This is deliberately a generator function (``yield from``) rather than a
    plain function returning a generator: frameworks that decide how to run a
    callback by checking whether it is a generator function then treat it as
    a streaming callback. Callers that simply iterate over the result are
    unaffected. As with any generator, the "streaming disabled" check below
    runs when the first item is requested.

    Raises:
        gr.Error: When streaming is disabled on a Space, plus everything
            `stream_generate` raises.
    """
    if IS_SPACE and not STREAMING_ALLOWED:
        raise gr.Error("Streaming is disabled on ZeroGPU. Use Standard mode or set ALLOW_STREAMING_ZERO_GPU=1.")
    yield from stream_generate(
        lyrics,
        tags,
        cfg_scale,
        duration_sec,
        temperature,
        topk,
        version,
        codec_version,
        quant_mode,
        keep_model_loaded,
        offload_mode,
        backend,
        True,
    )


def _strip_code_fence(text):
    """Remove a Markdown code fence wrapped around *text*, if it starts with one."""
    if not text.startswith("```"):
        return text
    # Drop the opening fence line (it may carry a language tag).
    lines = text.split("\n")[1:]
    # Only drop the last line when it really is the closing fence, so an
    # unterminated fence does not cost the final lyric line.
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines)


def generate_lyrics(theme, tags, language, api_choice, api_key_input, custom_base_url, custom_model, progress=gr.Progress()):
    """Generate lyrics using selected LLM API

    Args:
        theme: Song theme; must be non-blank.
        tags: Style tags; blank or None falls back to "pop, emotional".
        language: Language code ("en", "zh", "jp", "kr", "sp"); unknown codes
            fall back to English.
        api_choice: Key into LLM_PRESETS.
        api_key_input: API key typed by the user; when blank, the preset's
            environment variable is consulted.
        custom_base_url: Optional endpoint overriding the preset's base URL.
        custom_model: Optional model name overriding the preset's default.
        progress: Gradio progress reporter.

    Returns:
        The generated lyrics after `process_lyrics_correct` normalisation.

    Raises:
        gr.Error: For a blank theme, unknown provider, missing API key,
            unavailable SDK, empty model response, or any API failure (the
            latter reported as "Lyrics generation error: ...").
    """

    if not theme or not theme.strip():
        raise gr.Error("Please enter a theme")

    progress(0.1, desc="Preparing request...")

    # Get preset configuration
    if api_choice not in LLM_PRESETS:
        raise gr.Error(f"Unknown API choice: {api_choice}")

    preset = LLM_PRESETS[api_choice]

    # Determine API key
    api_key = api_key_input.strip() if api_key_input and api_key_input.strip() else None
    if not api_key and preset["env_key"]:
        api_key = os.environ.get(preset["env_key"])

    if not api_key:
        raise gr.Error("No API key provided. Please enter your API key in the field above.")

    # Determine base URL and model
    base_url = custom_base_url.strip() if custom_base_url and custom_base_url.strip() else preset["base_url"]
    model_name = custom_model.strip() if custom_model and custom_model.strip() else preset["default_model"]

    # Language mapping
    language_names = {
        "en": "English",
        "zh": "Chinese",
        "jp": "Japanese",
        "kr": "Korean",
        "sp": "Spanish"
    }
    lang_name = language_names.get(language, "English")

    # Tags processing (None is treated like an empty string)
    tags_text = (tags or "").strip() or "pop, emotional"

    # Create prompt
    prompt = f"""You are a professional songwriter. Generate song lyrics based on the following requirements:

**Theme**: {theme}
**Music Style/Tags**: {tags_text}
**Language**: {lang_name}

**Format Requirements** (CRITICAL):
1. Use lowercase for all lyrics text (except structure tags which are in brackets)
2. Include proper song structure tags: [Intro], [Verse], [Prechorus], [Chorus], [Bridge], [Outro]
3. Each structure tag should be on its own line
4. Separate different sections with a blank line (one empty line between sections)
5. NO timestamps like [00:12] - only structure tags allowed
6. Keep lyrics concise and suitable for a 3-4 minute song

**Structure Guidelines**:
- [Intro]: Optional, 1-2 lines if included
- [Verse]: Story-telling part, 4-6 lines, can repeat with different lyrics
- [Prechorus]: Optional, 2-4 lines, builds tension before chorus
- [Chorus]: Main hook, catchy and repetitive, 4-6 lines
- [Bridge]: Optional, provides contrast, 4-6 lines
- [Outro]: Closing, 1-2 lines

**Example Format**:
```
[Intro]

[Verse]
the sun creeps in across the floor
i hear the traffic outside the door
the coffee pot begins to hiss
it is another morning just like this

[Chorus]
every day the light returns
every day the fire burns
we keep on walking down this street
moving to the same steady beat
```

Now generate lyrics in {lang_name} based on the theme "{theme}" with style "{tags_text}".
Output ONLY the lyrics with structure tags, no explanations.
"""

    try:
        if preset["api_type"] == "gemini":
            if genai is None or types is None:
                raise gr.Error("Gemini SDK not available. Install `google-genai` or switch provider.")
            # Gemini API
            progress(0.3, desc=f"Connecting to {preset['name']}...")

            # Set proxy only when one is explicitly configured. Forcing a
            # default local proxy would break every network call of the
            # process on machines that do not run one.
            proxy_host = os.environ.get("PROXY_HOST")
            proxy_port = os.environ.get("PROXY_PORT")
            if proxy_host or proxy_port:
                proxy_url = f"http://{proxy_host or '127.0.0.1'}:{proxy_port or '7890'}"
                os.environ['http_proxy'] = proxy_url
                os.environ['https_proxy'] = proxy_url

            client = genai.Client(api_key=api_key)

            progress(0.5, desc=f"Generating lyrics with {preset['name']}...")

            response = client.models.generate_content(
                model=model_name,
                contents=[
                    types.Content(
                        role='user',
                        parts=[types.Part(text=prompt)]
                    )
                ],
                config=types.GenerateContentConfig(
                    temperature=0.8,
                    max_output_tokens=2000
                )
            )

            # The text may be None (no usable candidate); treat it as empty.
            generated_lyrics = (response.text or "").strip()

        elif preset["api_type"] == "openai":
            # OpenAI-compatible API (OpenAI, DeepSeek, Custom)
            progress(0.3, desc=f"Connecting to {preset['name']}...")

            # Create client with optional base_url
            if base_url:
                client = OpenAI(api_key=api_key, base_url=base_url)
            else:
                client = OpenAI(api_key=api_key)

            progress(0.5, desc=f"Generating lyrics with {preset['name']}...")

            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": "You are a professional songwriter who creates well-structured lyrics."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.8,
                max_tokens=2000
            )

            # The message content may be None; treat it as empty.
            generated_lyrics = (response.choices[0].message.content or "").strip()

        else:
            raise gr.Error(f"Unknown API type: {preset['api_type']}")

        if not generated_lyrics:
            raise gr.Error("The model returned an empty response. Please try again.")

        progress(0.9, desc="Processing response...")

        # Clean up the response (remove markdown code blocks if present)
        generated_lyrics = _strip_code_fence(generated_lyrics)

        # Apply our lyrics processing function to ensure format consistency
        generated_lyrics = process_lyrics_correct(generated_lyrics)

        progress(1.0, desc="Done!")

        return generated_lyrics

    except gr.Error:
        # Already user-facing; keep its message instead of wrapping it again.
        raise
    except Exception as e:
        raise gr.Error(f"Lyrics generation error: {str(e)}") from e


def create_ui():
    """Build the two-tab Gradio interface and wire up all of its events.

    Tab 1 ("Music Generation") holds the lyrics/tags inputs, preset selector,
    sampling sliders, advanced model settings, the three generate buttons
    (original / accelerated / block streaming) and the audio outputs.
    Tab 2 ("Lyrics Generation") collects LLM provider settings and a theme,
    calls ``generate_lyrics`` and lets the user copy the result into the
    lyrics box of tab 1.

    Components are created in the order they should appear on the page, so
    the statement order inside the ``with`` blocks is significant.

    Returns:
        The ``gr.Blocks`` app. It is neither queued nor launched here.
    """
    original_mode = "Original (No Acceleration)"

    # The "Streaming" sub-mode is only offered when STREAMING_ALLOWED, so the
    # initial selection falls back to "Standard" otherwise.
    speed_submode_value = DEFAULT_SPEED_SUBMODE if STREAMING_ALLOWED else "Standard"
    # Coerced to bool so every `visible=` argument below receives a real boolean.
    show_stream_default = bool(
        STREAMING_ALLOWED
        and DEFAULT_GENERATION_MODE != original_mode
        and speed_submode_value == "Streaming"
    )

    # TAG_DATA categories grouped by the accordion column they are rendered in.
    tag_columns = (
        ("Gender", "Genre"),
        ("Instrument", "Mood"),
        ("Scene", "Singer Timbre"),
        ("Topic",),
    )

    with gr.Blocks(title="HeartMuLa Music Generation") as demo:
        gr.Markdown("# HeartMuLa Music Generation")
        gr.Markdown("Generate music from lyrics and style tags")
        gr.Markdown("Tip: start with the **ZeroGPU Safe** preset for reliable generation on small GPUs.")

        with gr.Tabs():
            # Tab 1: Music Generation
            with gr.Tab("Music Generation"):
                with gr.Row():
                    with gr.Column():
                        lyrics = gr.Textbox(
                            label="Lyrics",
                            lines=15,
                            value=EXAMPLE_LYRICS,
                            placeholder="Enter lyrics here..."
                        )

                        # Add format button
                        format_btn = gr.Button("Format Lyrics", size="sm")

                        # Tag Selection
                        gr.Markdown("### Tags")

                        tags = gr.Textbox(
                            label="Selected Tags (comma-separated)",
                            value=EXAMPLE_TAGS,
                            placeholder="e.g., piano,happy,pop",
                            lines=2
                        )

                        # Tag categories in accordion: one CheckboxGroup per
                        # category, created column by column. The resulting
                        # list order is the argument order update_tag_string
                        # receives.
                        tag_checkboxes = []
                        with gr.Accordion("Tag Categories (Click to Expand)", open=False):
                            with gr.Row():
                                for column_categories in tag_columns:
                                    with gr.Column():
                                        for category in column_categories:
                                            tag_checkboxes.append(
                                                gr.CheckboxGroup(choices=TAG_DATA[category], label=category)
                                            )

                        gr.Markdown("### Quick Presets")
                        with gr.Row():
                            preset_selector = gr.Dropdown(
                                choices=list(PRESET_CONFIGS.keys()),
                                value=DEFAULT_PRESET,
                                label="Preset"
                            )
                            apply_preset_btn = gr.Button("Apply Preset", size="sm")

                        # Generation parameters
                        with gr.Row():
                            cfg_scale = gr.Slider(0.0, 3.0, value=1.5, step=0.1, label="CFG Scale")
                            duration = gr.Slider(10, 300, value=DEFAULT_DURATION_SEC, step=10, label="Duration (sec)")

                        with gr.Row():
                            temperature = gr.Slider(0.1, 2.0, value=1.0, step=0.1, label="Temperature")
                            topk = gr.Slider(1, 100, value=50, step=1, label="Top-K")

                        with gr.Accordion("Advanced Settings", open=False):
                            backend = gr.Dropdown(
                                choices=[("HF Pipeline", "hf"), ("ExLlamaV2 (Not Implemented)", "exllama_v2")],
                                value="hf",
                                label="Backend"
                            )
                            version = gr.Dropdown(
                                choices=["3B", "7B", "RL-oss-3B-20260123"],
                                value="3B",
                                label="Model Version"
                            )
                            codec_version = gr.Dropdown(
                                choices=["oss", "oss-20260123"],
                                value="oss",
                                label="Codec Version"
                            )
                            quant_mode = gr.Dropdown(
                                choices=[
                                    ("None", "none"),
                                    ("4-bit (NF4/FP4)", "4bit"),
                                    ("8-bit", "8bit"),
                                    ("FP8 (TorchAO)", "fp8"),
                                ],
                                value=DEFAULT_QUANT_MODE,
                                label="Quantization (ZeroGPU recommended: 4-bit)"
                            )
                            keep_model_loaded = gr.Checkbox(
                                value=DEFAULT_KEEP_MODEL_LOADED,
                                label="Keep Model Loaded"
                            )
                            offload_mode = gr.Dropdown(
                                choices=["auto", "aggressive"],
                                value=DEFAULT_OFFLOAD_MODE,
                                label="Offload Mode"
                            )
                            output_format = gr.Radio(
                                choices=[("WAV", "wav"), ("MP3", "mp3")],
                                value="wav",
                                label="Output Format",
                                visible=not show_stream_default,
                            )
                            gr.Markdown(
                                f"Streaming is block-based: {MODEL_DETOKENIZE_INTERVAL_SEC:.2f}s per block "
                                f"({BLOCK_FRAMES} frames, {BLOCK_SAMPLES} samples)."
                            )

                        gr.Markdown("### 🚀 Generation")

                        generation_mode = gr.Radio(
                            choices=["Original (No Acceleration)", "Accelerated"],
                            value=DEFAULT_GENERATION_MODE,
                            label="Generation Mode",
                        )

                        speed_choices = [("Standard", "Standard")]
                        if STREAMING_ALLOWED:
                            speed_choices.append(("Block Streaming (Preview)", "Streaming"))

                        speed_submode = gr.Radio(
                            choices=speed_choices,
                            value=speed_submode_value,
                            label="Accelerated Options",
                            visible=DEFAULT_GENERATION_MODE != original_mode,
                        )

                        # Exactly one of the three generate buttons is visible
                        # at a time, depending on the selected mode/sub-mode.
                        btn_original = gr.Button(
                            "🎼 Generate Music (Original)",
                            variant="primary",
                            size="lg",
                            visible=DEFAULT_GENERATION_MODE == original_mode,
                        )
                        btn_accel = gr.Button(
                            "🎼 Generate Music (Accelerated)",
                            variant="primary",
                            size="lg",
                            visible=DEFAULT_GENERATION_MODE != original_mode
                            and not show_stream_default,
                        )
                        btn_stream = gr.Button(
                            "🎼 Generate Music (Block Streaming)",
                            variant="primary",
                            size="lg",
                            visible=show_stream_default,
                        )
                        cancel_stream_btn = gr.Button(
                            "Cancel Streaming",
                            variant="secondary",
                            size="lg",
                            visible=show_stream_default,
                        )
                        # Output slot for the cancel button's no-op callback.
                        cancel_state = gr.State()

                    with gr.Column():
                        # Notice section
                        with gr.Accordion("Usage Notice", open=True):
                            gr.Markdown("""
### ZeroGPU Tips
- Default preset is **ZeroGPU AOTI FP8** for fastest H200 path
- Prefer **4-bit quantization** if FP8 or AOTI is unavailable
- Keep durations short (<= 60s) for faster first results
- **Streaming is block-based**, not token streaming: each block is ~29.76s
- Use Streaming for preview; Standard mode outputs a full downloadable file
- Try **FP8 + AOTI** (preset: ZeroGPU AOTI FP8) for the fastest H200 path
- ZeroGPU does not support generator streaming in forked GPU; streaming is for local deploy only

### Lyrics Format Requirements

**Automatic Processing:**
1. All text will be converted to **lowercase**
2. Timestamps (e.g., [00:12]) will be **automatically removed**
3. Leading/trailing whitespace on each line will be **stripped**
4. Leading/trailing empty lines will be **removed**
5. Multiple consecutive empty lines (3+) will be **collapsed to 2**

**Recommended Format:**
- Use standard song structure tags: `[Intro]`, `[Verse]`, `[Chorus]`, `[Bridge]`, `[Outro]`, etc.
- Separate sections with **blank lines**
- Case doesn't matter (will be auto-converted)

**Example:**
```
[Intro]

[Verse]
The sun creeps in across the floor
I hear the traffic outside the door

[Chorus]
Every day the light returns
Every day the fire burns
```

---

### Tags Format
- Use **commas** to separate multiple tags: `piano,happy,pop`
- Tags influence the style and mood of the generated music
- Select from categories below or type directly
                            """)

                        output_audio_file = gr.Audio(
                            label="Generated Music (Full)",
                            type="filepath",
                            interactive=False,
                            visible=not show_stream_default,
                        )
                        stream_audio = gr.Audio(
                            label="Block Streaming Audio (Preview)",
                            streaming=True,
                            autoplay=True,
                            type="numpy",
                            format="wav",
                            interactive=False,
                            visible=show_stream_default,
                        )

                # Event handlers for tag selection: any checkbox change
                # recomputes the tags textbox from all checkbox groups.
                for cb in tag_checkboxes:
                    cb.change(fn=update_tag_string, inputs=tag_checkboxes, outputs=tags)

                # Button callbacks
                format_btn.click(
                    fn=process_lyrics_correct,
                    inputs=[lyrics],
                    outputs=[lyrics]
                )

                def update_visibility(gen_mode, spd_mode):
                    """Return visibility updates for the mode-dependent components.

                    The tuple order matches ``visibility_outputs`` below.
                    """
                    accelerated = gen_mode != original_mode
                    # Streaming widgets are shown only in accelerated mode,
                    # when streaming is allowed and selected.
                    show_stream = bool(
                        accelerated and STREAMING_ALLOWED and spd_mode == "Streaming"
                    )
                    return (
                        gr.update(visible=accelerated),  # speed_submode
                        gr.update(visible=not accelerated),  # btn_original
                        gr.update(visible=accelerated and not show_stream),  # btn_accel
                        gr.update(visible=show_stream),  # btn_stream
                        gr.update(visible=show_stream),  # cancel_stream_btn
                        gr.update(visible=not show_stream),  # output_format
                        gr.update(visible=not show_stream),  # output_audio_file
                        gr.update(visible=show_stream),  # stream_audio
                    )

                def apply_preset(preset_name):
                    """Return value updates for the controls governed by a preset.

                    Unknown preset names fall back to ``DEFAULT_PRESET``.
                    """
                    preset = PRESET_CONFIGS.get(preset_name, PRESET_CONFIGS[DEFAULT_PRESET])
                    # The speed radio only lists "Streaming" when
                    # STREAMING_ALLOWED; clamp the preset's choice the same way
                    # the initial value is clamped so it is always a listed option.
                    preset_speed = preset["speed_submode"] if STREAMING_ALLOWED else "Standard"
                    return (
                        gr.update(value=preset["cfg_scale"]),
                        gr.update(value=preset["duration"]),
                        gr.update(value=preset["temperature"]),
                        gr.update(value=preset["topk"]),
                        gr.update(value=preset["quant_mode"]),
                        gr.update(value=preset["keep_model_loaded"]),
                        gr.update(value=preset["offload_mode"]),
                        gr.update(value=preset["generation_mode"]),
                        gr.update(value=preset_speed),
                    )

                # Shared wiring for the three events that refresh visibility.
                mode_inputs = [generation_mode, speed_submode]
                visibility_outputs = [
                    speed_submode,
                    btn_original,
                    btn_accel,
                    btn_stream,
                    cancel_stream_btn,
                    output_format,
                    output_audio_file,
                    stream_audio,
                ]

                generation_mode.change(
                    fn=update_visibility,
                    inputs=mode_inputs,
                    outputs=visibility_outputs,
                )
                speed_submode.change(
                    fn=update_visibility,
                    inputs=mode_inputs,
                    outputs=visibility_outputs,
                )

                preset_event = apply_preset_btn.click(
                    fn=apply_preset,
                    inputs=[preset_selector],
                    outputs=[
                        cfg_scale,
                        duration,
                        temperature,
                        topk,
                        quant_mode,
                        keep_model_loaded,
                        offload_mode,
                        generation_mode,
                        speed_submode,
                    ],
                )

                # Re-sync visibility once the preset values have been applied.
                preset_event.then(
                    fn=update_visibility,
                    inputs=mode_inputs,
                    outputs=visibility_outputs,
                )

                # Inputs for the two file-producing handlers.
                file_generation_inputs = [
                    lyrics,
                    tags,
                    cfg_scale,
                    duration,
                    temperature,
                    topk,
                    version,
                    codec_version,
                    quant_mode,
                    output_format,
                    keep_model_loaded,
                    offload_mode,
                    backend,
                ]
                # The streaming handler takes the same inputs minus output_format.
                stream_generation_inputs = [
                    component
                    for component in file_generation_inputs
                    if component is not output_format
                ]
                # All three generate events share one concurrency group.
                gpu_queue_kwargs = {
                    "concurrency_id": "gpu_queue",
                    "concurrency_limit": GPU_CONCURRENCY_LIMIT,
                }

                btn_original.click(
                    fn=generate_original,
                    inputs=file_generation_inputs,
                    outputs=[output_audio_file],
                    **gpu_queue_kwargs,
                )

                btn_accel.click(
                    fn=generate_accelerated,
                    inputs=file_generation_inputs,
                    outputs=[output_audio_file],
                    **gpu_queue_kwargs,
                )

                stream_event = btn_stream.click(
                    fn=stream_generate_accelerated,
                    inputs=stream_generation_inputs,
                    outputs=[stream_audio],
                    **gpu_queue_kwargs,
                )

                # The callback itself is a no-op; the click exists to cancel
                # the running streaming event.
                cancel_stream_btn.click(
                    fn=lambda: None,
                    inputs=None,
                    outputs=[cancel_state],
                    cancels=[stream_event],
                )

            # Tab 2: Lyrics Generation
            with gr.Tab("Lyrics Generation"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Generate Lyrics with AI")

                        api_selector = gr.Radio(
                            choices=[
                                ("Google Gemini", "gemini"),
                                ("OpenAI", "openai"),
                                ("DeepSeek", "deepseek"),
                                ("Custom (OpenAI-compatible)", "custom")
                            ],
                            value="gemini",
                            label="Select LLM Provider"
                        )

                        with gr.Accordion("API Configuration", open=True):
                            api_key_input = gr.Textbox(
                                label="API Key (Required)",
                                type="password",
                                placeholder="Enter your API key or set environment variable",
                                info="Will use environment variable if not provided here"
                            )

                            custom_base_url = gr.Textbox(
                                label="Custom Base URL (Optional)",
                                placeholder="e.g., https://api.your-provider.com/v1",
                                info="Leave empty to use default. For custom providers only.",
                                visible=False
                            )

                            custom_model = gr.Textbox(
                                label="Model Name (Optional)",
                                placeholder="e.g., gpt-4o, deepseek-chat",
                                info="Leave empty to use recommended default",
                                visible=False
                            )

                        def update_custom_fields(choice):
                            """Show/hide custom fields based on API choice.

                            Returns updates for (custom_base_url, custom_model):
                            the base URL is shown only for "custom", the model
                            name for "custom" and "deepseek".
                            """
                            show_base_url = choice == "custom"
                            show_model = choice in ("custom", "deepseek")
                            return gr.update(visible=show_base_url), gr.update(visible=show_model)

                        api_selector.change(
                            fn=update_custom_fields,
                            inputs=[api_selector],
                            outputs=[custom_base_url, custom_model]
                        )

                        theme_input = gr.Textbox(
                            label="Theme",
                            placeholder="e.g., Love lost in the city, Dreams and hope, Rainy day memories...",
                            lines=2
                        )

                        tags_gen = gr.Textbox(
                            label="Music Style/Tags",
                            placeholder="e.g., piano, melancholy, pop",
                            value="pop,emotional"
                        )

                        language_select = gr.Radio(
                            choices=[
                                ("English", "en"),
                                ("中文 (Chinese)", "zh"),
                                ("日本語 (Japanese)", "jp"),
                                ("한국어 (Korean)", "kr"),
                                ("Español (Spanish)", "sp")
                            ],
                            value="en",
                            label="Language"
                        )

                        generate_lyrics_btn = gr.Button(
                            "Generate Lyrics",
                            variant="primary",
                            size="lg"
                        )

                    with gr.Column():
                        with gr.Accordion("How to Use", open=True):
                            gr.Markdown("""
### How to Generate Lyrics

**Theme**: Describe your song's story or emotion
- Examples: "Lost love in Tokyo", "Overcoming obstacles", "Summer road trip"

**Music Style/Tags**: Define mood and genre
- Examples: "piano,melancholy,ballad", "upbeat,electronic,dance"

**Tips**
- Generated lyrics follow standard song structure ([Intro], [Verse], [Chorus], etc.)
- Edit lyrics before using for music generation
- Be specific with themes for better results

                            """)

                        generated_lyrics_output = gr.Textbox(
                            label="Generated Lyrics",
                            lines=20,
                            placeholder="Generated lyrics will appear here...",
                            interactive=False
                        )

                        copy_to_music_gen = gr.Button(
                            "Copy to Music Generation Tab",
                            size="sm"
                        )

                # Lyrics generation button callback
                generate_lyrics_btn.click(
                    fn=generate_lyrics,
                    inputs=[theme_input, tags_gen, language_select, api_selector, api_key_input, custom_base_url, custom_model],
                    outputs=[generated_lyrics_output]
                )

                # Copy lyrics to music generation tab
                def copy_lyrics(lyrics_text):
                    """Pass the generated lyrics through unchanged to the tab 1 lyrics box."""
                    return lyrics_text

                copy_to_music_gen.click(
                    fn=copy_lyrics,
                    inputs=[generated_lyrics_output],
                    outputs=[lyrics]
                )

    return demo


# Build the Blocks app at import time and configure its request queue.
demo = create_ui()
demo.queue(
    default_concurrency_limit=GRADIO_DEFAULT_CONCURRENCY,
    max_size=GRADIO_QUEUE_MAX_SIZE,
)

if __name__ == "__main__":
    # The listening port comes from $PORT, defaulting to 7860.
    port = int(os.environ.get("PORT", "7860"))
    launch_kwargs = {
        "server_name": "0.0.0.0",
        "server_port": port,
        # Directories passed to Gradio as allowed_paths.
        "allowed_paths": [str(DATA_DIR), "/tmp"],
        "ssr_mode": False,
    }
    demo.launch(**launch_kwargs)