"""
Copyright (c) Meta Platforms, Inc. and affiliates.
All rights reserved.
This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""

import os
import sys
import time
import gc
import random
import warnings
import typing as tp
from pathlib import Path
from tempfile import NamedTemporaryFile
import argparse
import subprocess

import torch
import gradio as gr
import librosa
from mutagen.mp4 import MP4

# Imports from local modules
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
from audiocraft.data.audio_utils import apply_fade, apply_tafade, apply_splice_effect
from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
from audiocraft.utils import utils
import numpy as np

# Imports from project modules
import modules.user_history
from modules.version_info import versions_html, commit_hash, get_xformers_version
from modules.gradio import *
from modules.file_utils import (
    get_file_parts,
    get_filename_from_filepath,
    convert_title_to_filename,
    get_unique_file_path,
    delete_file,
    download_and_save_image,
    download_and_save_file
)
from modules.constants import (
    IS_SHARED_SPACE,
    HF_REPO_ID,
    TMPDIR,
    HF_API_TOKEN
)
from modules.storage import upload_files_to_repo

# Module-level state shared by the functions in this file
MODEL = None  # model instance currently in use; assigned by predict()
MODELS = None  # optional per-version model cache read by load_model(); None disables caching
INTERRUPTED = False  # set by predict() once an interrupt request has been honoured
UNLOAD_MODEL = False  # when true, predict() drops MODEL at the end of a run
MOVE_TO_CPU = False  # when true, predict() moves the model to the CPU after a run and back before the next
MAX_PROMPT_INDEX = 0  # updated by load_melody_filepath() with the number of melody segments
git = os.getenv('GIT', "git")  # git executable, overridable through the GIT environment variable

# GPU / runtime environment configuration (optional, adjust to your needs)
os.environ.update({
    "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128,expandable_segments:True",
    'TF_ENABLE_ONEDNN_OPTS': '0',
    'CUDA_MODULE_LOADING': 'LAZY',
    'USE_FLASH_ATTENTION': '1',
    'XFORMERS_FORCE_DISABLE_TRITON': '1',
})

# Generation interrupt helpers
def interrupt_callback():
    """Return the current value of the module-level ``INTERRUPTED`` flag.

    ``predict`` resets the flag to ``False`` when it starts and sets it to
    ``True`` when it stops because an interrupt was requested.
    """
    return INTERRUPTED

def interrupt():
    """Request that the running generation stop.

    Sets this module's ``INTERRUPTING`` flag, which ``predict`` inspects at
    the end of each iteration of its generation loop.
    """
    # NOTE(review): INTERRUPTING is also imported from audiocraft.utils.extend;
    # this assignment rebinds only this module's name - confirm whether the
    # library's own flag needs to be set as well.
    global INTERRUPTING
    INTERRUPTING = True

# Helper class for cleaning up temporary files
class FileCleaner:
    """Track temporary files and delete them once they are older than a lifetime.

    Files are registered with :meth:`add`; every registration first removes
    the files whose age exceeds ``file_lifetime`` seconds.

    Args:
        file_lifetime: Maximum age in seconds before a tracked file is deleted.
    """

    def __init__(self, file_lifetime: float = 3600):
        self.file_lifetime = file_lifetime
        # (registration timestamp, path) pairs in insertion order, oldest first
        self.files = []

    def add(self, path: tp.Union[str, Path]):
        """Purge expired files, then start tracking *path*."""
        self._cleanup()
        self.files.append((time.time(), Path(path)))

    def _cleanup(self):
        """Delete every tracked file that has outlived ``file_lifetime``.

        Entries are ordered by registration time, so the scan stops at the
        first entry that is still young enough.
        """
        now = time.time()
        expired = 0
        try:
            for time_added, path in self.files:
                if now - time_added <= self.file_lifetime:
                    break
                try:
                    path.unlink()
                except FileNotFoundError:
                    # Already removed by someone else; an exists() pre-check
                    # would race with that removal, so just tolerate it.
                    pass
                expired += 1
        finally:
            # Drop all handled entries in one step (instead of repeated
            # pop(0)), even if a later unlink() raised.
            del self.files[:expired]

# Ping function (used to check that the API is reachable)
def ping():
    """Return ``True`` unconditionally; serves as a liveness probe."""
    return True

# Switch the audio input source (microphone / file)
def toggle_audio_src(choice):
    """Build the component update that switches the audio input source.

    Args:
        choice: ``"mic"`` selects the microphone; any other value selects
            file upload.

    Returns:
        A ``gr.update`` payload that sets the source and label and clears the
        current value.
    """
    use_microphone = choice == "mic"
    return gr.update(
        source="microphone" if use_microphone else "upload",
        value=None,
        label="Microphone" if use_microphone else "File",
    )

# Render a waveform video
def get_waveform(*args, **kwargs):
    """Call ``gr.make_waveform`` with warnings silenced and report the elapsed time.

    All positional and keyword arguments are forwarded unchanged.

    Returns:
        Whatever ``gr.make_waveform`` returns.
    """
    started_at = time.time()
    with warnings.catch_warnings():
        # Suppress warnings emitted while the video is rendered.
        warnings.simplefilter('ignore')
        video = gr.make_waveform(*args, **kwargs)
        elapsed = time.time() - started_at
        print("Make a video took", elapsed)
    return video

# Model loading
def load_model(version, progress=gr.Progress(track_tqdm=True)):
    """Return the MusicGen model for *version*, using the ``MODELS`` cache if set.

    When ``MODELS`` is ``None`` the model is always loaded with
    ``MusicGen.get_pretrained``. Otherwise the currently active ``MODEL`` (if
    any) is first moved to the CPU, and the requested version is either loaded
    and stored in ``MODELS`` or taken from it and moved to CUDA.

    Args:
        version: Model identifier passed to ``MusicGen.get_pretrained`` and
            used as the ``MODELS`` key.
        progress: Gradio progress tracker; not used directly in the body.

    Returns:
        The loaded model.
    """
    # MODEL and MODELS are only read (MODELS is mutated in place, never
    # rebound), so no ``global`` declaration is required.
    # NOTE(review): ``tqdm`` is not imported explicitly in this file; it is
    # presumably provided by ``from modules.gradio import *`` - confirm.
    print(f"Loading model {version}")

    with tqdm(total=100, desc=f"Loading model '{version}'", unit="step") as pbar:
        if MODELS is None:
            pbar.update(50)
            result = MusicGen.get_pretrained(version)
            pbar.update(50)
            return result

        # Steps already reported, so every path below finishes at exactly 100
        # (previously the cached path could reach 130 and the load-from-disk
        # path could stop at 70).
        reported = 0
        t1 = time.monotonic()
        if MODEL is not None:
            MODEL.to('cpu')
            print(f"Previous model moved to CPU in {time.monotonic() - t1:.2f}s")
            pbar.update(30)
            reported = 30
            t1 = time.monotonic()

        if MODELS.get(version) is None:
            print(f"Loading model {version} from disk")
            result = MusicGen.get_pretrained(version)
            MODELS[version] = result
            print(f"Model loaded in {time.monotonic() - t1:.2f}s")
            pbar.update(100 - reported)
            return result

        result = MODELS[version].to('cuda')
        print(f"Cached model loaded in {time.monotonic() - t1:.2f}s")
        pbar.update(100 - reported)
        return result

# Load a melody from a file
def get_melody(melody_filepath):
    """Load an audio file and return it as ``(sample_rate, samples)``.

    ``librosa.load`` returns ``(samples, sample_rate)``; the two elements are
    swapped here (this reorders the tuple, it does not swap audio channels).
    ``sr=None`` is passed to ``librosa.load``.

    Args:
        melody_filepath: Path of the audio file to load.

    Returns:
        A 2-tuple ``(sample_rate, samples)``.
    """
    samples, sample_rate = librosa.load(melody_filepath, sr=None)
    return (sample_rate, samples)

# Resolve the Git tag
def git_tag():
    """Return a version label for the running code.

    Tries ``git describe --tags`` first. If that fails for any reason, falls
    back to the first non-blank line of ``CHANGELOG.md`` located two
    directories above this file. Returns ``"<none>"`` when neither source
    yields a value.
    """
    try:
        described = subprocess.check_output(
            [git, "describe", "--tags"], shell=False, encoding='utf8'
        )
        return described.strip()
    except Exception:
        pass  # fall through to the changelog lookup

    try:
        changelog_path = Path(__file__).parent.parent / "CHANGELOG.md"
        with changelog_path.open(encoding="utf-8") as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                if stripped:
                    return stripped
        return "<none>"
    except Exception:
        return "<none>"

# Pick the background image
def load_background_filepath(video_orientation):
    """Return the default background image path for *video_orientation*.

    Args:
        video_orientation: ``"Landscape"`` selects the landscape image; any
            other value selects the portrait image.
    """
    landscape = video_orientation == "Landscape"
    return "./assets/background.png" if landscape else "./assets/background_portrait.png"

# Refresh the UI after a melody file has been selected
def load_melody_filepath(melody_filepath, title, assigned_model, topp, temperature, cfg_coef, segment_length=30):
    """Compute the UI updates that follow a change of the melody file.

    Without a melody file the title is passed through, the prompt-index
    slider is limited to 0 and the model is reset to ``"medium"``. With a
    melody file the melody is loaded, the number of whole segments it
    contains (capped at 25) is stored in ``MAX_PROMPT_INDEX``, and a model
    whose name lacks ``"melody"`` is replaced by ``"melody-large"``. When the
    title is empty, ``None`` or contains ``"MusicGen"``, a title is derived
    from the file name and ``topp``/``temperature``/``cfg_coef`` are replaced
    by preset values.

    Args:
        melody_filepath: Path of the selected melody file, ``None`` or ``""``.
        title: Current title.
        assigned_model: Name of the currently selected model.
        topp: Current top-p value.
        temperature: Current temperature value.
        cfg_coef: Current classifier-free-guidance coefficient.
        segment_length: Segment length used to count melody segments and to
            derive the maximum overlap.

    Returns:
        A 7-tuple: title (or its update), prompt-index update, model update,
        top-p update, temperature update, cfg update, overlap update.
    """
    global MAX_PROMPT_INDEX
    overlap_limit = int(segment_length // 2) - 1

    def _ui_updates(title_update, max_index, model_name):
        # Reads topp / temperature / cfg_coef at call time, i.e. after any
        # preset values have been applied below.
        return (
            title_update,
            gr.update(maximum=max_index, value=-1),
            gr.update(value=model_name, interactive=True),
            gr.update(value=topp),
            gr.update(value=temperature),
            gr.update(value=cfg_coef),
            gr.update(maximum=overlap_limit),
        )

    if melody_filepath is None or melody_filepath == "":
        return _ui_updates(title, 0, "medium")

    needs_generated_title = title is None or title == "" or "MusicGen" in title
    if needs_generated_title:
        melody_name, _extension = get_filename_from_filepath(melody_filepath)
        for separator in ('_', '.', '-'):
            melody_name = melody_name.replace(separator, ' ').title()
        topp, temperature, cfg_coef = 800, 0.5, 3.25
    else:
        melody_name = title

    if "melody" not in assigned_model:
        assigned_model = "melody-large"

    print(f"Melody name: {melody_name}, Melody Filepath: {melody_filepath}, Model: {assigned_model}\n")

    loaded = get_melody(melody_filepath)
    sample_rate = loaded[0]
    samples = loaded[1]
    samples_per_segment = sample_rate * segment_length
    segment_count = max(min(len(samples) // samples_per_segment, 25), 0)
    print(f"Melody length: {len(samples)}, Melody segments: {segment_count}\n")
    MAX_PROMPT_INDEX = segment_count

    return _ui_updates(gr.update(value=melody_name), MAX_PROMPT_INDEX, assigned_model)

# Resolve the user name that identifies a profile
def _resolve_username(profile):
    """Return the user name for *profile*.

    Strings are returned unchanged, objects exposing ``.value.username`` yield
    that user name, ``None`` maps to ``"default_user"`` and any other object
    is returned as-is.
    """
    if isinstance(profile, str):
        return profile
    if hasattr(profile, 'value') and hasattr(profile.value, 'username'):
        return profile.value.username
    if profile is None:
        return "default_user"
    return profile


# Main music and video generation function
def predict(
    model, text, melody_filepath=None, duration=10, dimension=2, topk=200, topp=0,
    temperature=1.0, cfg_coef=4.0, background=None, title="UnlimitedMusicGen",
    settings_font="./assets/arial.ttf", settings_font_color="#c87f05", seed=-1,
    overlap=1, prompt_index=0, include_title=True, include_settings=True,
    harmony_only=False, profile=None, segment_length=30, settings_font_size=28,
    settings_animate_waveform=False, video_orientation="Landscape",
    excerpt_duration=3.5, return_history_json=False, progress=gr.Progress(track_tqdm=True)
):
    """Generate audio with MusicGen, render a waveform video and store the result.

    Steps: resolve melody/background inputs (downloading http(s) URLs),
    validate sampling parameters, load or switch the model, generate the
    audio in one or more overlapping segments, join the segments, write the
    audio file, render the waveform video, tag the MP4 with the generation
    settings and save everything to the user history.

    Args:
        model: Model name; passed to ``load_model`` and matched against
            ``"melody"`` / ``"style"`` to choose the generation path.
        text: Text description used as the generation prompt.
        melody_filepath: Local path or http(s) URL of a melody file; ``None``,
            ``"None"`` or ``""`` means no melody conditioning.
        duration: Requested length of the result; capped at 720.
        dimension: Tensor dimension along which segments are concatenated.
        topk, topp, temperature, cfg_coef: Sampling parameters forwarded to
            ``MODEL.set_generation_params``; negative ``topk``, ``topp`` or
            ``temperature`` is rejected.
        background: Background image path or http(s) URL; when missing, the
            default for ``video_orientation`` is used.
        title: Title used for file names, the image overlay and metadata.
        settings_font, settings_font_color, settings_font_size: Font settings
            for the text drawn on the background image.
        seed: Random seed; a negative value selects a random seed.
        overlap: Overlap between consecutive segments; capped at 15.
        prompt_index: Melody segment index forwarded to
            ``generate_music_segments``.
        include_title, include_settings: Whether to draw the title / the
            settings summary on the background image.
        harmony_only: Forwarded to ``generate_music_segments``.
        profile: User identity: a string, an object exposing
            ``.value.username``, or ``None`` (``"default_user"``).
        segment_length: Upper bound (before adding ``overlap``) for the length
            of a single generated segment.
        settings_animate_waveform: Forwarded as ``animate`` to the waveform
            renderer.
        video_orientation: ``"Landscape"`` gives 768x512 video, anything else
            512x768.
        excerpt_duration: Excerpt length for the style conditioner.
        return_history_json: Return the user-history record instead of the
            path tuple.
        progress: Gradio progress tracker.

    Returns:
        The value returned by ``modules.user_history.save_file`` when
        ``return_history_json`` is true, otherwise
        ``(video_path, audio_path, seed)``. ``(None, None, seed)`` is returned
        when generation fails or produces no audio.

    Raises:
        gr.Error: On invalid sampling parameters, when the model cannot be
            loaded, or when the generation was interrupted.
    """
    global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
    output_segments = None
    melody_name = "Not Used"
    melody_extension = "Not Used"
    melody = None
    # Defined up front so the final return cannot hit an unbound name.
    history_results = None
    username = _resolve_username(profile)

    if melody_filepath in ["None", ""]:
        melody_filepath = None

    # Download the melody from a URL (if one was given)
    if melody_filepath and melody_filepath.startswith(("http://", "https://")):
        melody_filepath = download_and_save_file(
            melody_filepath,
            Path(TMPDIR) / str(username),
            HF_API_TOKEN
        )

    # Pick the default background, or download it from a URL (if one was given)
    if background is None or background in ["None", ""]:
        background = load_background_filepath(video_orientation)

    if background.startswith(("http://", "https://")):
        background = download_and_save_image(
            background,
            Path(TMPDIR) / str(username),
            HF_API_TOKEN
        )

    # Load the melody from the file
    if melody_filepath:
        melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
        melody = get_melody(melody_filepath)

    INTERRUPTED = False
    INTERRUPTING = False

    # Parameter validation
    if temperature < 0:
        raise gr.Error("Temperature must be >= 0.")
    if topk < 0:
        raise gr.Error("Topk must be non-negative.")
    if topp < 0:
        raise gr.Error("Topp must be non-negative.")

    # Release the previous model when a different one is requested
    if MODEL is not None and model not in MODEL.name:
        print(f"Switching model from {MODEL.name} to {model}. Cleaning up resources.")
        # Rebind instead of ``del`` so the global never ends up undefined,
        # then collect garbage before returning cached blocks to the driver.
        MODEL = None
        gc.collect()
        torch.cuda.empty_cache()

    try:
        if MODEL is None:
            MODEL = load_model(model)
        elif MOVE_TO_CPU:
            # The model was parked on the CPU after the previous run.
            MODEL.to('cuda')
    except Exception as e:
        raise gr.Error(f"Error loading model '{model}': {str(e)}. Try a different model.") from e

    # Parameter limits
    duration = min(duration, 720)
    overlap = min(overlap, 15)

    output = None
    segment_duration = duration
    initial_duration = duration
    output_segments = []

    # Generation loop (handles long pieces segment by segment)
    while duration > 0:
        if not output_segments:
            segment_duration = min(segment_duration, MODEL.lm.cfg.dataset.segment_duration)
        else:
            segment_duration = min(
                duration + overlap,
                MODEL.lm.cfg.dataset.segment_duration
            )
        segment_duration = min(segment_duration, segment_length + overlap)

        if seed < 0:
            seed = random.randint(0, 0xffff_ffff_ffff)
        torch.manual_seed(seed)

        print(f'Segment duration: {segment_duration}, duration: {duration}, overlap: {overlap}')

        # Generation settings
        MODEL.set_generation_params(
            use_sampling=True,
            top_k=topk,
            top_p=topp,
            temperature=temperature,
            cfg_coef=cfg_coef,
            duration=segment_duration,
            two_step_cfg=False,
            cfg_coef_beta=5 if ("style" in model) and melody else None,
            extend_stride=2 if not ("style" in model) else None,
            rep_penalty=0.5 if not ("style" in model) else None,
        )

        if ("style" in model) and melody:
            MODEL.set_style_conditioner_params(
                eval_q=3,
                excerpt_length=excerpt_duration,
            )

        MODEL.set_custom_progress_callback(gr.Progress(track_tqdm=True))

        try:
            if melody and ("melody" in model or "style" in model):
                if duration > MODEL.duration:
                    output_segments, duration = generate_music_segments(
                        text, melody, seed, MODEL, duration, overlap,
                        MODEL.duration, prompt_index, harmony_only,
                        excerpt_duration, progress=progress
                    )
                else:
                    sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
                    if melody.dim() == 2:
                        melody = melody[None]
                    melody = melody[..., :int(sr * MODEL.lm.cfg.dataset.segment_duration)]
                    output = MODEL.generate_with_chroma(
                        descriptions=[text],
                        melody_wavs=melody,
                        melody_sample_rate=sr,
                        progress=False,
                        progress_callback=progress
                    )
                # The melody path produces everything in a single pass.
                break
            else:
                if not output_segments:
                    next_segment = MODEL.generate(
                        descriptions=[text],
                        progress=False,
                        progress_callback=progress
                    )
                    duration -= segment_duration
                else:
                    # NOTE(review): with overlap == 0 this slice is ``[-0:]``,
                    # i.e. the whole previous segment - confirm intended.
                    last_chunk = output_segments[-1][:, :, -overlap*MODEL.sample_rate:]
                    next_segment = MODEL.generate_continuation(
                        last_chunk, MODEL.sample_rate,
                        descriptions=[text],
                        progress=False,
                        progress_callback=progress
                    )
                    duration -= segment_duration - overlap
                if next_segment is not None:
                    output_segments.append(next_segment)
        except Exception as e:
            print(f"Error generating audio: {e}")
            # The original built a gr.Error without raising it, which shows
            # nothing. A warning surfaces the message while keeping the
            # documented ``(None, None, seed)`` return value.
            gr.Warning(f"Error generating audio: {e}")
            return None, None, seed

        if INTERRUPTING:
            INTERRUPTED = True
            INTERRUPTING = False
            print("Function execution interrupted!")
            raise gr.Error("Interrupted.")

    # Join the segments
    if output_segments:
        try:
            output = output_segments[0]
            for i in range(1, len(output_segments)):
                if overlap > 0:
                    overlap_samples = overlap * MODEL.sample_rate
                    overlapping_output_fadeout = output[:, :, -overlap_samples:]
                    overlapping_output_fadeout = apply_tafade(
                        overlapping_output_fadeout,
                        sample_rate=MODEL.sample_rate,
                        duration=overlap,
                        out=True,
                        start=True,
                        shape="linear"
                    )
                    overlapping_output_fadein = output_segments[i][:, :, :overlap_samples]
                    overlapping_output_fadein = apply_tafade(
                        overlapping_output_fadein,
                        sample_rate=MODEL.sample_rate,
                        duration=overlap,
                        out=False,
                        start=False,
                        shape="linear"
                    )
                    # NOTE(review): this concatenates along dim=2 while the
                    # joins below use ``dimension`` - confirm they are meant
                    # to differ when ``dimension`` is not 2.
                    overlapping_output = torch.cat(
                        [overlapping_output_fadeout[:, :, :-(overlap_samples // 2)], overlapping_output_fadein],
                        dim=2
                    )
                    output = torch.cat(
                        [output[:, :, :-overlap_samples], overlapping_output, output_segments[i][:, :, overlap_samples:]],
                        dim=dimension
                    )
                else:
                    output = torch.cat([output, output_segments[i]], dim=dimension)
            output = output.detach().cpu().float()[0]
        except Exception as e:
            # Best effort: fall back to the first segment rather than failing.
            print(f"Error combining segments: {e}. Using the first segment only.")
            output = output_segments[0].detach().cpu().float()[0]
    else:
        if output is None or output.dim() == 0:
            return None, None, seed
        else:
            output = output.detach().cpu().float()[0]

    # Render the waveform video
    video_width, video_height = (768, 512) if video_orientation == "Landscape" else (512, 768)
    title_file_name = convert_title_to_filename(title)

    with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix=title_file_name) as file:
        video_description = (
            f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n"
            f"Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n"
            f"cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n"
            f"Model: {model}\n Melody Condition:{melody_name}\n Sample Segment: {prompt_index}"
        )

        if include_settings or include_title:
            background = add_settings_to_image(
                title if include_title else "",
                video_description if include_settings else "",
                width=video_width,
                height=video_height,
                background_path=background,
                font=settings_font,
                font_color=settings_font_color,
                font_size=settings_font_size
            )

        audio_write(
            file.name, output, MODEL.sample_rate, strategy="loudness",
            loudness_headroom_db=18, loudness_compressor=True,
            add_suffix=False, channels=2
        )

        waveform_video_path = get_waveform(
            file.name, bg_image=background, bar_count=45,
            name=title_file_name, animate=settings_animate_waveform,
            progress=progress
        )

        # Write the metadata into the MP4 file
        video_dir, _, _, _, video_new_ext = get_file_parts(waveform_video_path)
        new_video_path = get_unique_file_path(video_dir, title_file_name, video_new_ext)

        mp4 = MP4(waveform_video_path)
        mp4["©nam"] = title_file_name
        mp4["desc"] = f"{text}\n Duration: {str(initial_duration)}"

        commit = commit_hash()
        metadata = {
            "Title": title,
            "Year": time.strftime("%Y"),
            "prompt": text,
            "negative_prompt": "",
            "Seed": seed,
            "steps": 1,
            "wdth": video_width,
            "hght": video_height,
            "Dimension": dimension,
            "Top-k": topk,
            "Top-p": topp,
            "Randomness": temperature,
            "cfg": cfg_coef,
            "overlap": overlap,
            "Melody Condition": melody_name,
            "Sample Segment": prompt_index,
            "Duration": initial_duration,
            "Audio": file.name,
            "font": settings_font,
            "font_color": settings_font_color,
            "font_size": settings_font_size,
            "harmony_only": harmony_only,
            "background": background,
            "include_title": include_title,
            "include_settings": include_settings,
            "profile": username,
            "commit": commit,
            "tag": git_tag(),
            # The literal used to list "version" twice; the later entry (this
            # URL) silently replaced the Gradio version stored first. The
            # Gradio version is still recorded under "gradio" below.
            "version": f"https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/{'huggingface' if commit == '<none>' else commit}",
            "model_version": MODEL.version if MODEL else "Unknown",
            "model_name": MODEL.name if MODEL else "Unknown",
            "model_description": (
                f"{MODEL.audio_channels} channels, {MODEL.sample_rate} Hz"
                if MODEL else "Unknown"
            ),
            "melody_name": melody_name if melody_name else "",
            "melody_extension": melody_extension if melody_extension else "",
            "hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen",
            "python": sys.version,
            "torch": getattr(torch, '__long_version__', torch.__version__),
            "xformers": get_xformers_version(),
            "gradio": gr.__version__,
            "huggingface_space": os.environ.get('SPACE_ID', ''),
            "CUDA": (
                f"CUDA is available. device: {torch.cuda.get_device_name(0)} version: {torch.version.cuda}"
                if torch.cuda.is_available() else "CUDA is not available."
            ),
        }

        # NOTE(review): the keys are used verbatim as MP4 tag names - confirm
        # that mutagen accepts these non-standard keys on save.
        for key, value in metadata.items():
            mp4[key] = str(value)

        mp4.save()

        try:
            os.replace(waveform_video_path, new_video_path)
            waveform_video_path = new_video_path
        except Exception as e:
            # Keep the original path if the rename fails.
            print(f"Error renaming file: {e}")

        if waveform_video_path:
            history_results = modules.user_history.save_file(
                profile=username,
                image=background,
                audio=file.name,
                video=waveform_video_path,
                label=title,
                metadata=metadata,
                progress=progress
            )

    # Release GPU resources
    if MOVE_TO_CPU:
        MODEL.to('cpu')
    if UNLOAD_MODEL:
        MODEL = None

    # Drop references to large objects
    del output_segments, output, melody, melody_name, melody_extension, metadata, mp4
    gc.collect()
    if torch.cuda.is_available():
        # synchronize() raises on machines without CUDA, so guard the calls.
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        torch.cuda.ipc_collect()

    if return_history_json:
        return history_results
    else:
        return waveform_video_path, file.name, seed

# Simplified prediction entry point (for the API)
def predict_simple(
    model: str, text: str, melody_filepath: str = None, duration: int = 10,
    dimension: int = 2, topk: int = 200, topp: float = 0.01, temperature: float = 1.0,
    cfg_coef: float = 4.0, background: str = "./assets/background.png",
    title: str = "UnlimitedMusicGen", settings_font: str = "./assets/arial.ttf",
    settings_font_color: str = "#c87f05", seed: int = -1, overlap: int = 1,
    prompt_index: int = -1, include_title: bool = True, include_settings: bool = True,
    harmony_only: bool = False, profile: str = "Satoshi Nakamoto",
    segment_length: int = 30, settings_font_size: int = 28,
    settings_animate_waveform: bool = False, video_orientation: str = "Landscape",
    return_history_json: bool = False
) -> tp.List[tp.Tuple[str, str, str]]:
    """Run ``predict`` and upload the resulting files to the Hugging Face repo.

    The arguments mirror those of ``predict`` (``excerpt_duration`` is fixed
    at 3.5). After generation the files are uploaded with
    ``upload_files_to_repo`` into a folder derived from the user name, the
    title, the seed and a timestamp; when the upload returns a result, the
    local paths are replaced by the second element of each upload entry.

    Returns:
        A ``(video, audio, seed)`` tuple; the annotation is kept unchanged
        for interface compatibility although a single tuple is returned.
        When ``predict`` reports a failure, its result is returned unchanged
        and nothing is uploaded.
    """
    profile_username_to_send = "default_user"

    if not profile:
        # The original assigned ``get_profile`` itself without calling it, so
        # a function object ended up in paths and metadata.
        # NOTE(review): assumes get_profile takes no arguments - confirm; a
        # TypeError from the call falls back to the default user.
        profile_getter = modules.user_history.get_profile
        if callable(profile_getter):
            try:
                profile = profile_getter()
            except TypeError:
                profile = None
        else:
            profile = profile_getter

    if profile:
        actual_profile_data = profile
        if hasattr(profile, 'value') and profile.value is not None:
            actual_profile_data = profile.value

        if hasattr(actual_profile_data, 'username') and actual_profile_data.username:
            profile_username_to_send = actual_profile_data.username
        elif isinstance(actual_profile_data, str) and actual_profile_data:
            profile_username_to_send = actual_profile_data

    UMG_result = predict(
        model, text, melody_filepath=melody_filepath, duration=duration,
        dimension=dimension, topk=topk, topp=topp, temperature=temperature,
        cfg_coef=cfg_coef, background=background, title=title,
        settings_font=settings_font, settings_font_color=settings_font_color,
        seed=seed, overlap=overlap, prompt_index=prompt_index,
        include_title=include_title, include_settings=include_settings,
        harmony_only=harmony_only, profile=profile,
        segment_length=segment_length, settings_font_size=settings_font_size,
        settings_animate_waveform=settings_animate_waveform,
        video_orientation=video_orientation, excerpt_duration=3.5,
        return_history_json=return_history_json
    )

    # predict() returns (None, None, seed) when generation fails, regardless
    # of return_history_json; there is nothing to upload in that case.
    generation_failed = UMG_result is None or (
        isinstance(UMG_result, tuple) and UMG_result[0] is None
    )
    if generation_failed:
        return UMG_result

    folder_name = f"user_uploads/{convert_title_to_filename(profile_username_to_send)}/{convert_title_to_filename(title)}"
    if return_history_json:
        upload_result = upload_files_to_repo(
            files=[UMG_result["video_path"], UMG_result["audio_path"], UMG_result["image_path"]],
            repo_id=HF_REPO_ID,
            folder_name=f"{folder_name}/{UMG_result['metadata']['Seed']}/{time.strftime('%Y%m%d%H%M%S')}",
            create_permalink=False,
            repo_type="dataset"
        )
        if upload_result:
            # presumably each entry is a (name, url) pair - verify against
            # upload_files_to_repo
            UMG_result["video_path"] = upload_result[0][1]
            UMG_result["audio_path"] = upload_result[1][1]
            UMG_result["image_path"] = upload_result[2][1]
        content = UMG_result["video_path"], UMG_result["audio_path"], UMG_result["metadata"]["Seed"]
        UMG_result = content
    else:
        upload_result = upload_files_to_repo(
            files=[UMG_result[0], UMG_result[1]],
            repo_id=HF_REPO_ID,
            folder_name=f"{folder_name}/{UMG_result[2]}/{time.strftime('%Y%m%d%H%M%S')}",
            create_permalink=False,
            repo_type="dataset"
        )
        if upload_result:
            UMG_result = upload_result[0][1], upload_result[1][1], UMG_result[2]

    return UMG_result

# Register the static asset directories with Gradio
gr.set_static_paths(paths=["fonts/", "assets/", "images/"])

# UI function
def ui(**kwargs):
    with gr.Blocks(title="UnlimitedMusicGen", css_paths="style_20250331.css", theme='Surn/beeuty') as demo:
        with gr.Tab("UnlimitedMusicGen"):
            gr.Markdown(
                """
                # UnlimitedMusicGen
                This is your private demo for [UnlimitedMusicGen](https://github.com/Oncorporation/audiocraft), a simple and controllable model for music generation
                presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)

                ⚠ **Disclaimer**: This won't run on CPU only. Clone this App and run on GPU instance!

                Todo: Working on improved Interrupt.
                Theme Available at ["Surn/Beeuty"](https://huggingface.co/spaces/Surn/Beeuty)
                """
            )

            if IS_SHARED_SPACE and not torch.cuda.is_available():
                gr.Markdown("""
                    ⚠ This Space doesn't work in this shared UI ⚠
                    <a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
                    <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
                    to use it privately, or use the <a href="https://huggingface.co/spaces/facebook/MusicGen">public demo</a>
                """)

            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        with gr.Column():
                            text = gr.Text(
                                label="Describe your music",
                                interactive=True,
                                value="4/4 100bpm 320kbps 32khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out",
                                key="prompt",
                                lines=4
                            )
                            autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
                        with gr.Column():
                            duration = gr.Slider(
                                minimum=1, maximum=720, value=10,
                                label="Duration (s)", interactive=True,
                                key="total_duration", step=1
                            )
                            model = gr.Radio(
                                ["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large", "style"],
                                label="AI Model", value="medium", interactive=True,
                                key="chosen_model"
                            )
                    with gr.Row():
                        submit = gr.Button("Generate", elem_id="btn-generate")
                        _ = gr.Button("Interrupt", elem_id="btn-interrupt").click(fn=interrupt, queue=False)
                    with gr.Row():
                        with gr.Column():
                            radio = gr.Radio(
                                ["file", "mic"], value="file",
                                label="Condition on a melody (optional) File or Mic"
                            )
                            melody_filepath = gr.Audio(
                                value=None, sources=["upload"],
                                type="filepath", label="Melody Condition (optional)",
                                interactive=True, elem_id="melody-input", key="melody_input"
                            )
                        with gr.Column():
                            harmony_only = gr.Radio(
                                label="Use Harmony Only",
                                choices=["No", "Yes"], value="No",
                                interactive=True,
                                info="Remove Drums?"
                            )
                            prompt_index = gr.Slider(
                                label="Melody Condition Sample Segment",
                                minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=-1,
                                interactive=True,
                                info="Which 10-30 second segment to condition with, -1 = align with conditioning melody"
                            )
                    with gr.Accordion("Video", open=False):
                        with gr.Row():
                            background = gr.Image(
                                value="./assets/background.png", sources=["upload"],
                                label="Background", width=768, height=512,
                                type="filepath", interactive=True, key="background_imagepath"
                            )
                            with gr.Column():
                                include_title = gr.Checkbox(
                                    label="Add Title", value=True, interactive=True,
                                    key="add_title"
                                )
                                include_settings = gr.Checkbox(
                                    label="Add Settings to background", value=True,
                                    interactive=True, key="add_settings"
                                )
                                video_orientation = gr.Radio(
                                    label="Video Orientation",
                                    choices=["Landscape", "Portrait"], value="Landscape",
                                    interactive=True, key="video_orientation"
                                )
                        with gr.Row():
                            title = gr.Textbox(
                                label="Title", value="UnlimitedMusicGen",
                                interactive=True, key="song_title"
                            )
                            settings_font = gr.Text(
                                label="Settings Font", value="./assets/arial.ttf",
                                interactive=True
                            )
                            settings_font_color = gr.ColorPicker(
                                label="Settings Font Color", value="#c87f05",
                                interactive=True, key="settings_font_color"
                            )
                            settings_font_size = gr.Slider(
                                minimum=8, maximum=64, value=28, step=1,
                                label="Settings Font Size", interactive=True,
                                key="settings_font_size"
                            )
                            settings_animate_waveform = gr.Checkbox(
                                label="Animate Waveform", value=False,
                                interactive=True, key="animate_waveform"
                            )
                    with gr.Accordion("Expert", open=False):
                        with gr.Row():
                            segment_length = gr.Slider(
                                minimum=10, maximum=30, value=30, step=1,
                                label="Music Generation Segment Length (s)",
                                interactive=True, key="segment_length"
                            )
                            overlap = gr.Slider(
                                minimum=0, maximum=14, value=1, step=1,
                                label="Segment Overlap", interactive=True
                            )
                            dimension = gr.Slider(
                                minimum=-2, maximum=2, value=2, step=1,
                                label="Dimension",
                                info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)",
                                interactive=True
                            )
                        with gr.Row():
                            topk = gr.Number(
                                label="Top-k", value=280, precision=0,
                                interactive=True,
                                info="more structured"
                            )
                            topp = gr.Number(
                                label="Top-p", value=1150, precision=0,
                                interactive=True,
                                info="more variation, overwrites Top-k if not zero"
                            )
                            temperature = gr.Number(
                                label="Randomness Temperature", value=0.7,
                                precision=None, step=0.1, interactive=True,
                                info="less than one to follow Melody Condition song closely"
                            )
                            cfg_coef = gr.Number(
                                label="Classifier Free Guidance", value=3.75,
                                precision=None, step=0.1, interactive=True,
                                info="3.0-4.0, stereo and small need more"
                            )
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed", value=-1, precision=0,
                                interactive=True, key="seed"
                            )
                            gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(
                                fn=lambda: -1, outputs=[seed], queue=False
                            )
                            reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")

            with gr.Column() as c:
                output = gr.Video(
                    label="Generated Music", interactive=False,
                    show_download_button=True, show_share_button=True,
                    autoplay=False
                )
                wave_file = gr.File(
                    label=".wav file", elem_id="output_wavefile",
                    interactive=True
                )
                seed_used = gr.Number(
                    label='Seed used', value=-1, interactive=False
                )

            # Powiązania UI
            radio.change(
                toggle_audio_src, radio, [melody_filepath],
                queue=False, show_progress=False, api_name="audio_src_change"
            )
            video_orientation.change(
                load_background_filepath, inputs=[video_orientation],
                outputs=[background], queue=False, show_progress=False,
                api_name="video_orientation_change"
            )
            melody_filepath.change(
                load_melody_filepath,
                inputs=[melody_filepath, title, model, topp, temperature, cfg_coef, segment_length],
                outputs=[title, prompt_index, model, topp, temperature, cfg_coef, overlap],
                api_name="melody_filepath_change", queue=False
            )
            reuse_seed.click(
                fn=lambda x: x, inputs=[seed_used], outputs=[seed],
                queue=False, api_name="reuse_seed_click"
            )
            autoplay_cb.change(
                fn=lambda x: gr.update(autoplay=x), inputs=[autoplay_cb],
                outputs=[output], queue=False, api_name="autoplay_cb_change"
            )
            segment_length.release(
                fn=load_melody_filepath, queue=False, api_name="segment_length_change",
                trigger_mode="once",
                inputs=[melody_filepath, title, model, topp, temperature, cfg_coef, segment_length],
                outputs=[title, prompt_index, model, topp, temperature, cfg_coef, overlap],
                show_progress="minimal"
            )

            # Przykłady
            gr.Examples(
                examples=[
                    [
                        "4/4 120bpm 320kbps 32khz, An 80s driving pop song with heavy drums and synth pads in the background",
                        "./assets/bach.mp3",
                        "melody",
                        "80s Pop Synth",
                        950,
                        0.6,
                        3.5
                    ],
                    [
                        "4/4 120bpm 320kbps 32khz, A cheerful country song with acoustic guitars",
                        "./assets/bolero_ravel.mp3",
                        "stereo-melody-large",
                        "Country Guitar",
                        750,
                        0.7,
                        4.0
                    ],
                    [
                        "4/4 120bpm 320kbps 32khz, 90s rock song with electric guitar and heavy drums",
                        None,
                        "stereo-medium",
                        "90s Rock Guitar",
                        1150,
                        0.7,
                        3.75
                    ],
                    [
                        "4/4 120bpm 320kbps 32khz, a light and cheery EDM track, with syncopated drums, aery pads, and strong emotions",
                        "./assets/bach.mp3",
                        "melody-large",
                        "EDM my Bach",
                        500,
                        0.7,
                        3.75
                    ],
                    [
                        "4/4 320kbps 32khz, lofi slow bpm electro chill with organic samples",
                        None,
                        "medium",
                        "LoFi Chill",
                        0,
                        0.7,
                        4.0
                    ],
                ],
                inputs=[text, melody_filepath, model, title, topp, temperature, cfg_coef],
                outputs=[output]
            )

        with gr.Tab("User History") as history_tab:
            modules.user_history.setup(display_type="video_path")
            modules.user_history.render()

        user_profile = gr.State(None)

        with gr.Row("Versions") as versions_row:
            gr.HTML(value=versions_html(), visible=True, elem_id="versions")

        # Główne wywołanie predict
        submit.click(
            modules.user_history.get_profile,
            inputs=[],
            outputs=[user_profile],
            queue=True,
            api_name="submit"
        ).then(
            predict,
            inputs=[
                model, text, melody_filepath, duration, dimension, topk, topp,
                temperature, cfg_coef, background, title, settings_font,
                settings_font_color, seed, overlap, prompt_index, include_title,
                include_settings, harmony_only, user_profile, segment_length,
                settings_font_size, settings_animate_waveform, video_orientation
            ],
            outputs=[output, wave_file, seed_used],
            scroll_to_output=True, show_api=False
        )

        # Uruchomienie interfejsu
        launch_kwargs = {
            'server_name': kwargs.get('listen', '0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1'),
            'server_port': kwargs.get('server_port', 0),
            'share': kwargs.get('share', False),
            'allowed_paths': ["assets", "./assets", "images", "./images", 'e:/TMP'],
            'favicon_path': "./assets/favicon.ico",
            'mcp_server': True,
            'ssr_mode': False
        }

        if kwargs.get('server_port', 0) > 0:
            launch_kwargs['server_port'] = kwargs.get('server_port')

        if kwargs.get('share', False):
            launch_kwargs['share'] = True

        gr.api(ping, api_name="ping")
        gr.api(predict_simple)

        demo.queue(max_size=10, api_open=True).launch(**launch_kwargs)

# Script entry point: parse the command line, set module flags, start the UI
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--listen',
        type=str,
        default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
        help='IP to listen on for connections to Gradio',
    )
    parser.add_argument(
        '--username', type=str, default='', help='Username for authentication'
    )
    parser.add_argument(
        '--password', type=str, default='', help='Password for authentication'
    )
    parser.add_argument(
        '--server_port',
        type=int,
        default=0,
        help='Port to run the server listener on',
    )
    parser.add_argument(
        '--inbrowser', action='store_true', help='Open in browser'
    )
    parser.add_argument(
        '--share', action='store_true', help='Share the gradio UI'
    )
    parser.add_argument(
        '--unload_model', action='store_true',
        help='Unload the model after every generation to save GPU memory'
    )
    parser.add_argument(
        '--unload_to_cpu', action='store_true',
        help='Move the model to main RAM after every generation to save GPU memory but reload faster than after full unload'
    )
    parser.add_argument(
        '--cache', action='store_true',
        help='Cache models in RAM to quickly switch between them'
    )

    args = parser.parse_args()

    # Set the module-level flags from the parsed arguments
    UNLOAD_MODEL = args.unload_model
    MOVE_TO_CPU = args.unload_to_cpu

    # With --cache, MODELS becomes an (initially empty) dict instead of None
    if args.cache:
        MODELS = {}

    # Launch the interface. vars(args) already contains `share` and
    # `unload_to_cpu`, so they must not also be passed explicitly: doing so
    # raises "TypeError: got multiple values for keyword argument".
    ui(**vars(args))