UnlimitedMusicGen

Runtime error

App Files Files Community

UnlimitedMusicGen / app.py

webnowa

Update app.py

69734b9 verified about 2 months ago

raw

history blame contribute delete

43.1 kB

	"""
	Copyright (c) Meta Platforms, Inc. and affiliates.
	All rights reserved.
	This source code is licensed under the license found in the
	LICENSE file in the root directory of this source tree.
	"""

	import os
	import sys
	import time
	import gc
	import random
	import warnings
	import typing as tp
	from pathlib import Path
	from tempfile import NamedTemporaryFile
	import argparse
	import subprocess

	import torch
	import gradio as gr
	import librosa
	from mutagen.mp4 import MP4

	# Imports from local modules
	from audiocraft.models import MusicGen
	from audiocraft.data.audio import audio_write
	from audiocraft.data.audio_utils import apply_fade, apply_tafade, apply_splice_effect
	from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
	from audiocraft.utils import utils
	import numpy as np

	# Imports from project modules
	import modules.user_history
	from modules.version_info import versions_html, commit_hash, get_xformers_version
	from modules.gradio import *
	from modules.file_utils import (
	get_file_parts,
	get_filename_from_filepath,
	convert_title_to_filename,
	get_unique_file_path,
	delete_file,
	download_and_save_image,
	download_and_save_file
	)
	from modules.constants import (
	IS_SHARED_SPACE,
	HF_REPO_ID,
	TMPDIR,
	HF_API_TOKEN
	)
	from modules.storage import upload_files_to_repo

	# Initialization of module-level state shared by the functions below
	MODEL = None  # model currently in use; assigned in predict() from load_model()
	MODELS = None  # per-version model cache; replaced by {} when --cache is passed
	INTERRUPTED = False  # returned by interrupt_callback(); set in predict() after an interrupt
	UNLOAD_MODEL = False  # set from --unload_model; predict() then drops MODEL after a run
	MOVE_TO_CPU = False  # set from --unload_to_cpu; predict() then moves MODEL to CPU after a run
	MAX_PROMPT_INDEX = 0  # updated by load_melody_filepath(); used as the prompt_index slider maximum
	git = os.environ.get('GIT', "git")  # git executable used by git_tag(); overridable via $GIT

	# GPU environment configuration (optional, adjust to your needs)
	os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
	os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
	os.environ['CUDA_MODULE_LOADING'] = 'LAZY'
	os.environ['USE_FLASH_ATTENTION'] = '1'
	os.environ['XFORMERS_FORCE_DISABLE_TRITON'] = '1'

	# Generation-interrupt helpers
	def interrupt_callback():
	    """Return the current value of the module-level ``INTERRUPTED`` flag."""
	    was_interrupted = INTERRUPTED
	    return was_interrupted

	def interrupt():
	    """Flag an interrupt request by setting this module's ``INTERRUPTING`` to True.

	    Returns nothing; the flag is read (and reset) by ``predict``.
	    """
	    global INTERRUPTING
	    INTERRUPTING = True

	# Temporary-file cleaner
	class FileCleaner:
	    """Track file paths and delete them once they outlive ``file_lifetime``.

	    Entries are stored in insertion order in ``self.files`` as
	    ``(time_added, Path)`` pairs. Expired entries are purged every time
	    :meth:`add` is called.
	    """

	    def __init__(self, file_lifetime: float = 3600):
	        # Maximum age, measured with time.time(), before an entry expires.
	        self.files = []
	        self.file_lifetime = file_lifetime

	    def add(self, path: tp.Union[str, Path]):
	        """Purge expired entries, then start tracking *path*."""
	        self._cleanup()
	        entry = (time.time(), Path(path))
	        self.files.append(entry)

	    def _cleanup(self):
	        """Remove expired entries from the front of the list.

	        Stops at the first entry that has not expired yet; files that still
	        exist on disk are unlinked before their entry is dropped.
	        """
	        current = time.time()
	        while self.files:
	            added_at, target = self.files[0]
	            if not (current - added_at > self.file_lifetime):
	                # Everything after this entry was added later, so stop here.
	                break
	            if target.exists():
	                target.unlink()
	            self.files.pop(0)

	# Ping endpoint (used to check API availability)
	def ping():
	    """Liveness probe: unconditionally answers ``True``."""
	    alive = True
	    return alive

	# Switch the melody audio source (microphone / file)
	def toggle_audio_src(choice):
	    """Build the update that switches the melody input between mic and file.

	    Args:
	        choice: ``"mic"`` selects the microphone; any other value selects
	            file upload.

	    Returns:
	        The ``gr.update(...)`` payload that resets the component value and
	        sets its input source and label.
	    """
	    # The melody Audio component in this file is created with the plural,
	    # list-valued ``sources=[...]`` keyword, so the update uses the same
	    # keyword instead of the singular ``source=``.
	    if choice == "mic":
	        return gr.update(sources=["microphone"], value=None, label="Microphone")
	    return gr.update(sources=["upload"], value=None, label="File")

	# Render a waveform video
	def get_waveform(*args, **kwargs):
	    """Call ``gr.make_waveform`` with warnings silenced and print the elapsed time.

	    The previous signature ``(args, *kwargs)`` accepted no keyword arguments,
	    so the call in ``predict`` (which passes ``bg_image=``, ``bar_count=``,
	    ``name=``, ``animate=`` and ``progress=``) raised ``TypeError``.

	    Args:
	        *args: Positional arguments forwarded unchanged to ``gr.make_waveform``.
	        **kwargs: Keyword arguments forwarded unchanged to ``gr.make_waveform``.

	    Returns:
	        Whatever ``gr.make_waveform`` returns.
	    """
	    be = time.time()
	    with warnings.catch_warnings():
	        # Suppress warnings emitted while the video is rendered.
	        warnings.simplefilter('ignore')
	        out = gr.make_waveform(*args, **kwargs)
	    print("Make a video took", time.time() - be)
	    return out

	# Model loading
	def load_model(version, progress=gr.Progress(track_tqdm=True)):
	    """Return the MusicGen model for *version*, using the cache when enabled.

	    Args:
	        version: Model identifier passed to ``MusicGen.get_pretrained``.
	        progress: Not referenced in the body; presumably present so Gradio
	            tracks the tqdm bar below (TODO confirm).

	    Returns:
	        The loaded model. With caching disabled (``MODELS is None``) a fresh
	        model is loaded on every call. With caching enabled the current
	        ``MODEL`` is first moved to CPU, then the model is either loaded and
	        stored in ``MODELS`` or fetched from ``MODELS`` and moved to CUDA.
	    """
	    # NOTE(review): ``tqdm`` is not imported explicitly in this file;
	    # presumably it arrives through ``from modules.gradio import *`` - verify.
	    global MODEL, MODELS, UNLOAD_MODEL
	    print(f"Loading model {version}")

	    with tqdm(total=100, desc=f"Loading model '{version}'", unit="step") as pbar:
	        # Caching disabled: always load from scratch.
	        if MODELS is None:
	            pbar.update(50)
	            fresh_model = MusicGen.get_pretrained(version)
	            pbar.update(50)
	            return fresh_model

	        started = time.monotonic()
	        if MODEL is not None:
	            # Park the previously active model on the CPU.
	            MODEL.to('cpu')
	            print(f"Previous model moved to CPU in {time.monotonic() - started:.2f}s")
	            pbar.update(30)
	            started = time.monotonic()

	        if MODELS.get(version) is not None:
	            # Cache hit: bring the stored model back onto the GPU.
	            cached_model = MODELS[version].to('cuda')
	            print(f"Cached model loaded in {time.monotonic() - started:.2f}s")
	            pbar.update(100)
	            return cached_model

	        # Cache miss: load the model and remember it for next time.
	        print(f"Loading model {version} from disk")
	        loaded_model = MusicGen.get_pretrained(version)
	        MODELS[version] = loaded_model
	        print(f"Model loaded in {time.monotonic() - started:.2f}s")
	        pbar.update(70)
	        return loaded_model

	# Load a melody from an audio file
	def get_melody(melody_filepath):
	    """Load an audio file and return it as ``(sample_rate, samples)``.

	    ``librosa.load`` returns ``(samples, sample_rate)``; the two items are
	    returned in reversed order here (this reorders the tuple, it does not
	    swap audio channels). ``sr=None`` is passed through to ``librosa.load``.
	    """
	    samples, sample_rate = librosa.load(melody_filepath, sr=None)
	    return (sample_rate, samples)

	# Git tag lookup
	def git_tag():
	    """Return the output of ``git describe --tags``, or a fallback string.

	    If the git command fails for any reason, the first non-blank line of
	    ``CHANGELOG.md`` (two directories above this file) is returned instead;
	    if that also fails, or the file has no non-blank line, ``"<none>"``.
	    """
	    try:
	        described = subprocess.check_output(
	            [git, "describe", "--tags"], shell=False, encoding='utf8'
	        )
	        return described.strip()
	    except Exception:
	        pass

	    # Fallback: first non-empty line of the changelog.
	    try:
	        changelog_md = Path(__file__).parent.parent / "CHANGELOG.md"
	        with changelog_md.open(encoding="utf-8") as file:
	            for raw_line in file:
	                if raw_line.strip():
	                    return raw_line.strip()
	            return "<none>"
	    except Exception:
	        return "<none>"

	# Default background image selection
	def load_background_filepath(video_orientation):
	    """Return the bundled background image path for *video_orientation*.

	    ``"Landscape"`` maps to the landscape image; every other value maps to
	    the portrait image.
	    """
	    is_landscape = video_orientation == "Landscape"
	    return "./assets/background.png" if is_landscape else "./assets/background_portrait.png"

	# UI update after a melody is selected
	def load_melody_filepath(melody_filepath, title, assigned_model, topp, temperature, cfg_coef, segment_length=30):
	    """Compute UI updates for a newly selected (or cleared) melody file.

	    Args:
	        melody_filepath: Path of the melody file; ``None`` or ``""`` means
	            no melody is selected.
	        title: Current title. When it is ``None``, empty or contains
	            ``"MusicGen"``, a title is derived from the file name and
	            ``topp``/``temperature``/``cfg_coef`` are replaced by presets.
	        assigned_model: Selected model name; replaced by ``"melody-large"``
	            when it does not contain ``"melody"``.
	        topp, temperature, cfg_coef: Current sampling settings.
	        segment_length: Segment length used to derive the overlap maximum
	            and the number of melody segments.

	    Returns:
	        A 7-tuple of values/updates in the order title, prompt index, model,
	        top-p, temperature, cfg coefficient, overlap.

	    Side effects:
	        Sets the global ``MAX_PROMPT_INDEX`` when a melody is loaded.
	    """
	    global MAX_PROMPT_INDEX
	    overlap_limit = int(segment_length // 2) - 1

	    # Nothing selected: reset the prompt index and fall back to "medium".
	    if melody_filepath is None or melody_filepath == "":
	        return (
	            title,
	            gr.update(maximum=0, value=-1),
	            gr.update(value="medium", interactive=True),
	            gr.update(value=topp),
	            gr.update(value=temperature),
	            gr.update(value=cfg_coef),
	            gr.update(maximum=overlap_limit),
	        )

	    title_is_default = (title is None) or ("MusicGen" in title) or (title == "")
	    if not title_is_default:
	        melody_name = title
	    else:
	        # Derive a readable title from the file name.
	        melody_name, _melody_extension = get_filename_from_filepath(melody_filepath)
	        for separator in ('_', '.', '-'):
	            melody_name = melody_name.replace(separator, ' ').title()
	        # Presets applied together with the derived title.
	        topp, temperature, cfg_coef = 800, 0.5, 3.25

	    if "melody" not in assigned_model:
	        assigned_model = "melody-large"

	    print(f"Melody name: {melody_name}, Melody Filepath: {melody_filepath}, Model: {assigned_model}\n")

	    # get_melody returns (sample_rate, samples).
	    sample_rate, samples = get_melody(melody_filepath)[:2]
	    samples_per_segment = sample_rate * segment_length
	    whole_segments = len(samples) // samples_per_segment
	    segment_count = max(min(whole_segments, 25), 0)
	    print(f"Melody length: {len(samples)}, Melody segments: {segment_count}\n")
	    MAX_PROMPT_INDEX = segment_count

	    return (
	        gr.update(value=melody_name),
	        gr.update(maximum=MAX_PROMPT_INDEX, value=-1),
	        gr.update(value=assigned_model, interactive=True),
	        gr.update(value=topp),
	        gr.update(value=temperature),
	        gr.update(value=cfg_coef),
	        gr.update(maximum=overlap_limit),
	    )

	# Main music and video generation function.
	#
	# Loads (or reuses) the requested model, generates audio for `text` in one or
	# more segments, joins the segments, writes a .wav file, renders a waveform
	# video, tags the video with generation metadata and stores the result in the
	# user history.
	#
	# Returns (video path, wav path, seed) or, when return_history_json is set,
	# the value returned by modules.user_history.save_file. Returns
	# (None, None, seed) when generation fails or produces no output.
	# Raises gr.Error for negative temperature/topk/topp, for model loading
	# failures and when the run is interrupted.
	def predict(
	model, text, melody_filepath=None, duration=10, dimension=2, topk=200, topp=0,
	temperature=1.0, cfg_coef=4.0, background=None, title="UnlimitedMusicGen",
	settings_font="./assets/arial.ttf", settings_font_color="#c87f05", seed=-1,
	overlap=1, prompt_index=0, include_title=True, include_settings=True,
	harmony_only=False, profile=None, segment_length=30, settings_font_size=28,
	settings_animate_waveform=False, video_orientation="Landscape",
	excerpt_duration=3.5, return_history_json=False, progress=gr.Progress(track_tqdm=True)
	):
	global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
	output_segments = None
	melody_name = "Not Used"
	melody_extension = "Not Used"
	melody = None

	# Treat the strings "None" and "" as "no melody"
	if melody_filepath in ["None", ""]:
	melody_filepath = None

	# Download the melody from a URL (if provided)
	if melody_filepath and melody_filepath.startswith(("http://", "https://")):
	# Resolve a user name from `profile`: a plain string is used as is,
	# otherwise profile.value.username when present, else "default_user"
	username = profile if isinstance(profile, str) else (
	profile.value.username if hasattr(profile, 'value') and hasattr(profile.value, 'username')
	else "default_user" if profile is None else profile
	)
	melody_filepath = download_and_save_file(
	melody_filepath,
	Path(TMPDIR) / str(username),
	HF_API_TOKEN
	)

	# Fall back to the bundled background, then download it from a URL (if provided)
	if background is None or background in ["None", ""]:
	background = load_background_filepath(video_orientation)

	if background.startswith(("http://", "https://")):
	username = profile if isinstance(profile, str) else (
	profile.value.username if hasattr(profile, 'value') and hasattr(profile.value, 'username')
	else "default_user" if profile is None else profile
	)
	background = download_and_save_image(
	background,
	Path(TMPDIR) / str(username),
	HF_API_TOKEN
	)

	# Load the melody from file
	if melody_filepath:
	melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
	melody = get_melody(melody_filepath)

	# Reset the interrupt flags for this run
	INTERRUPTED = False
	INTERRUPTING = False

	# Parameter validation
	# NOTE(review): each assignment below is immediately followed by a raise,
	# so the replacement value is never used.
	if temperature < 0:
	temperature = 0.1
	raise gr.Error("Temperature must be >= 0.")
	if topk < 0:
	topk = 1
	raise gr.Error("Topk must be non-negative.")
	if topp < 0:
	topp = 1
	raise gr.Error("Topp must be non-negative.")

	# Free GPU memory when the model changes
	if MODEL is not None and model not in MODEL.name:
	print(f"Switching model from {MODEL.name} to {model}. Cleaning up resources.")
	del MODEL
	torch.cuda.empty_cache()
	gc.collect()
	MODEL = None

	# Load the requested model, or move the current one back to CUDA when
	# MOVE_TO_CPU is set
	try:
	if MODEL is None or model not in MODEL.name:
	MODEL = load_model(model)
	else:
	if MOVE_TO_CPU:
	MODEL.to('cuda')
	except Exception as e:
	raise gr.Error(f"Error loading model '{model}': {str(e)}. Try a different model.")

	# Parameter limits
	duration = min(duration, 720)
	overlap = min(overlap, 15)

	output = None
	segment_duration = duration
	initial_duration = duration
	output_segments = []

	# Generation loop (handles long tracks); `duration` holds the time still to generate
	while duration > 0:
	if not output_segments:
	segment_duration = min(segment_duration, MODEL.lm.cfg.dataset.segment_duration)
	else:
	segment_duration = min(
	duration + overlap,
	MODEL.lm.cfg.dataset.segment_duration
	)
	segment_duration = min(segment_duration, segment_length + overlap)

	# A negative seed requests a random one; the chosen seed is returned to the caller
	if seed < 0:
	seed = random.randint(0, 0xffff_ffff_ffff)
	torch.manual_seed(seed)

	print(f'Segment duration: {segment_duration}, duration: {duration}, overlap: {overlap}')

	# Generation settings
	MODEL.set_generation_params(
	use_sampling=True,
	top_k=topk,
	top_p=topp,
	temperature=temperature,
	cfg_coef=cfg_coef,
	duration=segment_duration,
	two_step_cfg=False,
	cfg_coef_beta=5 if ("style" in model) and melody else None,
	extend_stride=2 if not ("style" in model) else None,
	rep_penalty=0.5 if not ("style" in model) else None,
	)

	if ("style" in model) and melody:
	MODEL.set_style_conditioner_params(
	eval_q=3,
	excerpt_length=excerpt_duration,
	)

	MODEL.set_custom_progress_callback(gr.Progress(track_tqdm=True))

	try:
	# Melody/style conditioned generation
	if melody and ("melody" in model or "style" in model):
	if duration > MODEL.duration:
	output_segments, duration = generate_music_segments(
	text, melody, seed, MODEL, duration, overlap,
	MODEL.duration, prompt_index, harmony_only,
	excerpt_duration, progress=progress
	)
	else:
	# melody is (sample_rate, samples); the samples are converted to a tensor on the model device
	sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
	if melody.dim() == 2:
	melody = melody[None]
	melody = melody[..., :int(sr * MODEL.lm.cfg.dataset.segment_duration)]
	output = MODEL.generate_with_chroma(
	descriptions=[text],
	melody_wavs=melody,
	melody_sample_rate=sr,
	progress=False,
	progress_callback=progress
	)
	break
	else:
	# Text-only generation: first segment, then continuations seeded
	# with the last `overlap` seconds of the previous segment
	if not output_segments:
	next_segment = MODEL.generate(
	descriptions=[text],
	progress=False,
	progress_callback=progress
	)
	duration -= segment_duration
	else:
	last_chunk = output_segments[-1][:, :, -overlap*MODEL.sample_rate:]
	next_segment = MODEL.generate_continuation(
	last_chunk, MODEL.sample_rate,
	descriptions=[text],
	progress=False,
	progress_callback=progress
	)
	duration -= segment_duration - overlap
	if next_segment is not None:
	output_segments.append(next_segment)
	except Exception as e:
	print(f"Error generating audio: {e}")
	# NOTE(review): this gr.Error is constructed but not raised; the function
	# returns normally on the next line - confirm that is intended.
	gr.Error(f"Error generating audio: {e}")
	return None, None, seed

	if INTERRUPTING:
	INTERRUPTED = True
	INTERRUPTING = False
	print("Function execution interrupted!")
	raise gr.Error("Interrupted.")

	# Combine segments
	if output_segments:
	try:
	output = output_segments[0]
	for i in range(1, len(output_segments)):
	if overlap > 0:
	# Fade out the tail of the accumulated audio and fade in the head of
	# the next segment over `overlap` seconds, then splice them together
	overlap_samples = overlap * MODEL.sample_rate
	overlapping_output_fadeout = output[:, :, -overlap_samples:]
	overlapping_output_fadeout = apply_tafade(
	overlapping_output_fadeout,
	sample_rate=MODEL.sample_rate,
	duration=overlap,
	out=True,
	start=True,
	shape="linear"
	)
	overlapping_output_fadein = output_segments[i][:, :, :overlap_samples]
	overlapping_output_fadein = apply_tafade(
	overlapping_output_fadein,
	sample_rate=MODEL.sample_rate,
	duration=overlap,
	out=False,
	start=False,
	shape="linear"
	)
	overlapping_output = torch.cat(
	[overlapping_output_fadeout[:, :, :-(overlap_samples // 2)], overlapping_output_fadein],
	dim=2
	)
	output = torch.cat(
	[output[:, :, :-overlap_samples], overlapping_output, output_segments[i][:, :, overlap_samples:]],
	dim=dimension
	)
	else:
	output = torch.cat([output, output_segments[i]], dim=dimension)
	output = output.detach().cpu().float()[0]
	except Exception as e:
	# Best effort: if joining fails, keep only the first segment
	print(f"Error combining segments: {e}. Using the first segment only.")
	output = output_segments[0].detach().cpu().float()[0]
	else:
	if output is None or output.dim() == 0:
	return None, None, seed
	else:
	output = output.detach().cpu().float()[0]

	# Generate the waveform video
	video_width, video_height = (768, 512) if video_orientation == "Landscape" else (512, 768)
	title_file_name = convert_title_to_filename(title)

	# delete=False keeps the .wav on disk; its path is returned to the caller
	with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix=title_file_name) as file:
	video_description = (
	f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n"
	f"Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n"
	f"cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n"
	f"Model: {model}\n Melody Condition:{melody_name}\n Sample Segment: {prompt_index}"
	)

	# Replace `background` with the image returned by add_settings_to_image
	if include_settings or include_title:
	background = add_settings_to_image(
	title if include_title else "",
	video_description if include_settings else "",
	width=video_width,
	height=video_height,
	background_path=background,
	font=settings_font,
	font_color=settings_font_color,
	font_size=settings_font_size
	)

	audio_write(
	file.name, output, MODEL.sample_rate, strategy="loudness",
	loudness_headroom_db=18, loudness_compressor=True,
	add_suffix=False, channels=2
	)

	waveform_video_path = get_waveform(
	file.name, bg_image=background, bar_count=45,
	name=title_file_name, animate=settings_animate_waveform,
	progress=progress
	)

	# Write metadata into the MP4 file
	file_name_without_extension = os.path.splitext(file.name)[0]
	# NOTE(review): `video_name` appears twice in this unpacking, so the second
	# value overwrites the first.
	video_dir, video_name, video_name, video_ext, video_new_ext = get_file_parts(waveform_video_path)
	new_video_path = get_unique_file_path(video_dir, title_file_name, video_new_ext)

	mp4 = MP4(waveform_video_path)
	mp4["©nam"] = title_file_name
	mp4["desc"] = f"{text}\n Duration: {str(initial_duration)}"

	commit = commit_hash()
	# NOTE(review): the key "version" appears twice in this dict literal; the
	# later entry (the commit URL) is the value that is kept.
	metadata = {
	"Title": title,
	"Year": time.strftime("%Y"),
	"prompt": text,
	"negative_prompt": "",
	"Seed": seed,
	"steps": 1,
	"wdth": video_width,
	"hght": video_height,
	"Dimension": dimension,
	"Top-k": topk,
	"Top-p": topp,
	"Randomness": temperature,
	"cfg": cfg_coef,
	"overlap": overlap,
	"Melody Condition": melody_name,
	"Sample Segment": prompt_index,
	"Duration": initial_duration,
	"Audio": file.name,
	"font": settings_font,
	"font_color": settings_font_color,
	"font_size": settings_font_size,
	"harmony_only": harmony_only,
	"background": background,
	"include_title": include_title,
	"include_settings": include_settings,
	"profile": (
	profile.value.username if hasattr(profile, 'value') and hasattr(profile.value, 'username')
	else "default_user" if profile is None else profile
	),
	"commit": commit,
	"tag": git_tag(),
	"version": gr.__version__,
	"model_version": MODEL.version if MODEL else "Unknown",
	"model_name": MODEL.name if MODEL else "Unknown",
	"model_description": (
	f"{MODEL.audio_channels} channels, {MODEL.sample_rate} Hz"
	if MODEL else "Unknown"
	),
	"melody_name": melody_name if melody_name else "",
	"melody_extension": melody_extension if melody_extension else "",
	"hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen",
	"version": f"https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/{'huggingface' if commit == '<none>' else commit}",
	"python": sys.version,
	"torch": getattr(torch, '__long_version__', torch.__version__),
	"xformers": get_xformers_version(),
	"gradio": gr.__version__,
	"huggingface_space": os.environ.get('SPACE_ID', ''),
	"CUDA": (
	f"CUDA is available. device: {torch.cuda.get_device_name(0)} version: {torch.version.cuda}"
	if torch.cuda.is_available() else "CUDA is not available."
	),
	}

	# Every metadata entry is stored on the MP4 as a string
	for key, value in metadata.items():
	mp4[key] = str(value)

	mp4.save()

	# Rename the video to the unique title-based path; on failure keep the original path
	try:
	os.replace(waveform_video_path, new_video_path)
	waveform_video_path = new_video_path
	except Exception as e:
	print(f"Error renaming file: {e}")

	# NOTE(review): history_results is only bound when waveform_video_path is
	# truthy, yet it is returned below when return_history_json is set.
	if waveform_video_path:
	history_results = modules.user_history.save_file(
	profile=(
	profile.value.username if hasattr(profile, 'value') and hasattr(profile.value, 'username')
	else "default_user" if profile is None else profile
	),
	image=background,
	audio=file.name,
	video=waveform_video_path,
	label=title,
	metadata=metadata,
	progress=progress
	)

	# GPU cleanup
	if MOVE_TO_CPU:
	MODEL.to('cpu')
	if UNLOAD_MODEL:
	MODEL = None

	# Delete large objects
	del output_segments, output, melody, melody_name, melody_extension, metadata, mp4
	gc.collect()
	torch.cuda.empty_cache()
	torch.cuda.synchronize()
	torch.cuda.ipc_collect()

	if return_history_json:
	return history_results
	else:
	return waveform_video_path, file.name, seed

	# Simplified prediction function (for the API).
	#
	# Calls predict() with the given arguments, then uploads the resulting files
	# with upload_files_to_repo() to the HF_REPO_ID dataset repo under
	# user_uploads/<user>/<title>/<seed>/<timestamp>. When the upload returns a
	# truthy result, local paths are replaced by upload_result[i][1]
	# (presumably the uploaded file URL - TODO confirm against upload_files_to_repo).
	#
	# NOTE(review): the return annotation says a list of tuples, but the visible
	# code builds a 3-tuple (video, audio, seed).
	def predict_simple(
	model: str, text: str, melody_filepath: str = None, duration: int = 10,
	dimension: int = 2, topk: int = 200, topp: float = 0.01, temperature: float = 1.0,
	cfg_coef: float = 4.0, background: str = "./assets/background.png",
	title: str = "UnlimitedMusicGen", settings_font: str = "./assets/arial.ttf",
	settings_font_color: str = "#c87f05", seed: int = -1, overlap: int = 1,
	prompt_index: int = -1, include_title: bool = True, include_settings: bool = True,
	harmony_only: bool = False, profile: str = "Satoshi Nakamoto",
	segment_length: int = 30, settings_font_size: int = 28,
	settings_animate_waveform: bool = False, video_orientation: str = "Landscape",
	return_history_json: bool = False
	) -> tp.List[tp.Tuple[str, str, str]]:
	# User name used for the upload folder; overwritten below when one can be resolved
	profile_username_to_send = "default_user"

	# NOTE(review): this assigns the get_profile function itself, it does not
	# call it - confirm whether a call was intended.
	if not profile:
	profile = modules.user_history.get_profile

	# Accept a wrapper with .value, an object with .username, or a plain string
	if profile:
	actual_profile_data = profile
	if hasattr(profile, 'value') and profile.value is not None:
	actual_profile_data = profile.value

	if hasattr(actual_profile_data, 'username') and actual_profile_data.username:
	profile_username_to_send = actual_profile_data.username
	elif isinstance(actual_profile_data, str) and actual_profile_data:
	profile_username_to_send = actual_profile_data

	UMG_result = predict(
	model, text, melody_filepath=melody_filepath, duration=duration,
	dimension=dimension, topk=topk, topp=topp, temperature=temperature,
	cfg_coef=cfg_coef, background=background, title=title,
	settings_font=settings_font, settings_font_color=settings_font_color,
	seed=seed, overlap=overlap, prompt_index=prompt_index,
	include_title=include_title, include_settings=include_settings,
	harmony_only=harmony_only, profile=profile,
	segment_length=segment_length, settings_font_size=settings_font_size,
	settings_animate_waveform=settings_animate_waveform,
	video_orientation=video_orientation, excerpt_duration=3.5,
	return_history_json=return_history_json
	)

	folder_name = f"user_uploads/{convert_title_to_filename(profile_username_to_send)}/{convert_title_to_filename(title)}"
	if return_history_json:
	# History mode: UMG_result is indexed by key (video_path, audio_path, image_path, metadata)
	upload_result = upload_files_to_repo(
	files=[UMG_result["video_path"], UMG_result["audio_path"], UMG_result["image_path"]],
	repo_id=HF_REPO_ID,
	folder_name=f"{folder_name}/{UMG_result['metadata']['Seed']}/{time.strftime('%Y%m%d%H%M%S')}",
	create_permalink=False,
	repo_type="dataset"
	)
	if upload_result:
	UMG_result["video_path"] = upload_result[0][1]
	UMG_result["audio_path"] = upload_result[1][1]
	UMG_result["image_path"] = upload_result[2][1]
	# Reduce the result to (video, audio, seed)
	content = UMG_result["video_path"], UMG_result["audio_path"], UMG_result["metadata"]["Seed"]
	UMG_result = content
	else:
	# Tuple mode: UMG_result is (video path, wav path, seed)
	upload_result = upload_files_to_repo(
	files=[UMG_result[0], UMG_result[1]],
	repo_id=HF_REPO_ID,
	folder_name=f"{folder_name}/{UMG_result[2]}/{time.strftime('%Y%m%d%H%M%S')}",
	create_permalink=False,
	repo_type="dataset"
	)
	if upload_result:
	UMG_result = upload_result[0][1], upload_result[1][1], UMG_result[2]

	return UMG_result

	# Set static paths (runs at import time, before the UI is built)
	gr.set_static_paths(paths=["fonts/", "assets/", "images/"])

	# UI function.
	#
	# Builds the Gradio Blocks interface (an "UnlimitedMusicGen" tab with the
	# generation controls and a "User History" tab), wires the component events,
	# registers `ping` and `predict_simple` as API endpoints and launches the app.
	#
	# Keyword arguments read from **kwargs: 'listen' (server name),
	# 'server_port' and 'share'. Any other keys are ignored by this function.
	def ui(**kwargs):
	with gr.Blocks(title="UnlimitedMusicGen", css_paths="style_20250331.css", theme='Surn/beeuty') as demo:
	with gr.Tab("UnlimitedMusicGen"):
	gr.Markdown(
	"""
	# UnlimitedMusicGen
	This is your private demo for [UnlimitedMusicGen](https://github.com/Oncorporation/audiocraft), a simple and controllable model for music generation
	presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)

	⚠ Disclaimer: This won't run on CPU only. Clone this App and run on GPU instance!

	Todo: Working on improved Interrupt.
	Theme Available at ["Surn/Beeuty"](https://huggingface.co/spaces/Surn/Beeuty)
	"""
	)

	# Extra notice shown only on a shared Space without CUDA
	if IS_SHARED_SPACE and not torch.cuda.is_available():
	gr.Markdown("""
	⚠ This Space doesn't work in this shared UI ⚠
	<a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
	<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
	to use it privately, or use the <a href="https://huggingface.co/spaces/facebook/MusicGen">public demo</a>
	""")

	# Input controls: prompt, duration, model, melody conditioning
	with gr.Row():
	with gr.Column():
	with gr.Row():
	with gr.Column():
	text = gr.Text(
	label="Describe your music",
	interactive=True,
	value="4/4 100bpm 320kbps 32khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out",
	key="prompt",
	lines=4
	)
	autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
	with gr.Column():
	duration = gr.Slider(
	minimum=1, maximum=720, value=10,
	label="Duration (s)", interactive=True,
	key="total_duration", step=1
	)
	model = gr.Radio(
	["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large", "style"],
	label="AI Model", value="medium", interactive=True,
	key="chosen_model"
	)
	with gr.Row():
	submit = gr.Button("Generate", elem_id="btn-generate")
	# The interrupt button calls interrupt() directly, bypassing the queue
	_ = gr.Button("Interrupt", elem_id="btn-interrupt").click(fn=interrupt, queue=False)
	with gr.Row():
	with gr.Column():
	radio = gr.Radio(
	["file", "mic"], value="file",
	label="Condition on a melody (optional) File or Mic"
	)
	melody_filepath = gr.Audio(
	value=None, sources=["upload"],
	type="filepath", label="Melody Condition (optional)",
	interactive=True, elem_id="melody-input", key="melody_input"
	)
	with gr.Column():
	harmony_only = gr.Radio(
	label="Use Harmony Only",
	choices=["No", "Yes"], value="No",
	interactive=True,
	info="Remove Drums?"
	)
	prompt_index = gr.Slider(
	label="Melody Condition Sample Segment",
	minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=-1,
	interactive=True,
	info="Which 10-30 second segment to condition with, -1 = align with conditioning melody"
	)
	# Video settings: background image, title and settings overlay
	with gr.Accordion("Video", open=False):
	with gr.Row():
	background = gr.Image(
	value="./assets/background.png", sources=["upload"],
	label="Background", width=768, height=512,
	type="filepath", interactive=True, key="background_imagepath"
	)
	with gr.Column():
	include_title = gr.Checkbox(
	label="Add Title", value=True, interactive=True,
	key="add_title"
	)
	include_settings = gr.Checkbox(
	label="Add Settings to background", value=True,
	interactive=True, key="add_settings"
	)
	video_orientation = gr.Radio(
	label="Video Orientation",
	choices=["Landscape", "Portrait"], value="Landscape",
	interactive=True, key="video_orientation"
	)
	with gr.Row():
	title = gr.Textbox(
	label="Title", value="UnlimitedMusicGen",
	interactive=True, key="song_title"
	)
	settings_font = gr.Text(
	label="Settings Font", value="./assets/arial.ttf",
	interactive=True
	)
	settings_font_color = gr.ColorPicker(
	label="Settings Font Color", value="#c87f05",
	interactive=True, key="settings_font_color"
	)
	settings_font_size = gr.Slider(
	minimum=8, maximum=64, value=28, step=1,
	label="Settings Font Size", interactive=True,
	key="settings_font_size"
	)
	settings_animate_waveform = gr.Checkbox(
	label="Animate Waveform", value=False,
	interactive=True, key="animate_waveform"
	)
	# Expert settings: segmenting and sampling parameters
	with gr.Accordion("Expert", open=False):
	with gr.Row():
	segment_length = gr.Slider(
	minimum=10, maximum=30, value=30, step=1,
	label="Music Generation Segment Length (s)",
	interactive=True, key="segment_length"
	)
	overlap = gr.Slider(
	minimum=0, maximum=14, value=1, step=1,
	label="Segment Overlap", interactive=True
	)
	dimension = gr.Slider(
	minimum=-2, maximum=2, value=2, step=1,
	label="Dimension",
	info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)",
	interactive=True
	)
	with gr.Row():
	topk = gr.Number(
	label="Top-k", value=280, precision=0,
	interactive=True,
	info="more structured"
	)
	topp = gr.Number(
	label="Top-p", value=1150, precision=0,
	interactive=True,
	info="more variation, overwrites Top-k if not zero"
	)
	temperature = gr.Number(
	label="Randomness Temperature", value=0.7,
	precision=None, step=0.1, interactive=True,
	info="less than one to follow Melody Condition song closely"
	)
	cfg_coef = gr.Number(
	label="Classifier Free Guidance", value=3.75,
	precision=None, step=0.1, interactive=True,
	info="3.0-4.0, stereo and small need more"
	)
	with gr.Row():
	seed = gr.Number(
	label="Seed", value=-1, precision=0,
	interactive=True, key="seed"
	)
	# Dice button: reset the seed to -1 (predict() then picks a random seed)
	gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(
	fn=lambda: -1, outputs=[seed], queue=False
	)
	reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")

	# Output components: video, wav file and the seed that was used
	with gr.Column() as c:
	output = gr.Video(
	label="Generated Music", interactive=False,
	show_download_button=True, show_share_button=True,
	autoplay=False
	)
	wave_file = gr.File(
	label=".wav file", elem_id="output_wavefile",
	interactive=True
	)
	seed_used = gr.Number(
	label='Seed used', value=-1, interactive=False
	)

	# UI bindings
	radio.change(
	toggle_audio_src, radio, [melody_filepath],
	queue=False, show_progress=False, api_name="audio_src_change"
	)
	video_orientation.change(
	load_background_filepath, inputs=[video_orientation],
	outputs=[background], queue=False, show_progress=False,
	api_name="video_orientation_change"
	)
	melody_filepath.change(
	load_melody_filepath,
	inputs=[melody_filepath, title, model, topp, temperature, cfg_coef, segment_length],
	outputs=[title, prompt_index, model, topp, temperature, cfg_coef, overlap],
	api_name="melody_filepath_change", queue=False
	)
	# Copy the last used seed back into the seed input
	reuse_seed.click(
	fn=lambda x: x, inputs=[seed_used], outputs=[seed],
	queue=False, api_name="reuse_seed_click"
	)
	autoplay_cb.change(
	fn=lambda x: gr.update(autoplay=x), inputs=[autoplay_cb],
	outputs=[output], queue=False, api_name="autoplay_cb_change"
	)
	segment_length.release(
	fn=load_melody_filepath, queue=False, api_name="segment_length_change",
	trigger_mode="once",
	inputs=[melody_filepath, title, model, topp, temperature, cfg_coef, segment_length],
	outputs=[title, prompt_index, model, topp, temperature, cfg_coef, overlap],
	show_progress="minimal"
	)

	# Examples; each row fills: text, melody file, model, title, top-p, temperature, cfg
	gr.Examples(
	examples=[
	[
	"4/4 120bpm 320kbps 32khz, An 80s driving pop song with heavy drums and synth pads in the background",
	"./assets/bach.mp3",
	"melody",
	"80s Pop Synth",
	950,
	0.6,
	3.5
	],
	[
	"4/4 120bpm 320kbps 32khz, A cheerful country song with acoustic guitars",
	"./assets/bolero_ravel.mp3",
	"stereo-melody-large",
	"Country Guitar",
	750,
	0.7,
	4.0
	],
	[
	"4/4 120bpm 320kbps 32khz, 90s rock song with electric guitar and heavy drums",
	None,
	"stereo-medium",
	"90s Rock Guitar",
	1150,
	0.7,
	3.75
	],
	[
	"4/4 120bpm 320kbps 32khz, a light and cheery EDM track, with syncopated drums, aery pads, and strong emotions",
	"./assets/bach.mp3",
	"melody-large",
	"EDM my Bach",
	500,
	0.7,
	3.75
	],
	[
	"4/4 320kbps 32khz, lofi slow bpm electro chill with organic samples",
	None,
	"medium",
	"LoFi Chill",
	0,
	0.7,
	4.0
	],
	],
	inputs=[text, melody_filepath, model, title, topp, temperature, cfg_coef],
	outputs=[output]
	)

	with gr.Tab("User History") as history_tab:
	modules.user_history.setup(display_type="video_path")
	modules.user_history.render()

	# Holds the profile returned by get_profile between the two submit steps
	user_profile = gr.State(None)

	with gr.Row("Versions") as versions_row:
	gr.HTML(value=versions_html(), visible=True, elem_id="versions")

	# Main predict call: first fetch the user profile, then run predict with it
	submit.click(
	modules.user_history.get_profile,
	inputs=[],
	outputs=[user_profile],
	queue=True,
	api_name="submit"
	).then(
	predict,
	inputs=[
	model, text, melody_filepath, duration, dimension, topk, topp,
	temperature, cfg_coef, background, title, settings_font,
	settings_font_color, seed, overlap, prompt_index, include_title,
	include_settings, harmony_only, user_profile, segment_length,
	settings_font_size, settings_animate_waveform, video_orientation
	],
	outputs=[output, wave_file, seed_used],
	scroll_to_output=True, show_api=False
	)

	# Launch the interface
	# NOTE(review): 'server_port' defaults to 0 here when no port is supplied;
	# confirm how launch() treats 0 (as opposed to None / an omitted argument).
	# NOTE(review): 'e:/TMP' in allowed_paths is a hard-coded, machine-specific path.
	launch_kwargs = {
	'server_name': kwargs.get('listen', '0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1'),
	'server_port': kwargs.get('server_port', 0),
	'share': kwargs.get('share', False),
	'allowed_paths': ["assets", "./assets", "images", "./images", 'e:/TMP'],
	'favicon_path': "./assets/favicon.ico",
	'mcp_server': True,
	'ssr_mode': False
	}

	# These two checks re-assign values the dict above already contains
	if kwargs.get('server_port', 0) > 0:
	launch_kwargs['server_port'] = kwargs.get('server_port')

	if kwargs.get('share', False):
	launch_kwargs['share'] = True

	# Extra API endpoints
	gr.api(ping, api_name="ping")
	gr.api(predict_simple)

	demo.queue(max_size=10, api_open=True).launch(**launch_kwargs)

	# Main entry point: parse the command line, set the module flags, start the UI
	if __name__ == "__main__":
	    parser = argparse.ArgumentParser()

	    parser.add_argument(
	        '--listen',
	        type=str,
	        default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
	        help='IP to listen on for connections to Gradio',
	    )
	    parser.add_argument(
	        '--username', type=str, default='', help='Username for authentication'
	    )
	    parser.add_argument(
	        '--password', type=str, default='', help='Password for authentication'
	    )
	    parser.add_argument(
	        '--server_port',
	        type=int,
	        default=0,
	        help='Port to run the server listener on',
	    )
	    parser.add_argument(
	        '--inbrowser', action='store_true', help='Open in browser'
	    )
	    parser.add_argument(
	        '--share', action='store_true', help='Share the gradio UI'
	    )
	    parser.add_argument(
	        '--unload_model', action='store_true',
	        help='Unload the model after every generation to save GPU memory'
	    )
	    parser.add_argument(
	        '--unload_to_cpu', action='store_true',
	        help='Move the model to main RAM after every generation to save GPU memory but reload faster than after full unload'
	    )
	    parser.add_argument(
	        '--cache', action='store_true',
	        help='Cache models in RAM to quickly switch between them'
	    )

	    args = parser.parse_args()

	    # Set the module-level flags read by predict()
	    UNLOAD_MODEL = args.unload_model
	    MOVE_TO_CPU = args.unload_to_cpu

	    # An empty dict (instead of None) switches load_model() to its caching path
	    if args.cache:
	        MODELS = {}

	    # Launch the interface.
	    # vars(args) already contains `share` and `unload_to_cpu`; passing them
	    # again as explicit keywords raised
	    # "TypeError: ui() got multiple values for keyword argument ...",
	    # so the parsed arguments are forwarded exactly once.
	    ui(**vars(args))