# webnowa's picture
# Update app.py
# 69734b9 verified
"""
Copyright (c) Meta Platforms, Inc. and affiliates.
All rights reserved.
This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import os
import sys
import time
import gc
import random
import warnings
import typing as tp
from pathlib import Path
from tempfile import NamedTemporaryFile
import argparse
import subprocess
import torch
import gradio as gr
import librosa
from mutagen.mp4 import MP4
# Imports from local modules
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
from audiocraft.data.audio_utils import apply_fade, apply_tafade, apply_splice_effect
from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
from audiocraft.utils import utils
import numpy as np
# Imports from project modules
import modules.user_history
from modules.version_info import versions_html, commit_hash, get_xformers_version
from modules.gradio import *
from modules.file_utils import (
get_file_parts,
get_filename_from_filepath,
convert_title_to_filename,
get_unique_file_path,
delete_file,
download_and_save_image,
download_and_save_file
)
from modules.constants import (
IS_SHARED_SPACE,
HF_REPO_ID,
TMPDIR,
HF_API_TOKEN
)
from modules.storage import upload_files_to_repo
# Module-level state shared by the functions below.
MODEL = MODELS = None  # active model / optional per-version model cache (a dict when caching is enabled)
INTERRUPTED = UNLOAD_MODEL = MOVE_TO_CPU = False  # run-state flag and the two memory-saving CLI switches
MAX_PROMPT_INDEX = 0  # upper bound of the melody segment slider; updated when a melody is loaded
git = os.environ.get('GIT', "git")  # git executable, overridable through the GIT environment variable

# GPU environment configuration (optional, adjust to your needs)
os.environ.update({
    "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128,expandable_segments:True",
    'TF_ENABLE_ONEDNN_OPTS': '0',
    'CUDA_MODULE_LOADING': 'LAZY',
    'USE_FLASH_ATTENTION': '1',
    'XFORMERS_FORCE_DISABLE_TRITON': '1',
})
# Generation-interrupt helpers
def interrupt_callback():
    """Report whether the last generation run was interrupted.

    Returns:
        The current value of the module-level ``INTERRUPTED`` flag.
    """
    was_interrupted = INTERRUPTED
    return was_interrupted
def interrupt():
    """Ask the running generation to stop by raising the ``INTERRUPTING`` flag.

    ``predict`` polls this module-level flag inside its generation loop.

    NOTE(review): this rebinds ``INTERRUPTING`` in this module only; the
    object of the same name imported from ``audiocraft.utils.extend`` is
    presumably not affected - confirm whether that module needs the signal.
    """
    globals()["INTERRUPTING"] = True
# Temporary-file cleaner
class FileCleaner:
    """Track temporary files and delete them once they outlive a time limit.

    Files are registered with :meth:`add`. Every registration first purges
    the entries that have been tracked for longer than ``file_lifetime``
    seconds. Entries are kept in insertion (i.e. chronological) order.
    """

    def __init__(self, file_lifetime: float = 3600):
        """Create a cleaner.

        Args:
            file_lifetime: Seconds a registered file is kept before it
                becomes eligible for deletion.
        """
        self.file_lifetime = file_lifetime
        self.files = []  # list of (time_added, Path), oldest first

    def add(self, path: tp.Union[str, Path]):
        """Purge expired files, then start tracking ``path``."""
        self._cleanup()
        self.files.append((time.time(), Path(path)))

    def _cleanup(self):
        """Delete and forget every tracked file older than ``file_lifetime``.

        Deletion is best effort: a file that is already gone, or that cannot
        be removed, is reported and dropped from tracking so that one bad
        entry can never block later calls to :meth:`add`.
        """
        now = time.time()
        expired = 0
        for time_added, path in self.files:
            # Entries are chronological, so the first fresh one ends the scan.
            if now - time_added <= self.file_lifetime:
                break
            try:
                # missing_ok avoids the exists()/unlink() race.
                path.unlink(missing_ok=True)
            except OSError as err:
                print(f"Could not delete expired file {path}: {err}")
            expired += 1
        if expired:
            # One slice deletion instead of repeated O(n) pop(0) calls.
            del self.files[:expired]
# Ping endpoint (used to check that the API is reachable)
def ping():
    """Liveness probe for API clients; unconditionally returns ``True``."""
    return True
# Switch the melody audio source (microphone / file)
def toggle_audio_src(choice):
    """Build the component update that switches the melody input source.

    The melody component in this file is created with ``sources=[...]``,
    so the update uses the same ``sources`` list keyword rather than the
    older singular ``source`` keyword.

    Args:
        choice: ``"mic"`` selects the microphone; any other value selects
            file upload.

    Returns:
        A ``gr.update`` payload that resets the value and sets the source
        list and label.
    """
    if choice == "mic":
        return gr.update(sources=["microphone"], value=None, label="Microphone")
    return gr.update(sources=["upload"], value=None, label="File")
# Render a waveform video
def get_waveform(*args, **kwargs):
    """Call ``gr.make_waveform`` with warnings silenced and print the elapsed time.

    All positional and keyword arguments are forwarded unchanged.

    Returns:
        Whatever ``gr.make_waveform`` returns.
    """
    started = time.time()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        video = gr.make_waveform(*args, **kwargs)
    elapsed = time.time() - started
    print("Make a video took", elapsed)
    return video
# Model loading
def load_model(version, progress=gr.Progress(track_tqdm=True)):
    """Load the MusicGen model ``version``, using the optional model cache.

    When the module-level ``MODELS`` cache is ``None`` the model is always
    loaded with ``MusicGen.get_pretrained``. Otherwise the currently active
    ``MODEL`` (if any) is moved to the CPU first, and the requested version
    is either loaded and stored in the cache or fetched from it and moved
    to CUDA.

    Args:
        version: Model identifier passed to ``MusicGen.get_pretrained``.
        progress: Gradio progress tracker (tracks the tqdm bar used here).

    Returns:
        The loaded model.
    """
    print(f"Loading model {version}")
    # NOTE(review): `tqdm` is not imported by name in this file's visible
    # imports; presumably it comes from `from modules.gradio import *` - confirm.
    with tqdm(total=100, desc=f"Loading model '{version}'", unit="step") as pbar:
        if MODELS is None:
            pbar.update(50)
            result = MusicGen.get_pretrained(version)
            pbar.update(50)
            return result

        reported = 0  # progress already pushed to the bar, so it ends at exactly 100
        t1 = time.monotonic()
        if MODEL is not None:
            MODEL.to('cpu')
            print(f"Previous model moved to CPU in {time.monotonic() - t1:.2f}s")
            pbar.update(30)
            reported = 30
            t1 = time.monotonic()

        cached = MODELS.get(version)
        if cached is None:
            print(f"Loading model {version} from disk")
            result = MusicGen.get_pretrained(version)
            MODELS[version] = result
            print(f"Model loaded in {time.monotonic() - t1:.2f}s")
        else:
            result = cached.to('cuda')
            print(f"Cached model loaded in {time.monotonic() - t1:.2f}s")
        pbar.update(100 - reported)
        return result
# Read a melody from a file
def get_melody(melody_filepath):
    """Load an audio file and return it as ``(sample_rate, samples)``.

    ``librosa.load`` yields ``(samples, sample_rate)``; the two elements are
    reordered here (no audio channels are swapped). ``sr=None`` is passed so
    librosa does not resample to its default rate.

    Args:
        melody_filepath: Path of the audio file to read.

    Returns:
        A 2-tuple ``(sample_rate, samples)``.
    """
    samples, sample_rate = librosa.load(melody_filepath, sr=None)
    return (sample_rate, samples)
# Resolve the Git tag
def git_tag():
    """Return a version label for the running code.

    Tries ``git describe --tags`` first. If that fails for any reason, falls
    back to the first non-blank line of ``CHANGELOG.md`` located two
    directories above this file, and finally to ``"<none>"``.
    """
    try:
        described = subprocess.check_output(
            [git, "describe", "--tags"], shell=False, encoding='utf8'
        )
        return described.strip()
    except Exception:
        pass

    try:
        changelog_md = Path(__file__).parent.parent / "CHANGELOG.md"
        with changelog_md.open(encoding="utf-8") as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                if stripped:
                    return stripped
        return "<none>"
    except Exception:
        return "<none>"
# Pick the background image
def load_background_filepath(video_orientation):
    """Return the default background image path for a video orientation.

    Args:
        video_orientation: ``"Landscape"`` selects the landscape image; any
            other value selects the portrait image.
    """
    return (
        "./assets/background.png"
        if video_orientation == "Landscape"
        else "./assets/background_portrait.png"
    )
# Refresh the UI after a melody is chosen
def load_melody_filepath(melody_filepath, title, assigned_model, topp, temperature, cfg_coef, segment_length=30):
    """Compute UI updates for a newly selected (or cleared) melody file.

    Args:
        melody_filepath: Path of the melody file, or ``None``/``""`` when cleared.
        title: Current song title.
        assigned_model: Currently selected model name.
        topp, temperature, cfg_coef: Current sampling settings.
        segment_length: Generation segment length in seconds.

    Returns:
        A 7-tuple of values/updates for, in order: title, prompt-index
        slider, model, top-p, temperature, cfg coefficient and the overlap
        slider (whose maximum is ``segment_length // 2 - 1``).

    Side effects:
        Sets the module-level ``MAX_PROMPT_INDEX`` to the number of whole
        segments in the melody (capped at 25).
    """
    global MAX_PROMPT_INDEX
    symbols = ['_', '.', '-']
    max_overlap = int(segment_length // 2) - 1

    if (melody_filepath is None) or (melody_filepath == ""):
        # No melody: reset the segment slider and fall back to the default model.
        return (
            title,
            gr.update(maximum=0, value=-1),
            gr.update(value="medium", interactive=True),
            gr.update(value=topp),
            gr.update(value=temperature),
            gr.update(value=cfg_coef),
            gr.update(maximum=max_overlap)
        )

    if (title is None) or ("MusicGen" in title) or (title == ""):
        # Default/empty title: derive one from the file name and apply the
        # melody-specific sampling presets.
        melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
        for symbol in symbols:
            melody_name = melody_name.replace(symbol, ' ').title()
        topp = 800
        temperature = 0.5
        cfg_coef = 3.25
    else:
        melody_name = title

    # Keep a model that can consume the melody. "style" models are accepted
    # as well, because predict() conditions them on the melody too; only
    # other models are swapped for the melody default.
    if not assigned_model or ("melody" not in assigned_model and "style" not in assigned_model):
        assigned_model = "melody-large"

    print(f"Melody name: {melody_name}, Melody Filepath: {melody_filepath}, Model: {assigned_model}\n")
    melody = get_melody(melody_filepath)
    sr, melody_data = melody[0], melody[1]
    segment_samples = sr * segment_length
    # int() keeps the slider bound integral even if segment_length arrives as a float.
    total_melodys = max(min(int(len(melody_data) // segment_samples), 25), 0)
    print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
    MAX_PROMPT_INDEX = total_melodys

    return (
        gr.update(value=melody_name),
        gr.update(maximum=MAX_PROMPT_INDEX, value=-1),
        gr.update(value=assigned_model, interactive=True),
        gr.update(value=topp),
        gr.update(value=temperature),
        gr.update(value=cfg_coef),
        gr.update(maximum=max_overlap)
    )
# Main music + video generation routine
def _resolve_username(profile):
    """Return the user name to file results under.

    A plain string is returned as is, ``None`` maps to ``"default_user"``,
    an object exposing ``.value.username`` yields that name, and anything
    else is returned unchanged.
    """
    if isinstance(profile, str):
        return profile
    if hasattr(profile, 'value') and hasattr(profile.value, 'username'):
        return profile.value.username
    return "default_user" if profile is None else profile


def predict(
    model, text, melody_filepath=None, duration=10, dimension=2, topk=200, topp=0,
    temperature=1.0, cfg_coef=4.0, background=None, title="UnlimitedMusicGen",
    settings_font="./assets/arial.ttf", settings_font_color="#c87f05", seed=-1,
    overlap=1, prompt_index=0, include_title=True, include_settings=True,
    harmony_only=False, profile=None, segment_length=30, settings_font_size=28,
    settings_animate_waveform=False, video_orientation="Landscape",
    excerpt_duration=3.5, return_history_json=False, progress=gr.Progress(track_tqdm=True)
):
    """Generate music (optionally melody/style conditioned) and a waveform video.

    Long pieces are produced segment by segment: each new segment continues
    from the last ``overlap`` seconds of the previous one, and the segments
    are blended together afterwards. The audio is written to a temporary
    ``.wav`` file, rendered into a waveform video, tagged with the
    generation settings and stored in the user history.

    Args:
        model: Model name passed to ``load_model``.
        text: Text prompt.
        melody_filepath: Local path or http(s) URL of a conditioning melody.
        duration: Requested length in seconds (capped at 720).
        dimension: Tensor dimension along which segments are concatenated.
        topk, topp, temperature, cfg_coef: Sampling parameters.
        background: Local path or http(s) URL of the video background image.
        title: Title used for the video, file names and metadata.
        settings_font, settings_font_color, settings_font_size: Text overlay style.
        seed: Random seed; a negative value draws a random one.
        overlap: Seconds shared between consecutive segments (capped at 15).
        prompt_index, harmony_only, excerpt_duration: Forwarded to the
            melody/style conditioning helpers.
        include_title, include_settings: Whether to draw them on the background.
        profile: User name, or a profile object, used for storage.
        segment_length: Maximum segment length in seconds (before overlap).
        settings_animate_waveform: Forwarded to the waveform renderer.
        video_orientation: ``"Landscape"`` or portrait layout.
        return_history_json: Return the user-history record instead of paths.
        progress: Gradio progress tracker.

    Returns:
        ``(video_path, wav_path, seed)``, or the user-history result when
        ``return_history_json`` is true. ``(None, None, seed)`` is returned
        when generation fails or produces nothing.

    Raises:
        gr.Error: On invalid sampling parameters, model loading failure or
            user interruption.
    """
    global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
    melody_name = "Not Used"
    melody_extension = "Not Used"
    melody = None
    history_results = None  # stays None when nothing is saved to the history
    if melody_filepath in ["None", ""]:
        melody_filepath = None
    username = _resolve_username(profile)

    # Download the melody from a URL (if one was given)
    if melody_filepath and melody_filepath.startswith(("http://", "https://")):
        melody_filepath = download_and_save_file(
            melody_filepath,
            Path(TMPDIR) / str(username),
            HF_API_TOKEN
        )

    # Download the background from a URL (if one was given)
    if background is None or background in ["None", ""]:
        background = load_background_filepath(video_orientation)
    if background.startswith(("http://", "https://")):
        background = download_and_save_image(
            background,
            Path(TMPDIR) / str(username),
            HF_API_TOKEN
        )

    # Read the melody from file
    if melody_filepath:
        melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
        melody = get_melody(melody_filepath)

    INTERRUPTED = False
    INTERRUPTING = False

    # Parameter validation
    if temperature < 0:
        raise gr.Error("Temperature must be >= 0.")
    if topk < 0:
        raise gr.Error("Topk must be non-negative.")
    if topp < 0:
        raise gr.Error("Topp must be non-negative.")

    # Release the previous model when a different one is requested
    if MODEL is not None and model not in MODEL.name:
        print(f"Switching model from {MODEL.name} to {model}. Cleaning up resources.")
        MODEL = None
        gc.collect()
        torch.cuda.empty_cache()

    try:
        if MODEL is None or model not in MODEL.name:
            MODEL = load_model(model)
        elif MOVE_TO_CPU:
            MODEL.to('cuda')
    except Exception as e:
        raise gr.Error(f"Error loading model '{model}': {str(e)}. Try a different model.") from e

    # Parameter limits
    duration = min(duration, 720)
    overlap = min(overlap, 15)
    output = None
    segment_duration = duration
    initial_duration = duration
    output_segments = []
    uses_style = "style" in model
    style_with_melody = uses_style and melody is not None

    # Generation loop (handles pieces longer than one segment)
    while duration > 0:
        max_segment = MODEL.lm.cfg.dataset.segment_duration
        if not output_segments:
            segment_duration = min(segment_duration, max_segment)
        else:
            segment_duration = min(duration + overlap, max_segment)
        segment_duration = min(segment_duration, segment_length + overlap)

        if seed < 0:
            seed = random.randint(0, 0xffff_ffff_ffff)
        torch.manual_seed(seed)
        print(f'Segment duration: {segment_duration}, duration: {duration}, overlap: {overlap}')

        # Generation settings
        MODEL.set_generation_params(
            use_sampling=True,
            top_k=topk,
            top_p=topp,
            temperature=temperature,
            cfg_coef=cfg_coef,
            duration=segment_duration,
            two_step_cfg=False,
            cfg_coef_beta=5 if style_with_melody else None,
            extend_stride=2 if not uses_style else None,
            rep_penalty=0.5 if not uses_style else None,
        )
        if style_with_melody:
            MODEL.set_style_conditioner_params(
                eval_q=3,
                excerpt_length=excerpt_duration,
            )
        MODEL.set_custom_progress_callback(gr.Progress(track_tqdm=True))

        try:
            if melody is not None and ("melody" in model or uses_style):
                if duration > MODEL.duration:
                    output_segments, duration = generate_music_segments(
                        text, melody, seed, MODEL, duration, overlap,
                        MODEL.duration, prompt_index, harmony_only,
                        excerpt_duration, progress=progress
                    )
                else:
                    sr = melody[0]
                    melody_wav = torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
                    if melody_wav.dim() == 2:
                        melody_wav = melody_wav[None]
                    melody_wav = melody_wav[..., :int(sr * MODEL.lm.cfg.dataset.segment_duration)]
                    output = MODEL.generate_with_chroma(
                        descriptions=[text],
                        melody_wavs=melody_wav,
                        melody_sample_rate=sr,
                        progress=False,
                        progress_callback=progress
                    )
                # The conditioned paths produce the whole piece in one go.
                break
            else:
                if not output_segments:
                    next_segment = MODEL.generate(
                        descriptions=[text],
                        progress=False,
                        progress_callback=progress
                    )
                    duration -= segment_duration
                else:
                    # NOTE(review): with overlap == 0 this slice is the whole
                    # previous segment ([-0:]); confirm that is intended.
                    last_chunk = output_segments[-1][:, :, -int(overlap * MODEL.sample_rate):]
                    next_segment = MODEL.generate_continuation(
                        last_chunk, MODEL.sample_rate,
                        descriptions=[text],
                        progress=False,
                        progress_callback=progress
                    )
                    duration -= segment_duration - overlap
                if next_segment is not None:
                    output_segments.append(next_segment)
        except Exception as e:
            print(f"Error generating audio: {e}")
            # gr.Warning shows the message without raising, so the
            # "return empty outputs" contract below is preserved.
            gr.Warning(f"Error generating audio: {e}")
            return None, None, seed

        if INTERRUPTING:
            INTERRUPTED = True
            INTERRUPTING = False
            print("Function execution interrupted!")
            raise gr.Error("Interrupted.")

    # Join the segments
    if output_segments:
        try:
            output = output_segments[0]
            for segment in output_segments[1:]:
                if overlap > 0:
                    overlap_samples = int(overlap * MODEL.sample_rate)
                    overlapping_output_fadeout = apply_tafade(
                        output[:, :, -overlap_samples:],
                        sample_rate=MODEL.sample_rate,
                        duration=overlap,
                        out=True,
                        start=True,
                        shape="linear"
                    )
                    overlapping_output_fadein = apply_tafade(
                        segment[:, :, :overlap_samples],
                        sample_rate=MODEL.sample_rate,
                        duration=overlap,
                        out=False,
                        start=False,
                        shape="linear"
                    )
                    overlapping_output = torch.cat(
                        [overlapping_output_fadeout[:, :, :-(overlap_samples // 2)], overlapping_output_fadein],
                        dim=2
                    )
                    output = torch.cat(
                        [output[:, :, :-overlap_samples], overlapping_output, segment[:, :, overlap_samples:]],
                        dim=dimension
                    )
                else:
                    output = torch.cat([output, segment], dim=dimension)
            output = output.detach().cpu().float()[0]
        except Exception as e:
            print(f"Error combining segments: {e}. Using the first segment only.")
            output = output_segments[0].detach().cpu().float()[0]
    else:
        if output is None or output.dim() == 0:
            return None, None, seed
        output = output.detach().cpu().float()[0]

    # Render the waveform video
    video_width, video_height = (768, 512) if video_orientation == "Landscape" else (512, 768)
    title_file_name = convert_title_to_filename(title)
    # Only reserve a unique file name here; the handle is closed before the
    # audio writer opens the same path.
    with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix=title_file_name) as file:
        audio_path = file.name

    video_description = (
        f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n"
        f"Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n"
        f"cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n"
        f"Model: {model}\n Melody Condition:{melody_name}\n Sample Segment: {prompt_index}"
    )
    if include_settings or include_title:
        background = add_settings_to_image(
            title if include_title else "",
            video_description if include_settings else "",
            width=video_width,
            height=video_height,
            background_path=background,
            font=settings_font,
            font_color=settings_font_color,
            font_size=settings_font_size
        )
    audio_write(
        audio_path, output, MODEL.sample_rate, strategy="loudness",
        loudness_headroom_db=18, loudness_compressor=True,
        add_suffix=False, channels=2
    )
    waveform_video_path = get_waveform(
        audio_path, bg_image=background, bar_count=45,
        name=title_file_name, animate=settings_animate_waveform,
        progress=progress
    )

    # Write the metadata into the MP4 file
    video_dir, _, _, _, video_new_ext = get_file_parts(waveform_video_path)
    new_video_path = get_unique_file_path(video_dir, title_file_name, video_new_ext)
    mp4 = MP4(waveform_video_path)
    mp4["©nam"] = title_file_name
    mp4["desc"] = f"{text}\n Duration: {str(initial_duration)}"
    commit = commit_hash()
    metadata = {
        "Title": title,
        "Year": time.strftime("%Y"),
        "prompt": text,
        "negative_prompt": "",
        "Seed": seed,
        "steps": 1,
        "wdth": video_width,
        "hght": video_height,
        "Dimension": dimension,
        "Top-k": topk,
        "Top-p": topp,
        "Randomness": temperature,
        "cfg": cfg_coef,
        "overlap": overlap,
        "Melody Condition": melody_name,
        "Sample Segment": prompt_index,
        "Duration": initial_duration,
        "Audio": audio_path,
        "font": settings_font,
        "font_color": settings_font_color,
        "font_size": settings_font_size,
        "harmony_only": harmony_only,
        "background": background,
        "include_title": include_title,
        "include_settings": include_settings,
        "profile": username,
        "commit": commit,
        "tag": git_tag(),
        # "version" used to be listed twice; this commit URL was the value
        # that took effect. The Gradio version is stored under "gradio".
        "version": f"https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/{'huggingface' if commit == '<none>' else commit}",
        "model_version": MODEL.version if MODEL else "Unknown",
        "model_name": MODEL.name if MODEL else "Unknown",
        "model_description": (
            f"{MODEL.audio_channels} channels, {MODEL.sample_rate} Hz"
            if MODEL else "Unknown"
        ),
        "melody_name": melody_name if melody_name else "",
        "melody_extension": melody_extension if melody_extension else "",
        "hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen",
        "python": sys.version,
        "torch": getattr(torch, '__long_version__', torch.__version__),
        "xformers": get_xformers_version(),
        "gradio": gr.__version__,
        "huggingface_space": os.environ.get('SPACE_ID', ''),
        "CUDA": (
            f"CUDA is available. device: {torch.cuda.get_device_name(0)} version: {torch.version.cuda}"
            if torch.cuda.is_available() else "CUDA is not available."
        ),
    }
    for key, value in metadata.items():
        mp4[key] = str(value)
    mp4.save()

    try:
        os.replace(waveform_video_path, new_video_path)
        waveform_video_path = new_video_path
    except Exception as e:
        print(f"Error renaming file: {e}")

    if waveform_video_path:
        history_results = modules.user_history.save_file(
            profile=username,
            image=background,
            audio=audio_path,
            video=waveform_video_path,
            label=title,
            metadata=metadata,
            progress=progress
        )

    # Free GPU memory
    if MOVE_TO_CPU:
        MODEL.to('cpu')
    if UNLOAD_MODEL:
        MODEL = None

    # Drop the large objects
    del output_segments, output, melody, metadata, mp4
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        torch.cuda.ipc_collect()

    if return_history_json:
        return history_results
    return waveform_video_path, audio_path, seed
# Simplified prediction entry point (for the API)
def predict_simple(
    model: str, text: str, melody_filepath: str = None, duration: int = 10,
    dimension: int = 2, topk: int = 200, topp: float = 0.01, temperature: float = 1.0,
    cfg_coef: float = 4.0, background: str = "./assets/background.png",
    title: str = "UnlimitedMusicGen", settings_font: str = "./assets/arial.ttf",
    settings_font_color: str = "#c87f05", seed: int = -1, overlap: int = 1,
    prompt_index: int = -1, include_title: bool = True, include_settings: bool = True,
    harmony_only: bool = False, profile: str = "Satoshi Nakamoto",
    segment_length: int = 30, settings_font_size: int = 28,
    settings_animate_waveform: bool = False, video_orientation: str = "Landscape",
    return_history_json: bool = False
) -> tp.List[tp.Tuple[str, str, str]]:
    """Run ``predict`` and upload the resulting files to the dataset repo.

    The parameters mirror ``predict`` (with ``excerpt_duration`` fixed at
    3.5). After generation the files are uploaded with
    ``upload_files_to_repo`` under
    ``user_uploads/<user>/<title>/<seed>/<timestamp>``; when the upload
    succeeds the local paths are replaced by the values it returns.

    Returns:
        A ``(video, audio, seed)`` tuple. If generation produced nothing,
        the raw result of ``predict`` is returned without uploading.
        NOTE(review): the declared return annotation is left untouched
        because it may feed the generated API schema.
    """
    # Resolve the name used for the upload folder. An empty profile falls
    # back to "default_user" (previously it was replaced by the uncalled
    # `modules.user_history.get_profile` function object).
    profile_username_to_send = "default_user"
    if not profile:
        profile = None
    else:
        actual_profile_data = profile
        if hasattr(profile, 'value') and profile.value is not None:
            actual_profile_data = profile.value
        if hasattr(actual_profile_data, 'username') and actual_profile_data.username:
            profile_username_to_send = actual_profile_data.username
        elif isinstance(actual_profile_data, str) and actual_profile_data:
            profile_username_to_send = actual_profile_data

    UMG_result = predict(
        model, text, melody_filepath=melody_filepath, duration=duration,
        dimension=dimension, topk=topk, topp=topp, temperature=temperature,
        cfg_coef=cfg_coef, background=background, title=title,
        settings_font=settings_font, settings_font_color=settings_font_color,
        seed=seed, overlap=overlap, prompt_index=prompt_index,
        include_title=include_title, include_settings=include_settings,
        harmony_only=harmony_only, profile=profile,
        segment_length=segment_length, settings_font_size=settings_font_size,
        settings_animate_waveform=settings_animate_waveform,
        video_orientation=video_orientation, excerpt_duration=3.5,
        return_history_json=return_history_json
    )

    folder_name = f"user_uploads/{convert_title_to_filename(profile_username_to_send)}/{convert_title_to_filename(title)}"
    timestamp = time.strftime('%Y%m%d%H%M%S')

    if return_history_json:
        # predict() falls back to a plain tuple (or None) when generation
        # failed; there is nothing to upload in that case.
        if not isinstance(UMG_result, dict):
            return UMG_result
        upload_result = upload_files_to_repo(
            files=[UMG_result["video_path"], UMG_result["audio_path"], UMG_result["image_path"]],
            repo_id=HF_REPO_ID,
            folder_name=f"{folder_name}/{UMG_result['metadata']['Seed']}/{timestamp}",
            create_permalink=False,
            repo_type="dataset"
        )
        if upload_result:
            # Presumably each entry is (name, link); index 1 is used as the new path.
            UMG_result["video_path"] = upload_result[0][1]
            UMG_result["audio_path"] = upload_result[1][1]
            UMG_result["image_path"] = upload_result[2][1]
        return UMG_result["video_path"], UMG_result["audio_path"], UMG_result["metadata"]["Seed"]

    if UMG_result is None or UMG_result[0] is None:
        return UMG_result
    upload_result = upload_files_to_repo(
        files=[UMG_result[0], UMG_result[1]],
        repo_id=HF_REPO_ID,
        folder_name=f"{folder_name}/{UMG_result[2]}/{timestamp}",
        create_permalink=False,
        repo_type="dataset"
    )
    if upload_result:
        UMG_result = upload_result[0][1], upload_result[1][1], UMG_result[2]
    return UMG_result
# Set the static paths
gr.set_static_paths(paths=["fonts/", "assets/", "images/"])
# UI construction and launch
def ui(**kwargs):
    """Build the Gradio interface, wire its events and launch the server.

    Recognised keyword arguments (all optional; unknown keys are ignored):
        listen: Host/IP to bind to. Defaults to ``0.0.0.0`` when the
            ``SPACE_ID`` environment variable is set, else ``127.0.0.1``.
        server_port: Port to bind to; only forwarded when greater than 0 so
            that Gradio otherwise picks its own default.
        share: Whether to create a public share link.
        username, password: When both are non-empty they are passed to
            ``launch`` as ``auth``.
        inbrowser: Open the UI in a browser after launch.
    """
    with gr.Blocks(title="UnlimitedMusicGen", css_paths="style_20250331.css", theme='Surn/beeuty') as demo:
        with gr.Tab("UnlimitedMusicGen"):
            gr.Markdown(
                """
# UnlimitedMusicGen
This is your private demo for [UnlimitedMusicGen](https://github.com/Oncorporation/audiocraft), a simple and controllable model for music generation
presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
⚠ **Disclaimer**: This won't run on CPU only. Clone this App and run on GPU instance!
Todo: Working on improved Interrupt.
Theme Available at ["Surn/Beeuty"](https://huggingface.co/spaces/Surn/Beeuty)
"""
            )
            if IS_SHARED_SPACE and not torch.cuda.is_available():
                gr.Markdown("""
⚠ This Space doesn't work in this shared UI ⚠
<a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
to use it privately, or use the <a href="https://huggingface.co/spaces/facebook/MusicGen">public demo</a>
""")
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        with gr.Column():
                            text = gr.Text(
                                label="Describe your music",
                                interactive=True,
                                value="4/4 100bpm 320kbps 32khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out",
                                key="prompt",
                                lines=4
                            )
                            autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
                        with gr.Column():
                            duration = gr.Slider(
                                minimum=1, maximum=720, value=10,
                                label="Duration (s)", interactive=True,
                                key="total_duration", step=1
                            )
                            model = gr.Radio(
                                ["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large", "style"],
                                label="AI Model", value="medium", interactive=True,
                                key="chosen_model"
                            )
                    with gr.Row():
                        submit = gr.Button("Generate", elem_id="btn-generate")
                        _ = gr.Button("Interrupt", elem_id="btn-interrupt").click(fn=interrupt, queue=False)
                    with gr.Row():
                        with gr.Column():
                            radio = gr.Radio(
                                ["file", "mic"], value="file",
                                label="Condition on a melody (optional) File or Mic"
                            )
                            melody_filepath = gr.Audio(
                                value=None, sources=["upload"],
                                type="filepath", label="Melody Condition (optional)",
                                interactive=True, elem_id="melody-input", key="melody_input"
                            )
                        with gr.Column():
                            harmony_only = gr.Radio(
                                label="Use Harmony Only",
                                choices=["No", "Yes"], value="No",
                                interactive=True,
                                info="Remove Drums?"
                            )
                            prompt_index = gr.Slider(
                                label="Melody Condition Sample Segment",
                                minimum=-1, maximum=MAX_PROMPT_INDEX, step=1, value=-1,
                                interactive=True,
                                info="Which 10-30 second segment to condition with, -1 = align with conditioning melody"
                            )
                    with gr.Accordion("Video", open=False):
                        with gr.Row():
                            background = gr.Image(
                                value="./assets/background.png", sources=["upload"],
                                label="Background", width=768, height=512,
                                type="filepath", interactive=True, key="background_imagepath"
                            )
                            with gr.Column():
                                include_title = gr.Checkbox(
                                    label="Add Title", value=True, interactive=True,
                                    key="add_title"
                                )
                                include_settings = gr.Checkbox(
                                    label="Add Settings to background", value=True,
                                    interactive=True, key="add_settings"
                                )
                                video_orientation = gr.Radio(
                                    label="Video Orientation",
                                    choices=["Landscape", "Portrait"], value="Landscape",
                                    interactive=True, key="video_orientation"
                                )
                        with gr.Row():
                            title = gr.Textbox(
                                label="Title", value="UnlimitedMusicGen",
                                interactive=True, key="song_title"
                            )
                            settings_font = gr.Text(
                                label="Settings Font", value="./assets/arial.ttf",
                                interactive=True
                            )
                            settings_font_color = gr.ColorPicker(
                                label="Settings Font Color", value="#c87f05",
                                interactive=True, key="settings_font_color"
                            )
                            settings_font_size = gr.Slider(
                                minimum=8, maximum=64, value=28, step=1,
                                label="Settings Font Size", interactive=True,
                                key="settings_font_size"
                            )
                            settings_animate_waveform = gr.Checkbox(
                                label="Animate Waveform", value=False,
                                interactive=True, key="animate_waveform"
                            )
                    with gr.Accordion("Expert", open=False):
                        with gr.Row():
                            segment_length = gr.Slider(
                                minimum=10, maximum=30, value=30, step=1,
                                label="Music Generation Segment Length (s)",
                                interactive=True, key="segment_length"
                            )
                            overlap = gr.Slider(
                                minimum=0, maximum=14, value=1, step=1,
                                label="Segment Overlap", interactive=True
                            )
                            dimension = gr.Slider(
                                minimum=-2, maximum=2, value=2, step=1,
                                label="Dimension",
                                info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)",
                                interactive=True
                            )
                        with gr.Row():
                            topk = gr.Number(
                                label="Top-k", value=280, precision=0,
                                interactive=True,
                                info="more structured"
                            )
                            topp = gr.Number(
                                label="Top-p", value=1150, precision=0,
                                interactive=True,
                                info="more variation, overwrites Top-k if not zero"
                            )
                            temperature = gr.Number(
                                label="Randomness Temperature", value=0.7,
                                precision=None, step=0.1, interactive=True,
                                info="less than one to follow Melody Condition song closely"
                            )
                            cfg_coef = gr.Number(
                                label="Classifier Free Guidance", value=3.75,
                                precision=None, step=0.1, interactive=True,
                                info="3.0-4.0, stereo and small need more"
                            )
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed", value=-1, precision=0,
                                interactive=True, key="seed"
                            )
                            # Dice button: reset the seed to "random" (-1)
                            gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(
                                fn=lambda: -1, outputs=[seed], queue=False
                            )
                            reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")
                with gr.Column() as c:
                    output = gr.Video(
                        label="Generated Music", interactive=False,
                        show_download_button=True, show_share_button=True,
                        autoplay=False
                    )
                    wave_file = gr.File(
                        label=".wav file", elem_id="output_wavefile",
                        interactive=True
                    )
                    seed_used = gr.Number(
                        label='Seed used', value=-1, interactive=False
                    )

            # UI event bindings
            radio.change(
                toggle_audio_src, radio, [melody_filepath],
                queue=False, show_progress=False, api_name="audio_src_change"
            )
            video_orientation.change(
                load_background_filepath, inputs=[video_orientation],
                outputs=[background], queue=False, show_progress=False,
                api_name="video_orientation_change"
            )
            melody_filepath.change(
                load_melody_filepath,
                inputs=[melody_filepath, title, model, topp, temperature, cfg_coef, segment_length],
                outputs=[title, prompt_index, model, topp, temperature, cfg_coef, overlap],
                api_name="melody_filepath_change", queue=False
            )
            reuse_seed.click(
                fn=lambda x: x, inputs=[seed_used], outputs=[seed],
                queue=False, api_name="reuse_seed_click"
            )
            autoplay_cb.change(
                fn=lambda x: gr.update(autoplay=x), inputs=[autoplay_cb],
                outputs=[output], queue=False, api_name="autoplay_cb_change"
            )
            segment_length.release(
                fn=load_melody_filepath, queue=False, api_name="segment_length_change",
                trigger_mode="once",
                inputs=[melody_filepath, title, model, topp, temperature, cfg_coef, segment_length],
                outputs=[title, prompt_index, model, topp, temperature, cfg_coef, overlap],
                show_progress="minimal"
            )

            # Examples
            gr.Examples(
                examples=[
                    [
                        "4/4 120bpm 320kbps 32khz, An 80s driving pop song with heavy drums and synth pads in the background",
                        "./assets/bach.mp3",
                        "melody",
                        "80s Pop Synth",
                        950,
                        0.6,
                        3.5
                    ],
                    [
                        "4/4 120bpm 320kbps 32khz, A cheerful country song with acoustic guitars",
                        "./assets/bolero_ravel.mp3",
                        "stereo-melody-large",
                        "Country Guitar",
                        750,
                        0.7,
                        4.0
                    ],
                    [
                        "4/4 120bpm 320kbps 32khz, 90s rock song with electric guitar and heavy drums",
                        None,
                        "stereo-medium",
                        "90s Rock Guitar",
                        1150,
                        0.7,
                        3.75
                    ],
                    [
                        "4/4 120bpm 320kbps 32khz, a light and cheery EDM track, with syncopated drums, aery pads, and strong emotions",
                        "./assets/bach.mp3",
                        "melody-large",
                        "EDM my Bach",
                        500,
                        0.7,
                        3.75
                    ],
                    [
                        "4/4 320kbps 32khz, lofi slow bpm electro chill with organic samples",
                        None,
                        "medium",
                        "LoFi Chill",
                        0,
                        0.7,
                        4.0
                    ],
                ],
                inputs=[text, melody_filepath, model, title, topp, temperature, cfg_coef],
                outputs=[output]
            )

        with gr.Tab("User History") as history_tab:
            modules.user_history.setup(display_type="video_path")
            modules.user_history.render()

        user_profile = gr.State(None)

        with gr.Row("Versions") as versions_row:
            gr.HTML(value=versions_html(), visible=True, elem_id="versions")

        # Main predict call: fetch the user profile first, then generate
        submit.click(
            modules.user_history.get_profile,
            inputs=[],
            outputs=[user_profile],
            queue=True,
            api_name="submit"
        ).then(
            predict,
            inputs=[
                model, text, melody_filepath, duration, dimension, topk, topp,
                temperature, cfg_coef, background, title, settings_font,
                settings_font_color, seed, overlap, prompt_index, include_title,
                include_settings, harmony_only, user_profile, segment_length,
                settings_font_size, settings_animate_waveform, video_orientation
            ],
            outputs=[output, wave_file, seed_used],
            scroll_to_output=True, show_api=False
        )

        # Plain API endpoints; registered while the Blocks context is active.
        gr.api(ping, api_name="ping")
        gr.api(predict_simple)

    # Launch the interface
    launch_kwargs = {
        'server_name': kwargs.get('listen', '0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1'),
        'share': kwargs.get('share', False),
        'allowed_paths': ["assets", "./assets", "images", "./images", 'e:/TMP'],
        'favicon_path': "./assets/favicon.ico",
        'mcp_server': True,
        'ssr_mode': False
    }
    # Only forward an explicit port; 0/None means "let Gradio choose".
    server_port = kwargs.get('server_port') or 0
    if server_port > 0:
        launch_kwargs['server_port'] = server_port
    # Honour the authentication and browser flags accepted by the CLI.
    username = kwargs.get('username')
    password = kwargs.get('password')
    if username and password:
        launch_kwargs['auth'] = (username, password)
    if kwargs.get('inbrowser'):
        launch_kwargs['inbrowser'] = True

    demo.queue(max_size=10, api_open=True).launch(**launch_kwargs)
# Script entry point
def build_arg_parser():
    """Create the command-line parser for the app.

    Returns:
        An ``argparse.ArgumentParser`` defining ``--listen``, ``--username``,
        ``--password``, ``--server_port``, ``--inbrowser``, ``--share``,
        ``--unload_model``, ``--unload_to_cpu`` and ``--cache``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--listen',
        type=str,
        default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
        help='IP to listen on for connections to Gradio',
    )
    parser.add_argument(
        '--username', type=str, default='', help='Username for authentication'
    )
    parser.add_argument(
        '--password', type=str, default='', help='Password for authentication'
    )
    parser.add_argument(
        '--server_port',
        type=int,
        default=0,
        help='Port to run the server listener on',
    )
    parser.add_argument(
        '--inbrowser', action='store_true', help='Open in browser'
    )
    parser.add_argument(
        '--share', action='store_true', help='Share the gradio UI'
    )
    parser.add_argument(
        '--unload_model', action='store_true',
        help='Unload the model after every generation to save GPU memory'
    )
    parser.add_argument(
        '--unload_to_cpu', action='store_true',
        help='Move the model to main RAM after every generation to save GPU memory but reload faster than after full unload'
    )
    parser.add_argument(
        '--cache', action='store_true',
        help='Cache models in RAM to quickly switch between them'
    )
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()

    # Set the module-level flags
    UNLOAD_MODEL = args.unload_model
    MOVE_TO_CPU = args.unload_to_cpu
    if args.cache:
        MODELS = {}

    # Launch the interface. vars(args) already carries `share` and
    # `unload_to_cpu`; passing them again as explicit keywords raised
    # "got multiple values for keyword argument".
    ui(**vars(args))