""" Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. This source code is licensed under the license found in the LICENSE file in the root directory of this source tree. """ import os import sys import time import gc import random import warnings import typing as tp from pathlib import Path from tempfile import NamedTemporaryFile import argparse import subprocess import torch import gradio as gr import librosa from mutagen.mp4 import MP4 # Importy z lokalnych modułów from audiocraft.models import MusicGen from audiocraft.data.audio import audio_write from audiocraft.data.audio_utils import apply_fade, apply_tafade, apply_splice_effect from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING from audiocraft.utils import utils import numpy as np # Importy z modułów projektu import modules.user_history from modules.version_info import versions_html, commit_hash, get_xformers_version from modules.gradio import * from modules.file_utils import ( get_file_parts, get_filename_from_filepath, convert_title_to_filename, get_unique_file_path, delete_file, download_and_save_image, download_and_save_file ) from modules.constants import ( IS_SHARED_SPACE, HF_REPO_ID, TMPDIR, HF_API_TOKEN ) from modules.storage import upload_files_to_repo # Inicjalizacja zmiennych globalnych MODEL = None MODELS = None INTERRUPTED = False UNLOAD_MODEL = False MOVE_TO_CPU = False MAX_PROMPT_INDEX = 0 git = os.environ.get('GIT', "git") # Konfiguracja środowiska GPU (opcjonalne, dostosuj do swoich potrzeb) os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True" os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' os.environ['CUDA_MODULE_LOADING'] = 'LAZY' os.environ['USE_FLASH_ATTENTION'] = '1' os.environ['XFORMERS_FORCE_DISABLE_TRITON'] = '1' # Funkcja przerwania generacji def interrupt_callback(): return INTERRUPTED def interrupt(): global INTERRUPTING INTERRUPTING = True # Klasa do czyszczenia plików tymczasowych 
class FileCleaner:
    """Remember file paths and delete them once they exceed a lifetime.

    Entries are stored in insertion order as ``(timestamp, Path)`` tuples.
    Expired files are removed lazily, each time :meth:`add` is called.
    """

    def __init__(self, file_lifetime: float = 3600):
        """Create a cleaner whose files expire after ``file_lifetime`` seconds."""
        self.file_lifetime = file_lifetime
        self.files = []

    def add(self, path: tp.Union[str, Path]):
        """Purge expired files, then start tracking ``path``."""
        self._cleanup()
        self.files.append((time.time(), Path(path)))

    def _cleanup(self):
        """Delete tracked files older than the lifetime, oldest first."""
        now = time.time()
        # Iterate over a copy because expired entries are popped as we go.
        for time_added, path in list(self.files):
            if now - time_added > self.file_lifetime:
                if path.exists():
                    path.unlink()
                self.files.pop(0)
            else:
                # Entries are in insertion order, so the rest are newer.
                break


def ping():
    """Return True; registered as the "ping" API endpoint in ``ui``."""
    return True


def toggle_audio_src(choice):
    """Return a component update switching the melody input source.

    Args:
        choice: ``"mic"`` selects the microphone; anything else selects
            file upload.

    The update uses ``sources=[...]`` (a list) to match how the
    ``gr.Audio`` component is constructed in ``ui``; the previous
    ``source=`` keyword did not match that constructor.
    """
    if choice == "mic":
        return gr.update(sources=["microphone"], value=None, label="Microphone")
    return gr.update(sources=["upload"], value=None, label="File")


def get_waveform(*args, **kwargs):
    """Call ``gr.make_waveform`` with warnings suppressed and print the time taken.

    All arguments are forwarded unchanged; the result of
    ``gr.make_waveform`` is returned.
    """
    be = time.time()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        out = gr.make_waveform(*args, **kwargs)
        print("Make a video took", time.time() - be)
        return out


def load_model(version, progress=gr.Progress(track_tqdm=True)):
    """Load (or fetch from the cache) the MusicGen model named ``version``.

    When the module-level ``MODELS`` cache is ``None`` the model is always
    loaded with ``MusicGen.get_pretrained``. Otherwise the current ``MODEL``
    (if any) is first moved to the CPU, then the requested model is either
    loaded and stored in the cache, or taken from the cache and moved to
    ``'cuda'``.

    Args:
        version: model identifier passed to ``MusicGen.get_pretrained``.
        progress: Gradio progress tracker (tqdm tracking enabled).

    Returns:
        The loaded model object.
    """
    global MODEL, MODELS, UNLOAD_MODEL
    print(f"Loading model {version}")
    # NOTE(review): `tqdm` is not imported explicitly in this file; it is
    # presumably provided by `from modules.gradio import *` -- verify.
    with tqdm(total=100, desc=f"Loading model '{version}'", unit="step") as pbar:
        if MODELS is None:
            pbar.update(50)
            result = MusicGen.get_pretrained(version)
            pbar.update(50)
            return result

        t1 = time.monotonic()
        if MODEL is not None:
            MODEL.to('cpu')
            print(f"Previous model moved to CPU in {time.monotonic() - t1:.2f}s")
        pbar.update(30)
        t1 = time.monotonic()
        if MODELS.get(version) is None:
            print(f"Loading model {version} from disk")
            result = MusicGen.get_pretrained(version)
            MODELS[version] = result
            print(f"Model loaded in {time.monotonic() - t1:.2f}s")
            pbar.update(70)
            return result
        result = MODELS[version].to('cuda')
        print(f"Cached model loaded in {time.monotonic() - t1:.2f}s")
        # 30 steps were already reported above; 70 completes the bar
        # (the previous update(100) overshot the total of 100).
        pbar.update(70)
        return result


def get_melody(melody_filepath):
    """Load an audio file and return it as ``(sample_rate, samples)``.

    ``librosa.load`` returns ``(samples, sample_rate)``; the order is
    swapped here because the callers in this file read index 0 as the
    sample rate and index 1 as the audio data. ``sr=None`` keeps the
    file's native sample rate.
    """
    samples, sample_rate = librosa.load(melody_filepath, sr=None)
    return sample_rate, samples


def git_tag():
    """Return a version tag string for the running code.

    Tries ``git describe --tags`` first. If that fails for any reason,
    falls back to the first non-empty line of ``CHANGELOG.md`` two
    directories above this file. Returns ``""`` when both fail.
    """
    try:
        return subprocess.check_output(
            [git, "describe", "--tags"], shell=False, encoding='utf8'
        ).strip()
    except Exception:
        # Best effort: git may be missing or this may not be a checkout.
        try:
            changelog_md = Path(__file__).parent.parent / "CHANGELOG.md"
            with changelog_md.open(encoding="utf-8") as file:
                return next((line.strip() for line in file if line.strip()), "")
        except Exception:
            return ""


def load_background_filepath(video_orientation):
    """Return the default background image path for the given orientation."""
    if video_orientation == "Landscape":
        return "./assets/background.png"
    return "./assets/background_portrait.png"


def load_melody_filepath(melody_filepath, title, assigned_model, topp, temperature, cfg_coef, segment_length=30):
    """Compute UI updates after the melody file or segment length changes.

    Returns a 7-tuple of values/updates for, in order: title, prompt index
    slider, model radio, top-p, temperature, cfg coefficient and overlap
    slider (matching the ``outputs`` lists wired up in ``ui``).

    When no melody is given, the model is reset to ``"medium"`` and the
    prompt index maximum to 0. Otherwise the melody is loaded, the number
    of whole ``segment_length``-second segments (capped at 25) becomes the
    new prompt index maximum and is stored in ``MAX_PROMPT_INDEX``, and a
    non-melody model choice is replaced by ``"melody-large"``. If the title
    is empty or still contains "MusicGen", it is derived from the file name
    and top-p / temperature / cfg are replaced with fixed presets.
    """
    global MAX_PROMPT_INDEX
    symbols = ['_', '.', '-']
    MAX_OVERLAP = int(segment_length // 2) - 1

    if (melody_filepath is None) or (melody_filepath == ""):
        return (
            title,
            gr.update(maximum=0, value=-1),
            gr.update(value="medium", interactive=True),
            gr.update(value=topp),
            gr.update(value=temperature),
            gr.update(value=cfg_coef),
            gr.update(maximum=MAX_OVERLAP)
        )

    if (title is None) or ("MusicGen" in title) or (title == ""):
        melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
        # Turn separators into spaces and title-case the file name.
        for symbol in symbols:
            melody_name = melody_name.replace(symbol, ' ').title()
        topp = 800
        temperature = 0.5
        cfg_coef = 3.25
    else:
        melody_name = title

    if "melody" not in assigned_model:
        assigned_model = "melody-large"

    print(f"Melody name: {melody_name}, Melody Filepath: {melody_filepath}, Model: {assigned_model}\n")

    melody = get_melody(melody_filepath)
    sr, melody_data = melody[0], melody[1]
    segment_samples = sr * segment_length
    total_melodys = max(min((len(melody_data) // segment_samples), 25), 0)
    print(f"Melody length: {len(melody_data)}, Melody segments: {total_melodys}\n")
    MAX_PROMPT_INDEX = total_melodys

    return (
        gr.update(value=melody_name),
        gr.update(maximum=MAX_PROMPT_INDEX, value=-1),
        gr.update(value=assigned_model, interactive=True),
        gr.update(value=topp),
        gr.update(value=temperature),
        gr.update(value=cfg_coef),
        gr.update(maximum=MAX_OVERLAP)
    )


def _resolve_username(profile):
    """Return the user name carried by ``profile``.

    ``profile.value.username`` is used when both attributes exist,
    ``"default_user"`` when ``profile`` is None, and ``profile`` itself
    otherwise (e.g. when it is already a plain string).
    """
    if hasattr(profile, 'value') and hasattr(profile.value, 'username'):
        return profile.value.username
    return "default_user" if profile is None else profile


def predict(
    model,
    text,
    melody_filepath=None,
    duration=10,
    dimension=2,
    topk=200,
    topp=0,
    temperature=1.0,
    cfg_coef=4.0,
    background=None,
    title="UnlimitedMusicGen",
    settings_font="./assets/arial.ttf",
    settings_font_color="#c87f05",
    seed=-1,
    overlap=1,
    prompt_index=0,
    include_title=True,
    include_settings=True,
    harmony_only=False,
    profile=None,
    segment_length=30,
    settings_font_size=28,
    settings_animate_waveform=False,
    video_orientation="Landscape",
    excerpt_duration=3.5,
    return_history_json=False,
    progress=gr.Progress(track_tqdm=True)
):
    """Generate music, render a waveform video and save it to the user history.

    Steps, in order: resolve/download the melody and background, validate
    sampling parameters, load or switch the model, generate audio in
    segments (continuing from the tail of the previous segment), cross-fade
    the segments together, write a WAV file, render the waveform video,
    tag the MP4 with metadata and store the result via
    ``modules.user_history.save_file``.

    Args:
        model: model name; also inspected for the substrings "melody"
            and "style" to choose the generation path.
        text: text prompt.
        melody_filepath: local path or http(s) URL of a conditioning
            melody; ``None``, ``""`` or ``"None"`` disables it.
        duration: requested length in seconds (capped at 720).
        dimension: tensor dimension along which segments are concatenated.
        topk, topp, temperature, cfg_coef: sampling parameters passed to
            ``MODEL.set_generation_params``.
        background: background image path or http(s) URL; the default for
            ``video_orientation`` is used when empty.
        title: title used for the file name, overlay and metadata.
        settings_font, settings_font_color, settings_font_size: overlay
            text styling.
        seed: RNG seed; a negative value picks a random one.
        overlap: seconds of overlap between segments (capped at 15).
        prompt_index, harmony_only: forwarded to
            ``generate_music_segments``.
        include_title, include_settings: whether to draw the title /
            settings text on the background image.
        profile: user profile object, user name string or None.
        segment_length: maximum length of one generated segment (s).
        settings_animate_waveform: forwarded as ``animate`` to the
            waveform renderer.
        video_orientation: "Landscape" or anything else for portrait.
        excerpt_duration: excerpt length for the style conditioner.
        return_history_json: return the history record instead of a tuple.
        progress: Gradio progress tracker.

    Returns:
        ``(video_path, wav_path, seed)``; or the value returned by
        ``modules.user_history.save_file`` when ``return_history_json`` is
        true; or ``(None, None, seed)`` when generation failed or produced
        no audio.

    Raises:
        gr.Error: on invalid sampling parameters, model loading failure
            or when the run was interrupted.
    """
    global MODEL, INTERRUPTED, INTERRUPTING, MOVE_TO_CPU
    melody_name = "Not Used"
    melody_extension = "Not Used"
    melody = None
    if melody_filepath in ["None", ""]:
        melody_filepath = None

    # Download the melody when a URL was given
    if melody_filepath and melody_filepath.startswith(("http://", "https://")):
        melody_filepath = download_and_save_file(
            melody_filepath,
            Path(TMPDIR) / str(_resolve_username(profile)),
            HF_API_TOKEN
        )

    # Resolve the background; download it when a URL was given
    if background is None or background in ["None", ""]:
        background = load_background_filepath(video_orientation)
    if background.startswith(("http://", "https://")):
        background = download_and_save_image(
            background,
            Path(TMPDIR) / str(_resolve_username(profile)),
            HF_API_TOKEN
        )

    # Load the melody from disk
    if melody_filepath:
        melody_name, melody_extension = get_filename_from_filepath(melody_filepath)
        melody = get_melody(melody_filepath)

    INTERRUPTED = False
    INTERRUPTING = False

    # Parameter validation
    if temperature < 0:
        raise gr.Error("Temperature must be >= 0.")
    if topk < 0:
        raise gr.Error("Topk must be non-negative.")
    if topp < 0:
        raise gr.Error("Topp must be non-negative.")

    # Release the current model when a different one is requested
    if MODEL is not None and model not in MODEL.name:
        print(f"Switching model from {MODEL.name} to {model}. Cleaning up resources.")
        MODEL = None
        gc.collect()
        torch.cuda.empty_cache()

    try:
        if MODEL is None or model not in MODEL.name:
            MODEL = load_model(model)
        else:
            if MOVE_TO_CPU:
                MODEL.to('cuda')
    except Exception as e:
        raise gr.Error(f"Error loading model '{model}': {str(e)}. Try a different model.")

    # Parameter limits
    duration = min(duration, 720)
    overlap = min(overlap, 15)

    output = None
    segment_duration = duration
    initial_duration = duration
    output_segments = []

    # Generation loop (handles tracks longer than one segment)
    while duration > 0:
        if not output_segments:
            segment_duration = min(segment_duration, MODEL.lm.cfg.dataset.segment_duration)
        else:
            segment_duration = min(
                duration + overlap,
                MODEL.lm.cfg.dataset.segment_duration
            )
        segment_duration = min(segment_duration, segment_length + overlap)

        if seed < 0:
            seed = random.randint(0, 0xffff_ffff_ffff)
        torch.manual_seed(seed)

        print(f'Segment duration: {segment_duration}, duration: {duration}, overlap: {overlap}')

        # Generation settings
        MODEL.set_generation_params(
            use_sampling=True,
            top_k=topk,
            top_p=topp,
            temperature=temperature,
            cfg_coef=cfg_coef,
            duration=segment_duration,
            two_step_cfg=False,
            cfg_coef_beta=5 if ("style" in model) and melody else None,
            extend_stride=2 if not ("style" in model) else None,
            rep_penalty=0.5 if not ("style" in model) else None,
        )
        if ("style" in model) and melody:
            MODEL.set_style_conditioner_params(
                eval_q=3,
                excerpt_length=excerpt_duration,
            )
        MODEL.set_custom_progress_callback(gr.Progress(track_tqdm=True))

        try:
            if melody and ("melody" in model or "style" in model):
                if duration > MODEL.duration:
                    output_segments, duration = generate_music_segments(
                        text, melody, seed, MODEL, duration, overlap,
                        MODEL.duration, prompt_index, harmony_only,
                        excerpt_duration, progress=progress
                    )
                else:
                    sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t().unsqueeze(0)
                    if melody.dim() == 2:
                        melody = melody[None]
                    melody = melody[..., :int(sr * MODEL.lm.cfg.dataset.segment_duration)]
                    output = MODEL.generate_with_chroma(
                        descriptions=[text],
                        melody_wavs=melody,
                        melody_sample_rate=sr,
                        progress=False,
                        progress_callback=progress
                    )
                # The melody-conditioned paths produce everything in one
                # pass, so the loop ends here.
                break
            else:
                if not output_segments:
                    next_segment = MODEL.generate(
                        descriptions=[text],
                        progress=False,
                        progress_callback=progress
                    )
                    duration -= segment_duration
                else:
                    # Continue from the last `overlap` seconds of the
                    # previous segment.
                    last_chunk = output_segments[-1][:, :, -overlap*MODEL.sample_rate:]
                    next_segment = MODEL.generate_continuation(
                        last_chunk,
                        MODEL.sample_rate,
                        descriptions=[text],
                        progress=False,
                        progress_callback=progress
                    )
                    duration -= segment_duration - overlap
                if next_segment is not None:
                    output_segments.append(next_segment)
        except Exception as e:
            print(f"Error generating audio: {e}")
            # Previously a gr.Error was constructed here but never raised,
            # so the user saw nothing. gr.Warning shows the message while
            # keeping the existing "return empty result" behaviour.
            gr.Warning(f"Error generating audio: {e}")
            return None, None, seed

        if INTERRUPTING:
            INTERRUPTED = True
            INTERRUPTING = False
            print("Function execution interrupted!")
            raise gr.Error("Interrupted.")

    # Joining the segments
    if output_segments:
        try:
            output = output_segments[0]
            for i in range(1, len(output_segments)):
                if overlap > 0:
                    overlap_samples = overlap * MODEL.sample_rate
                    # Fade out the tail of what we have so far ...
                    overlapping_output_fadeout = output[:, :, -overlap_samples:]
                    overlapping_output_fadeout = apply_tafade(
                        overlapping_output_fadeout,
                        sample_rate=MODEL.sample_rate,
                        duration=overlap,
                        out=True,
                        start=True,
                        shape="linear"
                    )
                    # ... and fade in the head of the next segment.
                    overlapping_output_fadein = output_segments[i][:, :, :overlap_samples]
                    overlapping_output_fadein = apply_tafade(
                        overlapping_output_fadein,
                        sample_rate=MODEL.sample_rate,
                        duration=overlap,
                        out=False,
                        start=False,
                        shape="linear"
                    )
                    overlapping_output = torch.cat(
                        [overlapping_output_fadeout[:, :, :-(overlap_samples // 2)],
                         overlapping_output_fadein],
                        dim=2
                    )
                    output = torch.cat(
                        [output[:, :, :-overlap_samples],
                         overlapping_output,
                         output_segments[i][:, :, overlap_samples:]],
                        dim=dimension
                    )
                else:
                    output = torch.cat([output, output_segments[i]], dim=dimension)
            output = output.detach().cpu().float()[0]
        except Exception as e:
            print(f"Error combining segments: {e}. Using the first segment only.")
            output = output_segments[0].detach().cpu().float()[0]
    else:
        if output is None or output.dim() == 0:
            return None, None, seed
        output = output.detach().cpu().float()[0]

    # Rendering the waveform video
    video_width, video_height = (768, 512) if video_orientation == "Landscape" else (512, 768)
    title_file_name = convert_title_to_filename(title)
    history_results = None

    with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix=title_file_name) as file:
        video_description = (
            f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n"
            f"Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n"
            f"cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n"
            f"Model: {model}\n Melody Condition:{melody_name}\n Sample Segment: {prompt_index}"
        )
        if include_settings or include_title:
            background = add_settings_to_image(
                title if include_title else "",
                video_description if include_settings else "",
                width=video_width,
                height=video_height,
                background_path=background,
                font=settings_font,
                font_color=settings_font_color,
                font_size=settings_font_size
            )
        audio_write(
            file.name,
            output,
            MODEL.sample_rate,
            strategy="loudness",
            loudness_headroom_db=18,
            loudness_compressor=True,
            add_suffix=False,
            channels=2
        )
        waveform_video_path = get_waveform(
            file.name,
            bg_image=background,
            bar_count=45,
            name=title_file_name,
            animate=settings_animate_waveform,
            progress=progress
        )

        # Writing metadata into the MP4 file
        # Only the directory and the new extension are needed here.
        video_dir, _, _, _, video_new_ext = get_file_parts(waveform_video_path)
        new_video_path = get_unique_file_path(video_dir, title_file_name, video_new_ext)

        mp4 = MP4(waveform_video_path)
        mp4["©nam"] = title_file_name
        mp4["desc"] = f"{text}\n Duration: {str(initial_duration)}"

        commit = commit_hash()
        metadata = {
            "Title": title,
            "Year": time.strftime("%Y"),
            "prompt": text,
            "negative_prompt": "",
            "Seed": seed,
            "steps": 1,
            "wdth": video_width,
            "hght": video_height,
            "Dimension": dimension,
            "Top-k": topk,
            "Top-p": topp,
            "Randomness": temperature,
            "cfg": cfg_coef,
            "overlap": overlap,
            "Melody Condition": melody_name,
            "Sample Segment": prompt_index,
            "Duration": initial_duration,
            "Audio": file.name,
            "font": settings_font,
            "font_color": settings_font_color,
            "font_size": settings_font_size,
            "harmony_only": harmony_only,
            "background": background,
            "include_title": include_title,
            "include_settings": include_settings,
            "profile": _resolve_username(profile),
            "commit": commit,
            "tag": git_tag(),
            # The dict literal used to define "version" twice; the second
            # value (this URL) always won, so only it is kept. The Gradio
            # version is still recorded under "gradio" below.
            "version": f"https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/{'huggingface' if commit == '' else commit}",
            "model_version": MODEL.version if MODEL else "Unknown",
            "model_name": MODEL.name if MODEL else "Unknown",
            "model_description": (
                f"{MODEL.audio_channels} channels, {MODEL.sample_rate} Hz"
                if MODEL else "Unknown"
            ),
            "melody_name": melody_name if melody_name else "",
            "melody_extension": melody_extension if melody_extension else "",
            "hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen",
            "python": sys.version,
            "torch": getattr(torch, '__long_version__', torch.__version__),
            "xformers": get_xformers_version(),
            "gradio": gr.__version__,
            "huggingface_space": os.environ.get('SPACE_ID', ''),
            "CUDA": (
                f"CUDA is available. device: {torch.cuda.get_device_name(0)} version: {torch.version.cuda}"
                if torch.cuda.is_available() else "CUDA is not available."
            ),
        }
        for key, value in metadata.items():
            mp4[key] = str(value)
        mp4.save()

        try:
            os.replace(waveform_video_path, new_video_path)
            waveform_video_path = new_video_path
        except Exception as e:
            # Keep the original path when the rename fails.
            print(f"Error renaming file: {e}")

        if waveform_video_path:
            history_results = modules.user_history.save_file(
                profile=_resolve_username(profile),
                image=background,
                audio=file.name,
                video=waveform_video_path,
                label=title,
                metadata=metadata,
                progress=progress
            )

    # Freeing the GPU
    if MOVE_TO_CPU:
        MODEL.to('cpu')
    if UNLOAD_MODEL:
        MODEL = None

    # Dropping large objects
    del output_segments, output, melody, melody_name, melody_extension, metadata, mp4
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    torch.cuda.ipc_collect()

    if return_history_json:
        return history_results
    return waveform_video_path, file.name, seed


def predict_simple(
    model: str,
    text: str,
    melody_filepath: str = None,
    duration: int = 10,
    dimension: int = 2,
    topk: int = 200,
    topp: float = 0.01,
    temperature: float = 1.0,
    cfg_coef: float = 4.0,
    background: str = "./assets/background.png",
    title: str = "UnlimitedMusicGen",
    settings_font: str = "./assets/arial.ttf",
    settings_font_color: str = "#c87f05",
    seed: int = -1,
    overlap: int = 1,
    prompt_index: int = -1,
    include_title: bool = True,
    include_settings: bool = True,
    harmony_only: bool = False,
    profile: str = "Satoshi Nakamoto",
    segment_length: int = 30,
    settings_font_size: int = 28,
    settings_animate_waveform: bool = False,
    video_orientation: str = "Landscape",
    return_history_json: bool = False
) -> tp.List[tp.Tuple[str, str, str]]:
    """Simplified API entry point: run ``predict`` and upload the results.

    Calls ``predict`` with the given arguments (``excerpt_duration`` fixed
    at 3.5), then uploads the produced files to the ``HF_REPO_ID`` dataset
    repository under
    ``user_uploads/<user>/<title>/<seed>/<timestamp>``. When the upload
    returns a result, the local paths are replaced by the second element
    of each upload entry.

    Returns:
        A ``(video, audio, seed)`` tuple, where video/audio are the
        uploaded locations when the upload succeeded and the local paths
        otherwise.
    """
    profile_username_to_send = "default_user"

    if not profile:
        # The getter used to be assigned without being called, which
        # passed a function object on to predict(). It is wired into the
        # UI with no inputs, so it is called without arguments here.
        # NOTE(review): if get_profile requires arguments injected by
        # Gradio, the call raises TypeError and we fall back to None.
        try:
            profile = modules.user_history.get_profile()
        except TypeError:
            profile = None

    if profile:
        actual_profile_data = profile
        if hasattr(profile, 'value') and profile.value is not None:
            actual_profile_data = profile.value
        if hasattr(actual_profile_data, 'username') and actual_profile_data.username:
            profile_username_to_send = actual_profile_data.username
        elif isinstance(actual_profile_data, str) and actual_profile_data:
            profile_username_to_send = actual_profile_data

    UMG_result = predict(
        model,
        text,
        melody_filepath=melody_filepath,
        duration=duration,
        dimension=dimension,
        topk=topk,
        topp=topp,
        temperature=temperature,
        cfg_coef=cfg_coef,
        background=background,
        title=title,
        settings_font=settings_font,
        settings_font_color=settings_font_color,
        seed=seed,
        overlap=overlap,
        prompt_index=prompt_index,
        include_title=include_title,
        include_settings=include_settings,
        harmony_only=harmony_only,
        profile=profile,
        segment_length=segment_length,
        settings_font_size=settings_font_size,
        settings_animate_waveform=settings_animate_waveform,
        video_orientation=video_orientation,
        excerpt_duration=3.5,
        return_history_json=return_history_json
    )

    folder_name = f"user_uploads/{convert_title_to_filename(profile_username_to_send)}/{convert_title_to_filename(title)}"

    if return_history_json:
        upload_result = upload_files_to_repo(
            files=[UMG_result["video_path"], UMG_result["audio_path"], UMG_result["image_path"]],
            repo_id=HF_REPO_ID,
            folder_name=f"{folder_name}/{UMG_result['metadata']['Seed']}/{time.strftime('%Y%m%d%H%M%S')}",
            create_permalink=False,
            repo_type="dataset"
        )
        if upload_result:
            UMG_result["video_path"] = upload_result[0][1]
            UMG_result["audio_path"] = upload_result[1][1]
            UMG_result["image_path"] = upload_result[2][1]
        content = UMG_result["video_path"], UMG_result["audio_path"], UMG_result["metadata"]["Seed"]
        UMG_result = content
    else:
        upload_result = upload_files_to_repo(
            files=[UMG_result[0], UMG_result[1]],
            repo_id=HF_REPO_ID,
            folder_name=f"{folder_name}/{UMG_result[2]}/{time.strftime('%Y%m%d%H%M%S')}",
            create_permalink=False,
            repo_type="dataset"
        )
        if upload_result:
            UMG_result = upload_result[0][1], upload_result[1][1], UMG_result[2]

    return UMG_result


# Static paths served by Gradio
gr.set_static_paths(paths=["fonts/", "assets/", "images/"])


def ui(**kwargs):
    """Build the Gradio interface, register the API endpoints and launch it.

    Recognised keyword arguments: ``listen`` (server host), ``server_port``
    (used only when greater than 0) and ``share``. Other keyword arguments
    are accepted and ignored.
    """
    with gr.Blocks(title="UnlimitedMusicGen", css_paths="style_20250331.css", theme='Surn/beeuty') as demo:
        with gr.Tab("UnlimitedMusicGen"):
            gr.Markdown(
                """
                # UnlimitedMusicGen

                This is your private demo for [UnlimitedMusicGen](https://github.com/Oncorporation/audiocraft), a simple and controllable model for music generation
                presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)

                ⚠ **Disclaimer**: This won't run on CPU only. Clone this App and run on GPU instance!

                Todo: Working on improved Interrupt.

                Theme Available at ["Surn/Beeuty"](https://huggingface.co/spaces/Surn/Beeuty)
                """
            )
            if IS_SHARED_SPACE and not torch.cuda.is_available():
                gr.Markdown("""
                    ⚠ This Space doesn't work in this shared UI ⚠

                    Duplicate Space to use it privately, or use the public demo
                    """)
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        with gr.Column():
                            text = gr.Text(
                                label="Describe your music",
                                interactive=True,
                                value="4/4 100bpm 320kbps 32khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out",
                                key="prompt",
                                lines=4
                            )
                            autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
                        with gr.Column():
                            duration = gr.Slider(
                                minimum=1,
                                maximum=720,
                                value=10,
                                label="Duration (s)",
                                interactive=True,
                                key="total_duration",
                                step=1
                            )
                            model = gr.Radio(
                                ["melody", "medium", "small", "large", "melody-large",
                                 "stereo-small", "stereo-medium", "stereo-large",
                                 "stereo-melody", "stereo-melody-large", "style"],
                                label="AI Model",
                                value="medium",
                                interactive=True,
                                key="chosen_model"
                            )
                    with gr.Row():
                        submit = gr.Button("Generate", elem_id="btn-generate")
                        _ = gr.Button("Interrupt", elem_id="btn-interrupt").click(fn=interrupt, queue=False)
                    with gr.Row():
                        with gr.Column():
                            radio = gr.Radio(
                                ["file", "mic"],
                                value="file",
                                label="Condition on a melody (optional) File or Mic"
                            )
                            melody_filepath = gr.Audio(
                                value=None,
                                sources=["upload"],
                                type="filepath",
                                label="Melody Condition (optional)",
                                interactive=True,
                                elem_id="melody-input",
                                key="melody_input"
                            )
                        with gr.Column():
                            harmony_only = gr.Radio(
                                label="Use Harmony Only",
                                choices=["No", "Yes"],
                                value="No",
                                interactive=True,
                                info="Remove Drums?"
                            )
                            prompt_index = gr.Slider(
                                label="Melody Condition Sample Segment",
                                minimum=-1,
                                maximum=MAX_PROMPT_INDEX,
                                step=1,
                                value=-1,
                                interactive=True,
                                info="Which 10-30 second segment to condition with, -1 = align with conditioning melody"
                            )
                    with gr.Accordion("Video", open=False):
                        with gr.Row():
                            background = gr.Image(
                                value="./assets/background.png",
                                sources=["upload"],
                                label="Background",
                                width=768,
                                height=512,
                                type="filepath",
                                interactive=True,
                                key="background_imagepath"
                            )
                            with gr.Column():
                                include_title = gr.Checkbox(
                                    label="Add Title",
                                    value=True,
                                    interactive=True,
                                    key="add_title"
                                )
                                include_settings = gr.Checkbox(
                                    label="Add Settings to background",
                                    value=True,
                                    interactive=True,
                                    key="add_settings"
                                )
                                video_orientation = gr.Radio(
                                    label="Video Orientation",
                                    choices=["Landscape", "Portrait"],
                                    value="Landscape",
                                    interactive=True,
                                    key="video_orientation"
                                )
                        with gr.Row():
                            title = gr.Textbox(
                                label="Title",
                                value="UnlimitedMusicGen",
                                interactive=True,
                                key="song_title"
                            )
                            settings_font = gr.Text(
                                label="Settings Font",
                                value="./assets/arial.ttf",
                                interactive=True
                            )
                            settings_font_color = gr.ColorPicker(
                                label="Settings Font Color",
                                value="#c87f05",
                                interactive=True,
                                key="settings_font_color"
                            )
                            settings_font_size = gr.Slider(
                                minimum=8,
                                maximum=64,
                                value=28,
                                step=1,
                                label="Settings Font Size",
                                interactive=True,
                                key="settings_font_size"
                            )
                            settings_animate_waveform = gr.Checkbox(
                                label="Animate Waveform",
                                value=False,
                                interactive=True,
                                key="animate_waveform"
                            )
                    with gr.Accordion("Expert", open=False):
                        with gr.Row():
                            segment_length = gr.Slider(
                                minimum=10,
                                maximum=30,
                                value=30,
                                step=1,
                                label="Music Generation Segment Length (s)",
                                interactive=True,
                                key="segment_length"
                            )
                            overlap = gr.Slider(
                                minimum=0,
                                maximum=14,
                                value=1,
                                step=1,
                                label="Segment Overlap",
                                interactive=True
                            )
                            dimension = gr.Slider(
                                minimum=-2,
                                maximum=2,
                                value=2,
                                step=1,
                                label="Dimension",
                                info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)",
                                interactive=True
                            )
                        with gr.Row():
                            topk = gr.Number(
                                label="Top-k",
                                value=280,
                                precision=0,
                                interactive=True,
                                info="more structured"
                            )
                            topp = gr.Number(
                                label="Top-p",
                                value=1150,
                                precision=0,
                                interactive=True,
                                info="more variation, overwrites Top-k if not zero"
                            )
                            temperature = gr.Number(
                                label="Randomness Temperature",
                                value=0.7,
                                precision=None,
                                step=0.1,
                                interactive=True,
                                info="less than one to follow Melody Condition song closely"
                            )
                            cfg_coef = gr.Number(
                                label="Classifier Free Guidance",
                                value=3.75,
                                precision=None,
                                step=0.1,
                                interactive=True,
                                info="3.0-4.0, stereo and small need more"
                            )
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=-1,
                                precision=0,
                                interactive=True,
                                key="seed"
                            )
                            # Dice button: reset the seed to -1 (random)
                            gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(
                                fn=lambda: -1,
                                outputs=[seed],
                                queue=False
                            )
                            reuse_seed = gr.Button('\u267b\ufe0f', elem_classes="small-btn")
                with gr.Column() as c:
                    output = gr.Video(
                        label="Generated Music",
                        interactive=False,
                        show_download_button=True,
                        show_share_button=True,
                        autoplay=False
                    )
                    wave_file = gr.File(
                        label=".wav file",
                        elem_id="output_wavefile",
                        interactive=True
                    )
                    seed_used = gr.Number(
                        label='Seed used',
                        value=-1,
                        interactive=False
                    )

            # UI event bindings
            radio.change(
                toggle_audio_src,
                radio,
                [melody_filepath],
                queue=False,
                show_progress=False,
                api_name="audio_src_change"
            )
            video_orientation.change(
                load_background_filepath,
                inputs=[video_orientation],
                outputs=[background],
                queue=False,
                show_progress=False,
                api_name="video_orientation_change"
            )
            melody_filepath.change(
                load_melody_filepath,
                inputs=[melody_filepath, title, model, topp, temperature, cfg_coef, segment_length],
                outputs=[title, prompt_index, model, topp, temperature, cfg_coef, overlap],
                api_name="melody_filepath_change",
                queue=False
            )
            reuse_seed.click(
                fn=lambda x: x,
                inputs=[seed_used],
                outputs=[seed],
                queue=False,
                api_name="reuse_seed_click"
            )
            autoplay_cb.change(
                fn=lambda x: gr.update(autoplay=x),
                inputs=[autoplay_cb],
                outputs=[output],
                queue=False,
                api_name="autoplay_cb_change"
            )
            segment_length.release(
                fn=load_melody_filepath,
                queue=False,
                api_name="segment_length_change",
                trigger_mode="once",
                inputs=[melody_filepath, title, model, topp, temperature, cfg_coef, segment_length],
                outputs=[title, prompt_index, model, topp, temperature, cfg_coef, overlap],
                show_progress="minimal"
            )

            # Examples
            gr.Examples(
                examples=[
                    [
                        "4/4 120bpm 320kbps 32khz, An 80s driving pop song with heavy drums and synth pads in the background",
                        "./assets/bach.mp3",
                        "melody",
                        "80s Pop Synth",
                        950,
                        0.6,
                        3.5
                    ],
                    [
                        "4/4 120bpm 320kbps 32khz, A cheerful country song with acoustic guitars",
                        "./assets/bolero_ravel.mp3",
                        "stereo-melody-large",
                        "Country Guitar",
                        750,
                        0.7,
                        4.0
                    ],
                    [
                        "4/4 120bpm 320kbps 32khz, 90s rock song with electric guitar and heavy drums",
                        None,
                        "stereo-medium",
                        "90s Rock Guitar",
                        1150,
                        0.7,
                        3.75
                    ],
                    [
                        "4/4 120bpm 320kbps 32khz, a light and cheery EDM track, with syncopated drums, aery pads, and strong emotions",
                        "./assets/bach.mp3",
                        "melody-large",
                        "EDM my Bach",
                        500,
                        0.7,
                        3.75
                    ],
                    [
                        "4/4 320kbps 32khz, lofi slow bpm electro chill with organic samples",
                        None,
                        "medium",
                        "LoFi Chill",
                        0,
                        0.7,
                        4.0
                    ],
                ],
                inputs=[text, melody_filepath, model, title, topp, temperature, cfg_coef],
                outputs=[output]
            )

        with gr.Tab("User History") as history_tab:
            modules.user_history.setup(display_type="video_path")
            modules.user_history.render()

        user_profile = gr.State(None)

        with gr.Row("Versions") as versions_row:
            gr.HTML(value=versions_html(), visible=True, elem_id="versions")

        # Main predict call: fetch the profile first, then generate
        submit.click(
            modules.user_history.get_profile,
            inputs=[],
            outputs=[user_profile],
            queue=True,
            api_name="submit"
        ).then(
            predict,
            inputs=[
                model, text, melody_filepath, duration, dimension, topk, topp,
                temperature, cfg_coef, background, title, settings_font,
                settings_font_color, seed, overlap, prompt_index,
                include_title, include_settings, harmony_only, user_profile,
                segment_length, settings_font_size, settings_animate_waveform,
                video_orientation
            ],
            outputs=[output, wave_file, seed_used],
            scroll_to_output=True,
            show_api=False
        )

        # Extra API endpoints; registered while the Blocks context is open.
        gr.api(ping, api_name="ping")
        gr.api(predict_simple)

    # Launching the interface
    requested_port = kwargs.get('server_port', 0) or 0
    launch_kwargs = {
        'server_name': kwargs.get('listen', '0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1'),
        # A port of 0 means "not specified": pass None so Gradio picks its
        # default instead of trying to bind port 0.
        'server_port': requested_port if requested_port > 0 else None,
        'share': kwargs.get('share', False),
        'allowed_paths': ["assets", "./assets", "images", "./images", 'e:/TMP'],
        'favicon_path': "./assets/favicon.ico",
        'mcp_server': True,
        'ssr_mode': False
    }

    demo.queue(max_size=10, api_open=True).launch(**launch_kwargs)


# Script entry point
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--listen',
        type=str,
        default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
        help='IP to listen on for connections to Gradio',
    )
    parser.add_argument(
        '--username', type=str, default='', help='Username for authentication'
    )
    parser.add_argument(
        '--password', type=str, default='', help='Password for authentication'
    )
    parser.add_argument(
        '--server_port',
        type=int,
        default=0,
        help='Port to run the server listener on',
    )
    parser.add_argument(
        '--inbrowser', action='store_true', help='Open in browser'
    )
    parser.add_argument(
        '--share', action='store_true', help='Share the gradio UI'
    )
    parser.add_argument(
        '--unload_model', action='store_true',
        help='Unload the model after every generation to save GPU memory'
    )
    parser.add_argument(
        '--unload_to_cpu', action='store_true',
        help='Move the model to main RAM after every generation to save GPU memory but reload faster than after full unload'
    )
    parser.add_argument(
        '--cache', action='store_true',
        help='Cache models in RAM to quickly switch between them'
    )

    args = parser.parse_args()

    # Setting the module-level flags
    UNLOAD_MODEL = args.unload_model
    MOVE_TO_CPU = args.unload_to_cpu
    if args.cache:
        MODELS = {}

    # Launching the interface. vars(args) already contains `share` and
    # `unload_to_cpu`; passing them again explicitly raised
    # "TypeError: got multiple values for keyword argument".
    ui(**vars(args))