{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "collapsed": true, "id": "bMJNEsdIUHYM" }, "outputs": [], "source": [
"import os\n",
"# @title # Установка\n",
"# Repository to clone and the directories used inside the Colab VM.\n",
"repo_url = \"https://huggingface.co/noblebarkrr/mvsepless_colab\"\n",
"home_dir = os.path.join(os.sep, \"content\")\n",
"mvsepless_dir = os.path.join(home_dir, \"mvsepless-epsilon\")\n",
"%cd $home_dir\n",
"!git clone $repo_url $mvsepless_dir\n",
"%cd $mvsepless_dir\n",
"!pip install --no-cache-dir uv\n",
"# Dependency list written to requirements.txt and installed with uv below.\n",
"req = \"\"\"\n",
"torch\n",
"torchvision\n",
"torchaudio\n",
"numpy==2.0.2\n",
"pandas\n",
"scipy\n",
"librosa\n",
"samplerate==0.1.0\n",
"matplotlib\n",
"tqdm\n",
"einops\n",
"protobuf\n",
"soundfile\n",
"pydub\n",
"webrtcvad\n",
"audiomentations\n",
"pedalboard==0.8.2\n",
"ml_collections\n",
"timm\n",
"wandb\n",
"accelerate\n",
"bitsandbytes\n",
"tokenizers\n",
"huggingface-hub\n",
"transformers\n",
"torchseg\n",
"demucs==4.0.0\n",
"asteroid\n",
"prodigyopt\n",
"torch_log_wmse\n",
"rotary_embedding_torch\n",
"gradio<=6.0\n",
"omegaconf\n",
"beartype\n",
"spafe\n",
"torch_audiomentations\n",
"auraloss\n",
"onnx>=1.17\n",
"onnx2torch>=0.3.0\n",
"onnxruntime-gpu>=1.17\n",
"ml_dtypes\n",
"resampy\n",
"yt_dlp\n",
"pyngrok\n",
"tabulate\n",
"neuraloperator==1.0.2\n",
"torchcrepe\n",
"praat-parselmouth\n",
"faiss-cpu==1.11\n",
"local-attention\n",
"tenacity\n",
"pyworld\n",
"gdown\n",
"\"\"\"\n",
"with open(\"requirements.txt\", \"w\", encoding=\"utf-8\") as f:\n",
"    f.write(req)\n",
"!uv pip install --no-cache-dir -qq -r requirements.txt\n",
"%cd $mvsepless_dir\n"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "3l7EYo3oZ76h" }, "outputs": [], "source": [
"import os\n",
"import yt_dlp\n",
"\n",
"# Default download directory; overridable through the MVSEPLESS_DOWNLOAD_DIR environment variable.\n",
"DOWNLOAD_DIR = os.environ.get(\n",
"    \"MVSEPLESS_DOWNLOAD_DIR\", os.path.join(os.getcwd(), \"downloaded\")\n",
")\n",
"\n",
"def dw_yt_dlp(\n",
"    url,\n",
"    output_dir=None,\n",
"    cookie=None,\n",
"    output_format=\"mp3\",\n",
"    output_bitrate=\"320\",\n",
"    title=None,\n",
"):\n",
"    \"\"\"Download the audio of ``url`` with yt-dlp and extract it with FFmpegExtractAudio.\n",
"\n",
"    Args:\n",
"        url: Link to the audio/video page.\n",
"        output_dir: Target directory; DOWNLOAD_DIR is used when it is empty or None.\n",
"        cookie: Optional path to a cookies file; ignored when the file does not exist.\n",
"        output_format: Codec passed as \"preferredcodec\"; also used as the extension\n",
"            of the returned path.\n",
"        output_bitrate: Value passed as \"preferredquality\".\n",
"        title: Optional file name (without extension) used instead of the media title.\n",
"\n",
"    Returns:\n",
"        Absolute path of the extracted audio file, or None when the download failed.\n",
"    \"\"\"\n",
"    if not url or not str(url).strip():\n",
"        print(\"Ошибка: не указана ссылка для скачивания\")\n",
"        return None\n",
"\n",
"    target_dir = output_dir or DOWNLOAD_DIR\n",
"    # File name template: media title by default, explicit title when provided.\n",
"    name_template = \"%(title)s.%(ext)s\" if title is None else f\"{title}.%(ext)s\"\n",
"\n",
"    ydl_opts = {\n",
"        \"format\": \"bestaudio/best\",\n",
"        \"outtmpl\": os.path.join(target_dir, name_template),\n",
"        \"postprocessors\": [\n",
"            {\n",
"                \"key\": \"FFmpegExtractAudio\",\n",
"                \"preferredcodec\": output_format,\n",
"                \"preferredquality\": output_bitrate,\n",
"            }\n",
"        ],\n",
"        \"noplaylist\": True,  # download a single video, not the whole playlist\n",
"        \"quiet\": True,  # no console output from yt-dlp\n",
"        \"no_warnings\": True,  # hide warnings\n",
"    }\n",
"\n",
"    # Pass cookies only when a readable file was actually supplied.\n",
"    if cookie and os.path.exists(cookie):\n",
"        ydl_opts[\"cookiefile\"] = cookie\n",
"\n",
"    try:\n",
"        with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
"            info = ydl.extract_info(url, download=True)\n",
"            if info and info.get(\"_type\") == \"playlist\":\n",
"                # For playlists use the first entry only.\n",
"                entries = info.get(\"entries\") or []\n",
"                info = entries[0] if entries else None\n",
"            if not info:\n",
"                print(\"Ошибка: не удалось получить информацию о файле\")\n",
"                return None\n",
"            filename = ydl.prepare_filename(info)\n",
"    except Exception as e:\n",
"        # yt-dlp runs in quiet mode, so report the failure here instead of hiding it.\n",
"        print(f\"Ошибка при скачивании: {e}\")\n",
"        return None\n",
"\n",
"    # prepare_filename() already contains the output directory (it is part of\n",
"    # \"outtmpl\"), so only the extension is swapped for the extracted format.\n",
"    base, _ = os.path.splitext(filename)\n",
"    return os.path.abspath(f\"{base}.{output_format}\")\n",
"\n",
"#@title # Скачивание аудио с интернета\n",
"\n",
"input_url = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Ссылка на аудио/видео\"}\n",
"output_dir = \"/content/downloaded\" # @param {\"type\":\"string\",\"placeholder\":\"Директория для сохранения скачанного аудио\"}\n",
"cookies_path = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Путь к cookies (для успешного скачивания с ютуба)\"}\n",
"downloaded_file = dw_yt_dlp(url=input_url, output_dir=output_dir, cookie=cookies_path)\n",
"if downloaded_file:\n",
"    print(f\"Файл сохранён: {downloaded_file}\")\n"
] }, { "cell_type": "code",
"execution_count": null, "metadata": { "cellView": "form", "id": "QMG9TEdMtHJ6" }, "outputs": [], "source": [
"import os\n",
"from pyngrok import ngrok\n",
"import random\n",
"import string\n",
"import re\n",
"import urllib.request\n",
"import time\n",
"import ipywidgets as widgets\n",
"from IPython.display import display, Javascript\n",
"import threading\n",
"import subprocess\n",
"import signal\n",
"\n",
"%cd $mvsepless_dir\n",
"#@title # Web-UI\n",
"#@markdown ---\n",
"#@markdown ### Общий доступ\n",
"port = 7862\n",
"#@markdown * Способ поделится приложением\n",
"sharing_method = \"gradio\" # @param [\"gradio\",\"ngrok\",\"localtunnel\",\"not\"]\n",
"#@markdown * Токен для ngrok *(где взять его - https://dashboard.ngrok.com/get-started/your-authtoken)*\n",
"ngrok_token = \"\" # @param {\"type\":\"string\"}\n",
"\n",
"lt_sub_domain = \"mvsepless\"\n",
"def generate_subdomain(length=8):\n",
"    \"\"\"Return a random string of lowercase letters and digits of the given length.\"\"\"\n",
"    chars = string.ascii_lowercase + string.digits\n",
"    return ''.join(random.choice(chars) for _ in range(length))\n",
"\n",
"if sharing_method == \"ngrok\":\n",
"    try:\n",
"        ngrok.set_auth_token(ngrok_token)\n",
"        ngrok.kill()\n",
"        tunnel = ngrok.connect(port)\n",
"        print(f\"Публичная ссылка: {tunnel.public_url}\")\n",
"    except KeyboardInterrupt:\n",
"        ngrok.kill()\n",
"\n",
"if sharing_method == \"localtunnel\":\n",
"    # os.system() runs /bin/sh, where the bash-only `&>` redirect is parsed as\n",
"    # \"run in background\"; the POSIX form makes the install finish before we go on.\n",
"    os.system(\"npm install -g localtunnel >/dev/null 2>&1\")\n",
"    # NOTE(review): url.txt is truncated here but not read in this cell - confirm it is still needed.\n",
"    with open('url.txt', 'w') as file:\n",
"        file.write('')\n",
"    subdomain = f\"{re.sub(r'[^a-zA-Z0-9]', '', lt_sub_domain)}-{generate_subdomain(25)}\"\n",
"\n",
"    # Command line for the localtunnel client; the subdomain is requested only\n",
"    # when a non-blank prefix is configured.\n",
"    lt_args = [\"lt\", \"--port\", str(port)]\n",
"    if lt_sub_domain.strip():\n",
"        lt_args += [\"--subdomain\", subdomain]\n",
"\n",
"    # Flag controlling the restart loop, and the currently running client process.\n",
"    tunnel_running = True\n",
"    tunnel_process = None\n",
"\n",
"    def run_tunnel():\n",
"        \"\"\"Keep the localtunnel client alive, restarting it 5 s after every exit.\"\"\"\n",
"        global tunnel_process\n",
"        while tunnel_running:\n",
"            print(\"localtunnel включается...\")\n",
"            try:\n",
"                # Output is discarded: a PIPE that nobody reads can block the child.\n",
"                tunnel_process = subprocess.Popen(\n",
"                    lt_args,\n",
"                    stdout=subprocess.DEVNULL,\n",
"                    stderr=subprocess.DEVNULL,\n",
"                )\n",
"                tunnel_process.wait()  # block until the client exits\n",
"                if not tunnel_running:\n",
"                    break\n",
"                time.sleep(5)  # pause before restarting\n",
"            except Exception as e:\n",
"                if tunnel_running:\n",
"                    print(f\"Ошибка в localtunnel: {e}\")\n",
"                    time.sleep(5)\n",
"\n",
"    tunnel_thread = threading.Thread(target=run_tunnel, daemon=True)\n",
"    tunnel_thread.start()\n",
"\n",
"    time.sleep(3)\n",
"    try:\n",
"        # The public IP of this VM is what the loca.lt landing page asks for as the tunnel password.\n",
"        with urllib.request.urlopen('https://ipv4.icanhazip.com', timeout=10) as response:\n",
"            endpoint_ip = response.read().decode('utf8').strip()\n",
"        tunnel_url = f\"https://{subdomain}.loca.lt\"\n",
"        print(f\"Публичная ссылка: {tunnel_url}\")\n",
"        print(f\"Пароль для входа (IP): {endpoint_ip}\")\n",
"\n",
"        # Read-only text field holding the tunnel password (the IP), with a copy button.\n",
"        text_field = widgets.Text(\n",
"            value=endpoint_ip,\n",
"            description='Пароль:',\n",
"            disabled=True\n",
"        )\n",
"        text_field.add_class(\"copy-enabled\")\n",
"\n",
"        display(text_field)\n",
"\n",
"        # Attach a clipboard button next to the text field.\n",
"        display(Javascript(\"\"\"\n",
"            setTimeout(() => {\n",
"                const input = document.querySelector('.copy-enabled input');\n",
"                if (!input) return;\n",
"\n",
"                const btn = document.createElement('button');\n",
"                btn.innerHTML = '📋';\n",
"                btn.style.cssText = `\n",
"                    margin-left: 8px;\n",
"                    border: none;\n",
"                    background: none;\n",
"                    cursor: pointer;\n",
"                    font-size: 1.2em;\n",
"                `;\n",
"                input.parentNode.appendChild(btn);\n",
"\n",
"                btn.addEventListener('click', () => {\n",
"                    navigator.clipboard.writeText(input.value) // Исправлено: input.value вместо input\n",
"                        .then(() => {\n",
"                            btn.innerHTML = '✓';\n",
"                            setTimeout(() => btn.innerHTML = '📋', 2000);\n",
"                        })\n",
"                        .catch(err => {\n",
"                            console.error('Ошибка копирования: ', err);\n",
"                        });\n",
"                });\n",
"            }, 300);\n",
"        \"\"\"))\n",
"\n",
"    except Exception as e:\n",
"        print(f\"Ошибка при старте localtunnel: {e}\")\n",
"\n",
"    def stop_tunnel():\n",
"        \"\"\"Stop the restart loop and terminate the running localtunnel client, if any.\"\"\"\n",
"        global tunnel_running\n",
"        tunnel_running = False\n",
"        if tunnel_process is not None and tunnel_process.poll() is None:\n",
"            tunnel_process.terminate()\n",
"        print(\"Localtunnel завершает работу...\")\n",
"\n",
"    # Install a Ctrl+C handler that shuts the tunnel down first.\n",
"    original_signal_handler = signal.getsignal(signal.SIGINT)\n",
"\n",
"    def signal_handler(sig, frame):\n",
"        \"\"\"Stop the tunnel, restore the previous SIGINT handler and re-raise the interrupt.\"\"\"\n",
"        stop_tunnel()\n",
"        signal.signal(signal.SIGINT, original_signal_handler)\n",
"        raise KeyboardInterrupt\n",
"\n",
"    signal.signal(signal.SIGINT, signal_handler)\n",
"\n",
"share_arg = \"--share\" if sharing_method == \"gradio\" else \"\"\n",
"!python mvsepless app --port $port $share_arg"
] }, { "cell_type": "markdown", "metadata": { "id": "ydA_93K1Yt4D" }, "source": [ "# MVSepLess CLI" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "KASVe41W6Fk-" }, "outputs": [], "source": [
"#@markdown ---\n",
"#@markdown ### Входные данные\n",
"#@markdown * Путь к входной папке/файлу:\n",
"input_path = \"\" # @param {\"type\":\"string\",\"placeholder\":\"/путь/к/файлу\"}\n",
"#@markdown ---\n",
"#@markdown ### Выбор модели\n",
"#@markdown * Тип / Имя модели:\n",
"model_name = \"bs_roformer / bs_6stem\" # @param ['mel_band_roformer / mbr_vocals_kim', 'mel_band_roformer / mbr_wsa', 'mel_band_roformer / mbr_instvoc_duality1_unwa', 'mel_band_roformer / mbr_instvoc_duality2_unwa', 'mel_band_roformer / mbr_kimft1_unwa', 'mel_band_roformer / mbr_kimft2_unwa', 'mel_band_roformer / mbr_kimft2b_unwa', 'mel_band_roformer / mbr_kimft3_prev_unwa', 'mel_band_roformer / mbr_bigbeta1_unwa', 'mel_band_roformer / mbr_bigbeta2_unwa', 'mel_band_roformer / mbr_bigbeta3_unwa', 'mel_band_roformer / mbr_bigbeta4_unwa', 'mel_band_roformer / mbr_bigbeta5e_unwa', 'mel_band_roformer / mbr_bigbeta6_unwa', 'mel_band_roformer / mbr_bigbeta6x_unwa', 'mel_band_roformer / mbr_inst1_unwa', 'mel_band_roformer / mbr_inst1+_unwa', 'mel_band_roformer / mbr_inst1e_unwa', 'mel_band_roformer / mbr_inst1e+_unwa',
'mel_band_roformer / mbr_inst2_unwa', 'mel_band_roformer / mbr_small_unwa', 'mel_band_roformer / mbr_bleed_supressor_unwa_97chris', 'mel_band_roformer / mbr_inst_becruily', 'mel_band_roformer / mbr_guitar_becruily', 'mel_band_roformer / mbr_karaoke_becruily', 'mel_band_roformer / mbr_vocals_becruily', 'mel_band_roformer / mbr_syhft1', 'mel_band_roformer / mbr_syhft2', 'mel_band_roformer / mbr_syhft2.5', 'mel_band_roformer / mbr_syhft3', 'mel_band_roformer / mbr_bigsyhft1fast', 'mel_band_roformer / mbr_syhftbeta1', 'mel_band_roformer / mbr_syhftB1_1', 'mel_band_roformer / mbr_syhftB1_2', 'mel_band_roformer / mbr_syhftB1_3', 'mel_band_roformer / mbr_syhft_4stem', 'mel_band_roformer / mbr_syhft_4stem2', 'mel_band_roformer / mbr_inst_1652_essid', 'mel_band_roformer / mbr_inst_1681_essid', 'mel_band_roformer / mbr_instfv1_gabox', 'mel_band_roformer / mbr_instfv2_gabox', 'mel_band_roformer / mbr_instfv3_gabox', 'mel_band_roformer / mbr_instfv4_gabox', 'mel_band_roformer / mbr_instfv4n_gabox', 'mel_band_roformer / mbr_instfv5_gabox', 'mel_band_roformer / mbr_instfv5n_gabox', 'mel_band_roformer / mbr_instfv6_gabox', 'mel_band_roformer / mbr_instfv6n_gabox', 'mel_band_roformer / mbr_instfv7_gabox', 'mel_band_roformer / mbr_instfv7n_gabox', 'mel_band_roformer / mbr_instfv7+_gabox', 'mel_band_roformer / mbr_instfv7z_gabox', 'mel_band_roformer / mbr_instfv8_gabox', 'mel_band_roformer / mbr_instfv8b_gabox', 'mel_band_roformer / mbr_instfv9_gabox', 'mel_band_roformer / mbr_instfv10_gabox', 'mel_band_roformer / mbr_instfvx_gabox', 'mel_band_roformer / mbr_instbv1_gabox', 'mel_band_roformer / mbr_instbv2_gabox', 'mel_band_roformer / mbr_instbv3_gabox', 'mel_band_roformer / mbr_vocalsfv1_gabox', 'mel_band_roformer / mbr_vocalsfv2_gabox', 'mel_band_roformer / mbr_vocalsfv3_gabox', 'mel_band_roformer / mbr_vocalsfv4_gabox', 'mel_band_roformer / mbr_vocalsfv5_gabox', 'mel_band_roformer / mbr_vocalsfv6_gabox', 'mel_band_roformer / mbr_karaoke25022025_gabox', 'mel_band_roformer / 
mbr_karaoke28022025_gabox', 'mel_band_roformer / mbr_karaoke1_gabox', 'mel_band_roformer / mbr_karaoke2_gabox', 'mel_band_roformer / mbr_leadvoc_dereverb_gabox', 'mel_band_roformer / mbr_denoise_debleed_gabox', 'mel_band_roformer / mbr_karaoke_fusion_gonzaluigi', 'mel_band_roformer / mbr_karaoke_fusion_aggr_gonzaluigi', 'mel_band_roformer / mbr_bve_gonzaluigi', 'mel_band_roformer / mbr_karaoke_fusion2_aggr_gonzaluigi', 'mel_band_roformer / mbr_karaoke_fusion_total_aggr_gonzaluigi', 'mel_band_roformer / mbr_dereverb_anvuew', 'mel_band_roformer / mbr_dereverb_less_aggr_anvuew', 'mel_band_roformer / mbr_dereverb_mono_anvuew', 'mel_band_roformer / mbr_aspiration_sucial', 'mel_band_roformer / mbr_derverb_echo1_sucial', 'mel_band_roformer / mbr_debigreverb_sucial', 'mel_band_roformer / mbr_desuperbigreverb_sucial', 'mel_band_roformer / mbr_dereverb-echo_fused_sucial', 'mel_band_roformer / mbr_dereverb-echo2_sucial', 'mel_band_roformer / mbr_karaoke_aufr33_viperx', 'mel_band_roformer / mbr_denoise_aufr33', 'mel_band_roformer / mbr_denoise_aggr_aufr33', 'mel_band_roformer / mbr_crowd_aufr33_viperx', 'mel_band_roformer / mbr_vocals_viperx', 'mel_band_roformer / mbr_vocalsf_aname', 'mel_band_roformer / mbr_kinft1_aname', 'mel_band_roformer / mbr_kinft2_aname', 'mel_band_roformer / mbr_kinft2f_aname', 'mel_band_roformer / mbr_kinft3_aname', 'mel_band_roformer / mbr_small_aname', 'mel_band_roformer / mbr_duality1_aname', 'mel_band_roformer / mbr_4stemlarge1_aname', 'mel_band_roformer / mbr_4stemlarge2_aname', 'mel_band_roformer / mbr_4stemxl1_aname', 'mel_band_roformer / mbr_percussion_yolkispaliks', 'mel_band_roformer / mbr_inst_metal_prev_meskvlla33', 'mel_band_roformer / mbr_neo_inst_vfx', 'bs_roformer / bs_drums_beatloo_labs', 'bs_roformer / bs_bass_beatloo_labs', 'bs_roformer / bs_vocals_1296_viperx', 'bs_roformer / bs_other_viperx', 'bs_roformer / bs_revive1_unwa', 'bs_roformer / bs_revive2_unwa', 'bs_roformer / bs_revive3e_unwa', 'bs_roformer / bs_resurrection_unwa', 
'bs_roformer / bs_resurrection_inst_unwa', 'bs_roformer / bs_inst_fno_unwa', 'bs_roformer / bs_inst_hyperace_unwa', 'bs_roformer / bs_inst_hyperace2_unwa', 'bs_roformer / bs_voc_hyperace2_unwa', 'bs_roformer / bs_karaoke_becruily', 'bs_roformer / bs_voctest_gabox', 'bs_roformer / bs_karaoke_gabox', 'bs_roformer / bs_6stem', 'bs_roformer / bs_6stem_fixed', 'bs_roformer / bs_4stem_zfturbo', 'bs_roformer / bs_4stemft_syh99999', 'bs_roformer / bs_male_female_146_sucial', 'bs_roformer / bs_male_female_267_sucial', 'bs_roformer / bs_male_female_aufr33', 'bs_roformer / bs_deverb_256_8_anvuew', 'bs_roformer / bs_deverb_384_10_anvuew', 'bs_roformer / bs_karaoke_anvuew', 'bs_roformer / bs_vocals_anvuew', 'bs_roformer / bs_4stem_aname', 'mdx23c / mdx23c_instvoc_zfturbo', 'mdx23c / mdx23c_instvoc_hq1', 'mdx23c / mdx23c_instvoc_hq2', 'mdx23c / mdx23c_d1581', 'mdx23c / mdx23c_drumsep_6stem_aufr33_jarredou', 'mdx23c / mdx23c_drumsep_5stem_aufr33_jarredou', 'mdx23c / mdx23c_derverb_aufr33_jarredou', 'mdx23c / mdx23c_mid_side_wesleyr36', 'mdx23c / mdx23c_4stem_zfturbo', 'mdx23c / mdx23c_orch_verosment', 'mdxnet / mdx_kim_inst', 'mdxnet / mdx_kim_vocal1', 'mdxnet / mdx_kim_vocal2', 'mdxnet / mdx_kuielab_a_bass', 'mdxnet / mdx_kuielab_a_drums', 'mdxnet / mdx_kuielab_a_other', 'mdxnet / mdx_kuielab_a_vocals', 'mdxnet / mdx_kuielab_b_bass', 'mdxnet / mdx_kuielab_b_drums', 'mdxnet / mdx_kuielab_b_other', 'mdxnet / mdx_kuielab_b_vocals', 'mdxnet / mdx_reverb_hq_foxjoy', 'mdxnet / mdx_inst1', 'mdxnet / mdx_inst2', 'mdxnet / mdx_inst3', 'mdxnet / mdx_inst_full_292', 'mdxnet / mdx_inst_hq1', 'mdxnet / mdx_inst_hq2', 'mdxnet / mdx_inst_hq3', 'mdxnet / mdx_inst_hq4', 'mdxnet / mdx_inst_hq5', 'mdxnet / mdx_inst_main', 'mdxnet / mdx_vocft', 'mdxnet / mdx_crowd_hq1', 'mdxnet / mdx_inst_187_beta', 'mdxnet / mdx_inst_82_beta', 'mdxnet / mdx_inst_90_beta', 'mdxnet / mdx_main_340', 'mdxnet / mdx_main_390', 'mdxnet / mdx_main_406', 'mdxnet / mdx_main_427', 'mdxnet / mdx_main_438', 'mdxnet / 
mdx_1_9703', 'mdxnet / mdx_2_9682', 'mdxnet / mdx_3_9662', 'mdxnet / mdx_9482', 'mdxnet / mdx_karaoke1', 'mdxnet / mdx_karaoke2', 'mdxnet / mdx_main', 'vr / 1_hp-uvr', 'vr / 2_hp-uvr', 'vr / 3_hp-vocal-uvr', 'vr / 4_hp-vocal-uvr', 'vr / 5_hp-karaoke-uvr', 'vr / 6_hp-karaoke-uvr', 'vr / 7_hp2-uvr', 'vr / 8_hp2-uvr', 'vr / 9_hp2-uvr', 'vr / 10_sp-uvr-2b-32000-1', 'vr / 11_sp-uvr-2b-32000-2', 'vr / 12_sp-uvr-3b-44100', 'vr / 13_sp-uvr-4b-44100-1', 'vr / 14_sp-uvr-4b-44100-2', 'vr / 15_sp-uvr-mid-44100-1', 'vr / 16_sp-uvr-mid-44100-2', 'vr / 17_hp-wind_inst-uvr', 'vr / uvr-de-echo-aggressive', 'vr / uvr-de-echo-normal', 'vr / uvr-deecho-dereverb', 'vr / uvr-denoise-lite', 'vr / uvr-denoise', 'vr / uvr-bve-4b_sn-44100-1', 'vr / uvr-bve-v2-4b-sn-44100', 'vr / mgm-v5-karokee-32000-beta1', 'vr / mgm-v5-karokee-32000-beta2-agr', 'vr / mgm_highend_v4', 'vr / mgm_lowend_a_v4', 'vr / mgm_lowend_b_v4', 'vr / mgm_main_v4', 'vr / uvr-de-reverb-aufr33-jarredou', 'vr / uvr-de-breath-sucial-v1', 'vr / uvr-de-breath-sucial-v2', 'vr / vr_harmonic_noise_sep', 'scnet / scnet_4stem_zfturbo', 'scnet / scnet_xl_ihf_4stem_zfturbo', 'scnet / scnet_xl_4stem_starrytong', 'scnet / scnet_xl_4stem_zftrubo', 'scnet / scnet_jazz_4stem_jorisvaneyghen', 'scnet / scnet_xl_jazz_4stem_jorisvaneyghen', 'scnet / scnet_choirsep_exp', 'htdemucs / demucs4_mvsep_vocals', 'htdemucs / demucs4_4stem', 'htdemucs / demucs4_6stem', 'htdemucs / demucs3_mmi', 'htdemucs / demucs4_ft_bass', 'htdemucs / demucs4_ft_drums', 'htdemucs / demucs4_ft_vocals', 'htdemucs / demucs4_ft_other', 'htdemucs / demucs_mid_side_wesleyr36', 'htdemucs / demucs4_choirsep', 'bandit / bandit_plus', 'bandit_v2 / bandit_v2_multi']\n", "# @markdown ---\n", "# @markdown ### Настройки разделения\n", "# @markdown * Извлечь инструментал:\n", "instrumental = True # @param {type:\"boolean\"}\n", "#@markdown ---\n", "#@markdown ### Выходные данные\n", "#@markdown * Формат:\n", "output_format = \"mp3\" # @param [\"mp3\", \"wav\", \"flac\", \"ogg\", 
\"opus\", \"m4a\", \"aac\", \"aiff\"]\n",
"# @markdown * Битрейт\n",
"bitrate = 320 # @param {\"type\":\"slider\",\"min\":32,\"max\":320,\"step\":1}\n",
"# @markdown * Выбрать выходные стемы(через пробел, например (\"vocal\" \"instrumental\")):\n",
"stems_to_extract = \"\" # @param {type:\"string\"}\n",
"# @markdown * Шаблон именования выходных файлов:\n",
"output_template = \"NAME (STEM) MODEL\" # @param {type:\"string\"}\n",
"#@markdown * Путь к выходной папке:\n",
"output_dir = \"/content/output\" # @param {\"type\":\"string\",\"placeholder\":\"/путь/к/папке\"}\n",
"\n",
"import shlex\n",
"\n",
"# The form value has the shape \"<model_type> / <model_name>\".\n",
"model = model_name.split(\" / \")\n",
"\n",
"%cd $mvsepless_dir\n",
"\n",
"# Free-text form values are shell-quoted so that spaces, quotes, `$` or\n",
"# backticks in a path or template cannot break or alter the command line.\n",
"cmd = [\n",
"    \"python\",\n",
"    \"mvsepless\", \"cli\",\n",
"    f\"--input {shlex.quote(input_path)}\",\n",
"    f\"--output_dir {shlex.quote(output_dir)}\",\n",
"    f\"--model_type {shlex.quote(model[0])}\",\n",
"    f\"--model_name {shlex.quote(model[1])}\",\n",
"    f\"--output_format {shlex.quote(output_format)}\",\n",
"    f\"--output_bitrate {bitrate}k\",\n",
"    f\"--template {shlex.quote(output_template)}\"\n",
"]\n",
"\n",
"if instrumental:\n",
"    cmd.append(\"--ext_inst\")\n",
"\n",
"if stems_to_extract:\n",
"    # Passed through unquoted on purpose: the field holds several space-separated stem names.\n",
"    cmd.append(f\"--selected_stems {stems_to_extract}\")\n",
"\n",
"if input_path.strip():\n",
"    !{\" \".join(cmd)}\n",
"else:\n",
"    print(\"Ошибка: не указан путь к входному файлу/папке\")"
] }, { "cell_type": "markdown", "metadata": { "id": "H24RtVB1YlE0" }, "source": [ "# Vbach CLI" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "JGme2oZOf_W3" }, "outputs": [], "source": [
"#@title Показать список установленных моделей для преобразования\n",
"%cd $mvsepless_dir\n",
"!python mvsepless/model_manager.py vbach list"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"#@title Удаление голосовой модели\n",
"%cd $mvsepless_dir\n",
"voicemodel_name = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Имя модели\"}\n",
"# Same guard as the install cells: do nothing until a model name is entered.\n",
"if voicemodel_name != \"\":\n",
"    !python mvsepless/model_manager.py vbach remove --model_name \"$voicemodel_name\""
] }, { "cell_type": "markdown", "metadata": { "id":
"VaGl6jKVYZfU" }, "source": [ "## Установка голосовой модели" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "WiGjOEkaMtXi" }, "outputs": [], "source": [
"#@title Через локальные файлы\n",
"%cd $mvsepless_dir\n",
"pth_path = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Путь к *.pth файлу\"}\n",
"index_path = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Путь к *.index файлу\"}\n",
"voicemodel_name = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Имя модели\"}\n",
"index = f\"--index \\\"{index_path}\\\"\" if index_path != \"\" else \"\"\n",
"if pth_path != \"\" and voicemodel_name != \"\":\n",
"    !python mvsepless/model_manager.py vbach install_local --model_name \"$voicemodel_name\" --pth \"$pth_path\" $index"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "PqrYh-j_Qh98" }, "outputs": [], "source": [
"#@title Через файлы с интернета\n",
"%cd $mvsepless_dir\n",
"pth_url = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Ссылка на *.pth файл\"}\n",
"index_url = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Ссылка на *.index файл\"}\n",
"voicemodel_name = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Имя модели\"}\n",
"index = f\"--index_url \\\"{index_url}\\\"\" if index_url != \"\" else \"\"\n",
"if pth_url != \"\" and voicemodel_name != \"\":\n",
"    !python mvsepless/model_manager.py vbach install_url_files --model_name \"$voicemodel_name\" --pth_url \"$pth_url\" $index"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "J2nic-gYR2Bf" }, "outputs": [], "source": [
"#@title Через zip файл с интернета\n",
"%cd $mvsepless_dir\n",
"zip_url = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Ссылка на zip файл\"}\n",
"voicemodel_name = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Имя модели\"}\n",
"if zip_url != \"\" and voicemodel_name != \"\":\n",
"    !python mvsepless/model_manager.py vbach install_url_zip --model_name \"$voicemodel_name\" --url \"$zip_url\""
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Инференс" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "kdXg39EiSeLE" }, "outputs": [], "source": [
"#@markdown ### Входные данные\n",
"#@markdown * Путь к входной папке/файлу:\n",
"input_path = \"\" # @param {\"type\":\"string\",\"placeholder\":\"/путь/к/файлу\"}\n",
"#@markdown * Имя модели:\n",
"voicemodel_name = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Имя модели\"}\n",
"# @markdown ---\n",
"# @markdown ### Hubert\n",
"# @markdown * Стэк\n",
"stack = \"fairseq\" # @param [\"fairseq\",\"transformers\"]\n",
"# @markdown * Имя модели для fairseq\n",
"fairseq_embedder = \"hubert_base\" # @param [\"hubert_base\",\"contentvec_base\",\"korean_hubert_base\",\"chinese_hubert_base\",\"portuguese_hubert_base\",\"japanese_hubert_base\"]\n",
"# @markdown * Имя модели для transformers\n",
"transformers_embedder = \"contentvec\" # @param [\"contentvec\",\"spin\",\"spin-v2\",\"chinese-hubert-base\",\"japanese-hubert-base\",\"korean-hubert-base\"]\n",
"# @markdown ---\n",
"# @markdown ### Настройки преобразования\n",
"# @markdown * Влияние индекса\n",
"index_rate = 1 # @param {\"type\":\"slider\",\"min\":0,\"max\":1,\"step\":0.01}\n",
"# @markdown * Стерео режим\n",
"stereo_mode = \"mono\" # @param [\"mono\",\"left/right\",\"sim/dif\"]\n",
"# @markdown * Метод определения тона\n",
"method_pitch = \"rmvpe+\" # @param [\"rmvpe+\",\"mangio-crepe\",\"mangio-crepe-tiny\",\"fcpe\",\"harvest\",\"pm\",\"pyin\"]\n",
"# @markdown * Изменение высоты тона (полутона)\n",
"pitch = 0 # @param {\"type\":\"slider\",\"min\":-48,\"max\":48,\"step\":1}\n",
"# @markdown * Длина шага (для mangio-crepe)\n",
"hop_length = 128 # @param {\"type\":\"slider\",\"min\":8,\"max\":512,\"step\":8}\n",
"# @markdown * Радиус фильтра\n",
"filter_radius = 3 # @param {\"type\":\"slider\",\"min\":1,\"max\":7,\"step\":1}\n",
"# @markdown * Соотношение огибающих громкости\n",
"rms = 0.25 # @param {\"type\":\"slider\",\"min\":0,\"max\":1,\"step\":0.01}\n",
"# @markdown * Защита согласных\n",
"protect = 0.33 # @param {\"type\":\"slider\",\"min\":0,\"max\":0.5,\"step\":0.01}\n",
"# @markdown ---\n",
"#@markdown ### Дополнительные настройки\n",
"# @markdown * Минимальная частота F0\n",
"f0_min = 50 # @param {type:\"integer\"}\n",
"# @markdown * Максимальная частота F0\n",
"f0_max = 1100 # @param {type:\"integer\"}\n",
"# @markdown ---\n",
"#@markdown ### Выходные данные\n",
"#@markdown * Формат:\n",
"output_format = \"mp3\" # @param [\"mp3\", \"wav\", \"flac\", \"ogg\", \"opus\", \"m4a\", \"aac\", \"aiff\"]\n",
"# @markdown * Имя выходного файла:\n",
"output_name = \"F0METHOD_PITCH_(MODEL)_NAME\" # @param {type:\"string\"}\n",
"#@markdown * Путь к выходной папке:\n",
"output_dir = \"/content/vbach_output\" # @param {\"type\":\"string\",\"placeholder\":\"/путь/к/папке\"}\n",
"\n",
"import shlex\n",
"\n",
"%cd $mvsepless_dir\n",
"\n",
"# The embedder option depends on the selected stack; the transformers stack\n",
"# additionally needs the --use_transformers switch.\n",
"if stack == \"fairseq\":\n",
"    embedder_args = f\"--embedder_name {fairseq_embedder}\"\n",
"else:\n",
"    embedder_args = f\"--embedder_name {transformers_embedder} --use_transformers\"\n",
"\n",
"# Free-text form values are shell-quoted so that spaces, quotes, `$` or\n",
"# backticks cannot break or alter the command line.\n",
"cmd = [\n",
"    \"python\",\n",
"    \"mvsepless/vbach_infer.py\",\n",
"    f\"--input {shlex.quote(input_path)}\",\n",
"    f\"--output_dir {shlex.quote(output_dir)}\",\n",
"    f\"--model_name {shlex.quote(voicemodel_name)}\",\n",
"    f\"--output_format {shlex.quote(output_format)}\",\n",
"    f\"--index_rate {index_rate}\",\n",
"    f\"--output_name {shlex.quote(output_name)}\",\n",
"    \"--format_name\",\n",
"    f\"--stereo_mode {shlex.quote(stereo_mode)}\",\n",
"    f\"--method_pitch {shlex.quote(method_pitch)}\",\n",
"    f\"--pitch {pitch}\",\n",
"    f\"--hop_length {hop_length}\",\n",
"    f\"--filter_radius {filter_radius}\",\n",
"    f\"--rms {rms}\",\n",
"    f\"--protect {protect}\",\n",
"    f\"--f0_min {f0_min}\",\n",
"    f\"--f0_max {f0_max}\",\n",
"    embedder_args\n",
"]\n",
"\n",
"# Same guard style as the install cells: run only when the required fields are filled in.\n",
"if input_path.strip() and voicemodel_name.strip():\n",
"    !{\" \".join(cmd)}\n",
"else:\n",
"    print(\"Ошибка: укажите путь к входному файлу и имя модели\")"
] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [
"ydA_93K1Yt4D", "H24RtVB1YlE0", "VaGl6jKVYZfU", "4GSGHM4rYSVp" ], "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }