Upload RVC_Datasets_Maker.ipynb
Browse files- RVC_Datasets_Maker.ipynb +434 -0
RVC_Datasets_Maker.ipynb
ADDED
|
@@ -0,0 +1,434 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": [],
|
| 7 |
+
"gpuType": "T4"
|
| 8 |
+
},
|
| 9 |
+
"kernelspec": {
|
| 10 |
+
"name": "python3",
|
| 11 |
+
"display_name": "Python 3"
|
| 12 |
+
},
|
| 13 |
+
"language_info": {
|
| 14 |
+
"name": "python"
|
| 15 |
+
},
|
| 16 |
+
"accelerator": "GPU"
|
| 17 |
+
},
|
| 18 |
+
"cells": [
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "code",
|
| 21 |
+
"source": [
|
| 22 |
+
"#@title Mount Google Drive\n",
|
| 23 |
+
"from google.colab import drive\n",
|
| 24 |
+
"drive.mount('/content/drive')"
|
| 25 |
+
],
|
| 26 |
+
"metadata": {
|
| 27 |
+
"id": "RkuSSLb7t39L",
|
| 28 |
+
"cellView": "form"
|
| 29 |
+
},
|
| 30 |
+
"execution_count": null,
|
| 31 |
+
"outputs": []
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"cell_type": "code",
|
| 35 |
+
"source": [
|
| 36 |
+
"#@title Parameters\n",
|
| 37 |
+
"#@markdown ### **1. General Settings**\n",
|
| 38 |
+
"project_name = \"\" #@param {type:\"string\"}\n",
|
| 39 |
+
"mode = \"Splitting\" #@param [\"Splitting\", \"Separate\"]\n",
|
| 40 |
+
"demucs_model = \"htdemucs\" #@param [\"htdemucs\", \"demucs\", \"htdemucs_ft\", \"demucs_extra\"]\n",
|
| 41 |
+
"\n",
|
| 42 |
+
"#@markdown ---\n",
|
| 43 |
+
"#@markdown ### **2. Input Source**\n",
|
| 44 |
+
"dataset_source = \"Youtube\" #@param [\"Youtube\", \"Drive\"]\n",
|
| 45 |
+
"#@markdown **If YouTube:** Provide one or more URLs, separated by commas.\n",
|
| 46 |
+
"youtube_urls = \"\" #@param {type:\"string\"}\n",
|
| 47 |
+
"#@markdown **If Drive:** Provide the full path to the FOLDER containing your audio files.\n",
|
| 48 |
+
"google_drive_folder_path = \"\" #@param {type:\"string\"}\n",
|
| 49 |
+
"\n",
|
| 50 |
+
"#@markdown ---\n",
|
| 51 |
+
"#@markdown ### **3. Processing Settings**\n",
|
| 52 |
+
"#@markdown **YouTube Trimming (Optional):** Use HH:MM:SS format.\n",
|
| 53 |
+
"start_time = \"\" #@param {type:\"string\"}\n",
|
| 54 |
+
"end_time = \"\" #@param {type:\"string\"}\n",
|
| 55 |
+
"#@markdown **Long Audio Handling:** Split audio longer than this duration before processing with Demucs.\n",
|
| 56 |
+
"chunk_duration_in_minutes = \"30 minutes\" #@param [\"10 minutes\", \"15 minutes\", \"20 minutes\", \"30 minutes\", \"45 minutes\", \"60 minutes\"]\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"#@markdown ---\n",
|
| 59 |
+
"#@markdown ### **4. Output Settings**\n",
|
| 60 |
+
"#@markdown Sample rate for the output files. `0` uses the original sample rate.\n",
|
| 61 |
+
"output_sample_rate = \"48000\" #@param [\"0\", \"8000\", \"16000\", \"22050\", \"32000\", \"44100\", \"48000\"]\n",
|
| 62 |
+
"output_format = \"mp3\" #@param [\"wav\", \"mp3\"]\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"# --- Process Parameters for the next cell ---\n",
|
| 65 |
+
"chunk_duration_map = {\n",
|
| 66 |
+
" \"10 minutes\": 600,\n",
|
| 67 |
+
" \"15 minutes\": 900,\n",
|
| 68 |
+
" \"20 minutes\": 1200,\n",
|
| 69 |
+
" \"30 minutes\": 1800,\n",
|
| 70 |
+
" \"45 minutes\": 2700,\n",
|
| 71 |
+
" \"60 minutes\": 3600\n",
|
| 72 |
+
"}\n",
|
| 73 |
+
"chunk_duration = chunk_duration_map[chunk_duration_in_minutes]\n",
|
| 74 |
+
"output_sr = int(output_sample_rate)\n",
|
| 75 |
+
"\n",
|
| 76 |
+
"# Map new variables to old names for compatibility with the processing script\n",
|
| 77 |
+
"url = youtube_urls\n",
|
| 78 |
+
"drive_path = google_drive_folder_path\n",
|
| 79 |
+
"dataset = dataset_source"
|
| 80 |
+
],
|
| 81 |
+
"metadata": {
|
| 82 |
+
"id": "23UmiGqUt_0a",
|
| 83 |
+
"cellView": "form"
|
| 84 |
+
},
|
| 85 |
+
"execution_count": null,
|
| 86 |
+
"outputs": []
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"cell_type": "code",
|
| 90 |
+
"source": [
|
| 91 |
+
"#@title Process Dataset\n",
|
| 92 |
+
"import os\n",
|
| 93 |
+
"import subprocess\n",
|
| 94 |
+
"import glob\n",
|
| 95 |
+
"import shutil\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"print(\"Memulai proses...\\n\")\n",
|
| 98 |
+
"\n",
|
| 99 |
+
"# Pastikan runtime Colab menggunakan GPU\n",
|
| 100 |
+
"print(\"GPU Info:\")\n",
|
| 101 |
+
"!nvidia-smi\n",
|
| 102 |
+
"print(\"\\n\")\n",
|
| 103 |
+
"\n",
|
| 104 |
+
"# --- Helper Functions ---\n",
|
| 105 |
+
"def get_duration(file_path):\n",
|
| 106 |
+
" try:\n",
|
| 107 |
+
" result = subprocess.run(\n",
|
| 108 |
+
" [\"ffprobe\", \"-v\", \"error\", \"-show_entries\", \"format=duration\",\n",
|
| 109 |
+
" \"-of\", \"default=noprint_wrappers=1:nokey=1\", file_path],\n",
|
| 110 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)\n",
|
| 111 |
+
" return float(result.stdout.strip())\n",
|
| 112 |
+
" except Exception as e:\n",
|
| 113 |
+
" print(f\"Gagal mendapatkan durasi audio untuk {file_path}: {e}\")\n",
|
| 114 |
+
" return None\n",
|
| 115 |
+
"\n",
|
| 116 |
+
"def run_command(command):\n",
|
| 117 |
+
" result = subprocess.run(command, shell=True, capture_output=True, text=True)\n",
|
| 118 |
+
" if result.stdout:\n",
|
| 119 |
+
" print(result.stdout)\n",
|
| 120 |
+
" if result.stderr:\n",
|
| 121 |
+
" print(result.stderr)\n",
|
| 122 |
+
"\n",
|
| 123 |
+
"# --- Input Validation ---\n",
|
| 124 |
+
"if not project_name:\n",
|
| 125 |
+
" raise ValueError(\"Error: Project Name tidak boleh kosong!\")\n",
|
| 126 |
+
"if dataset == \"Youtube\" and not url:\n",
|
| 127 |
+
" raise ValueError(\"Error: URL tidak boleh kosong untuk dataset Youtube!\")\n",
|
| 128 |
+
"if dataset == \"Drive\" and not drive_path:\n",
|
| 129 |
+
" raise ValueError(\"Error: Drive Path tidak boleh kosong untuk dataset Drive!\")\n",
|
| 130 |
+
"\n",
|
| 131 |
+
"# --- Install Dependencies ---\n",
|
| 132 |
+
"print(\"Menginstal/memperbarui dependensi (yt_dlp, ffmpeg, demucs, librosa)...\")\n",
|
| 133 |
+
"run_command(\"python3 -m pip install --upgrade yt_dlp ffmpeg-python demucs librosa soundfile --quiet\")\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"# === STEP 1: Gather All Audio Sources ===\n",
|
| 136 |
+
"source_audio_paths = []\n",
|
| 137 |
+
"temp_download_folder = \"temp_audio_downloads\"\n",
|
| 138 |
+
"os.makedirs(temp_download_folder, exist_ok=True)\n",
|
| 139 |
+
"\n",
|
| 140 |
+
"if dataset == \"Youtube\":\n",
|
| 141 |
+
" urls = [u.strip() for u in url.split(',') if u.strip()]\n",
|
| 142 |
+
" print(f\"Ditemukan {len(urls)} URL YouTube untuk diproses.\")\n",
|
| 143 |
+
" import yt_dlp\n",
|
| 144 |
+
" for i, u in enumerate(urls):\n",
|
| 145 |
+
" print(f\"\\nDownloading audio ({i+1}/{len(urls)}) dari: {u}\")\n",
|
| 146 |
+
" try:\n",
|
| 147 |
+
" ydl_opts = {\n",
|
| 148 |
+
" 'format': 'bestaudio/best',\n",
|
| 149 |
+
" 'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}],\n",
|
| 150 |
+
" 'outtmpl': f'{temp_download_folder}/{project_name}_yt_{i+1}.%(ext)s'\n",
|
| 151 |
+
" }\n",
|
| 152 |
+
" with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
|
| 153 |
+
" ydl.download([u])\n",
|
| 154 |
+
" downloaded_file = os.path.abspath(f\"{temp_download_folder}/{project_name}_yt_{i+1}.wav\")\n",
|
| 155 |
+
"\n",
|
| 156 |
+
" # Trimming Logic\n",
|
| 157 |
+
" if start_time and end_time:\n",
|
| 158 |
+
" print(f\"Melakukan trimming audio dari {start_time} ke {end_time}...\")\n",
|
| 159 |
+
" trimmed_file = os.path.abspath(f\"{temp_download_folder}/{project_name}_yt_{i+1}_trimmed.wav\")\n",
|
| 160 |
+
" trim_cmd = f'ffmpeg -i \"{downloaded_file}\" -ss {start_time} -to {end_time} -c copy \"{trimmed_file}\"'\n",
|
| 161 |
+
" run_command(trim_cmd)\n",
|
| 162 |
+
" if os.path.exists(trimmed_file):\n",
|
| 163 |
+
" source_audio_paths.append(trimmed_file)\n",
|
| 164 |
+
" else:\n",
|
| 165 |
+
" print(f\"Warning: Gagal melakukan trimming, file akan diproses penuh.\")\n",
|
| 166 |
+
" source_audio_paths.append(downloaded_file)\n",
|
| 167 |
+
" else:\n",
|
| 168 |
+
" source_audio_paths.append(downloaded_file)\n",
|
| 169 |
+
" except Exception as e:\n",
|
| 170 |
+
" print(f\"Gagal mendownload atau memproses URL {u}: {e}\")\n",
|
| 171 |
+
"elif dataset == \"Drive\":\n",
|
| 172 |
+
" print(f\"Mencari file audio di folder: {drive_path}\")\n",
|
| 173 |
+
" allowed_extensions = [\"*.wav\", \"*.mp3\", \"*.flac\", \"*.m4a\"]\n",
|
| 174 |
+
" for ext in allowed_extensions:\n",
|
| 175 |
+
" source_audio_paths.extend(glob.glob(os.path.join(drive_path, ext)))\n",
|
| 176 |
+
" print(f\"Ditemukan {len(source_audio_paths)} file audio.\")\n",
|
| 177 |
+
"\n",
|
| 178 |
+
"if not source_audio_paths:\n",
|
| 179 |
+
" raise Exception(\"Tidak ada file audio sumber yang ditemukan. Hentikan proses.\")\n",
|
| 180 |
+
"\n",
|
| 181 |
+
"# === STEP 2: Process Each Audio Source with Demucs ===\n",
|
| 182 |
+
"all_vocals_paths = []\n",
|
| 183 |
+
"for idx, audio_input in enumerate(source_audio_paths):\n",
|
| 184 |
+
" current_audio_name = os.path.splitext(os.path.basename(audio_input))[0]\n",
|
| 185 |
+
" print(f\"\\n--- Memproses file {idx+1}/{len(source_audio_paths)}: {current_audio_name} ---\")\n",
|
| 186 |
+
" duration = get_duration(audio_input)\n",
|
| 187 |
+
" if duration is None:\n",
|
| 188 |
+
" print(f\"Melewatkan file karena tidak bisa mendapatkan durasi.\")\n",
|
| 189 |
+
" continue\n",
|
| 190 |
+
" print(f\"Durasi audio: {duration:.0f} detik.\")\n",
|
| 191 |
+
"\n",
|
| 192 |
+
" if duration > chunk_duration:\n",
|
| 193 |
+
" print(f\"Audio lebih panjang dari {chunk_duration} detik. Melakukan splitting audio menjadi beberapa chunk...\")\n",
|
| 194 |
+
" chunk_folder = f\"chunks/{current_audio_name}\"\n",
|
| 195 |
+
" os.makedirs(chunk_folder, exist_ok=True)\n",
|
| 196 |
+
" split_cmd = f'ffmpeg -hide_banner -loglevel error -i \"{audio_input}\" -f segment -segment_time {chunk_duration} -c copy \"{chunk_folder}/{current_audio_name}_%03d.wav\"'\n",
|
| 197 |
+
" run_command(split_cmd)\n",
|
| 198 |
+
" chunk_files = sorted(glob.glob(f\"{chunk_folder}/{current_audio_name}_*.wav\"))\n",
|
| 199 |
+
" if not chunk_files:\n",
|
| 200 |
+
" print(f\"Warning: Gagal membuat chunk untuk {current_audio_name}. Melewatkan file ini.\")\n",
|
| 201 |
+
" continue\n",
|
| 202 |
+
"\n",
|
| 203 |
+
" print(f\"Memproses {len(chunk_files)} chunk dengan Demucs...\")\n",
|
| 204 |
+
" chunk_vocals_files = []\n",
|
| 205 |
+
" for chunk_file in chunk_files:\n",
|
| 206 |
+
" demucs_cmd = f'python3 -m demucs.separate --two-stems vocals -n {demucs_model} \"{chunk_file}\"'\n",
|
| 207 |
+
" run_command(demucs_cmd)\n",
|
| 208 |
+
" base = os.path.splitext(os.path.basename(chunk_file))[0]\n",
|
| 209 |
+
" vocals_path = f\"separated/{demucs_model}/{base}/vocals.wav\"\n",
|
| 210 |
+
" if os.path.exists(vocals_path):\n",
|
| 211 |
+
" chunk_vocals_files.append(os.path.abspath(vocals_path))\n",
|
| 212 |
+
" else:\n",
|
| 213 |
+
" print(f\"Warning: Hasil Demucs untuk {chunk_file} tidak ditemukan.\")\n",
|
| 214 |
+
"\n",
|
| 215 |
+
" if not chunk_vocals_files:\n",
|
| 216 |
+
" print(f\"Warning: Tidak ada vokal yang berhasil diekstrak untuk {current_audio_name}. Melewatkan file ini.\")\n",
|
| 217 |
+
" continue\n",
|
| 218 |
+
"\n",
|
| 219 |
+
" list_file = \"chunks_list.txt\"\n",
|
| 220 |
+
" with open(list_file, \"w\") as f:\n",
|
| 221 |
+
" for file in chunk_vocals_files:\n",
|
| 222 |
+
" # <<< FIX: Changed '\\\\n' to '\\n' to create a proper newline character\n",
|
| 223 |
+
" f.write(f\"file '{file}'\\n\")\n",
|
| 224 |
+
"\n",
|
| 225 |
+
" combined_vocals_path = f\"separated/{demucs_model}/{current_audio_name}_vocals.wav\"\n",
|
| 226 |
+
" concat_cmd = f'ffmpeg -hide_banner -loglevel error -f concat -safe 0 -i \"{list_file}\" -c copy \"{combined_vocals_path}\"'\n",
|
| 227 |
+
" run_command(concat_cmd)\n",
|
| 228 |
+
" print(\"Penggabungan vokal dari chunk selesai.\")\n",
|
| 229 |
+
"\n",
|
| 230 |
+
" # Convert combined vocals to the desired output format and sample rate\n",
|
| 231 |
+
" final_vocals_output_path = os.path.splitext(combined_vocals_path)[0] + f'.{output_format}'\n",
|
| 232 |
+
" print(f\"Mengonversi vokal gabungan ke format {output_format.upper()} (Sample Rate: {output_sr if output_sr != 0 else 'asli'})...\")\n",
|
| 233 |
+
" convert_cmd = f'ffmpeg -hide_banner -loglevel error -i \"{combined_vocals_path}\"'\n",
|
| 234 |
+
" if output_sr != 0:\n",
|
| 235 |
+
" convert_cmd += f' -ar {output_sr}'\n",
|
| 236 |
+
" convert_cmd += f' -y \"{final_vocals_output_path}\"'\n",
|
| 237 |
+
" run_command(convert_cmd)\n",
|
| 238 |
+
"\n",
|
| 239 |
+
" if os.path.exists(final_vocals_output_path):\n",
|
| 240 |
+
" if combined_vocals_path != final_vocals_output_path:\n",
|
| 241 |
+
" os.remove(combined_vocals_path) # Clean up the intermediate .wav\n",
|
| 242 |
+
" all_vocals_paths.append(os.path.abspath(final_vocals_output_path))\n",
|
| 243 |
+
" else:\n",
|
| 244 |
+
" print(f\"Warning: Gagal mengonversi vokal. Menyimpan file .wav asli.\")\n",
|
| 245 |
+
" all_vocals_paths.append(os.path.abspath(combined_vocals_path))\n",
|
| 246 |
+
" else:\n",
|
| 247 |
+
" print(\"Memproses audio penuh dengan Demucs...\")\n",
|
| 248 |
+
" demucs_cmd = f'python3 -m demucs.separate --two-stems vocals -n {demucs_model} -o \"separated\" --filename \"{current_audio_name}/{{stem}}.{{ext}}\" \"{audio_input}\"'\n",
|
| 249 |
+
" run_command(demucs_cmd)\n",
|
| 250 |
+
" vocals_final_wav = f\"separated/{demucs_model}/{current_audio_name}/vocals.wav\"\n",
|
| 251 |
+
"\n",
|
| 252 |
+
" if os.path.exists(vocals_final_wav):\n",
|
| 253 |
+
" # Convert the final vocals to the desired output format and sample rate\n",
|
| 254 |
+
" final_vocals_output_path = os.path.splitext(vocals_final_wav)[0] + f'.{output_format}'\n",
|
| 255 |
+
" print(f\"Mengonversi vokal ke format {output_format.upper()} (Sample Rate: {output_sr if output_sr != 0 else 'asli'})...\")\n",
|
| 256 |
+
" convert_cmd = f'ffmpeg -hide_banner -loglevel error -i \"{vocals_final_wav}\"'\n",
|
| 257 |
+
" if output_sr != 0:\n",
|
| 258 |
+
" convert_cmd += f' -ar {output_sr}'\n",
|
| 259 |
+
" convert_cmd += f' -y \"{final_vocals_output_path}\"'\n",
|
| 260 |
+
" run_command(convert_cmd)\n",
|
| 261 |
+
"\n",
|
| 262 |
+
" if os.path.exists(final_vocals_output_path):\n",
|
| 263 |
+
" if vocals_final_wav != final_vocals_output_path:\n",
|
| 264 |
+
" os.remove(vocals_final_wav) # Clean up the original .wav\n",
|
| 265 |
+
" all_vocals_paths.append(os.path.abspath(final_vocals_output_path))\n",
|
| 266 |
+
" else:\n",
|
| 267 |
+
" print(f\"Warning: Gagal mengonversi vokal. Menyimpan file .wav asli.\")\n",
|
| 268 |
+
" all_vocals_paths.append(os.path.abspath(vocals_final_wav))\n",
|
| 269 |
+
" else:\n",
|
| 270 |
+
" print(f\"Warning: Gagal memisahkan vokal untuk {current_audio_name}.\")\n",
|
| 271 |
+
" continue\n",
|
| 272 |
+
" print(\"Proses pemisahan vokal selesai.\")\n",
|
| 273 |
+
"\n",
|
| 274 |
+
"# === STEP 3: Splitting Vocals (Jika mode = \"Splitting\") ===\n",
|
| 275 |
+
"if mode == \"Splitting\":\n",
|
| 276 |
+
" print(\"\\n--- Melakukan Splitting pada Semua Hasil Vokal ---\")\n",
|
| 277 |
+
" output_slicer_dir = f\"dataset/{project_name}\"\n",
|
| 278 |
+
" os.makedirs(output_slicer_dir, exist_ok=True)\n",
|
| 279 |
+
" try:\n",
|
| 280 |
+
" import numpy as np\n",
|
| 281 |
+
" import librosa\n",
|
| 282 |
+
" import soundfile as sf\n",
|
| 283 |
+
"\n",
|
| 284 |
+
" def get_rms(y, frame_length=2048, hop_length=512, pad_mode=\"constant\"):\n",
|
| 285 |
+
" padding = (int(frame_length // 2), int(frame_length // 2))\n",
|
| 286 |
+
" y = np.pad(y, padding, mode=pad_mode)\n",
|
| 287 |
+
" axis = -1\n",
|
| 288 |
+
" out_strides = y.strides + (y.strides[axis],)\n",
|
| 289 |
+
" x_shape_trimmed = list(y.shape)\n",
|
| 290 |
+
" x_shape_trimmed[axis] -= frame_length - 1\n",
|
| 291 |
+
" out_shape = tuple(x_shape_trimmed) + (frame_length,)\n",
|
| 292 |
+
" xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)\n",
|
| 293 |
+
" if axis < 0:\n",
|
| 294 |
+
" target_axis = axis - 1\n",
|
| 295 |
+
" else:\n",
|
| 296 |
+
" target_axis = axis + 1\n",
|
| 297 |
+
" xw = np.moveaxis(xw, -1, target_axis)\n",
|
| 298 |
+
" slices = [slice(None)] * xw.ndim\n",
|
| 299 |
+
" slices[axis] = slice(0, None, hop_length)\n",
|
| 300 |
+
" x = xw[tuple(slices)]\n",
|
| 301 |
+
" power = np.mean(np.abs(x)**2, axis=-2, keepdims=True)\n",
|
| 302 |
+
" return np.sqrt(power).squeeze(0)\n",
|
| 303 |
+
"\n",
|
| 304 |
+
" class Slicer:\n",
|
| 305 |
+
" def __init__(self, sr, threshold=-40., min_length=5000, min_interval=300, hop_size=20, max_sil_kept=5000):\n",
|
| 306 |
+
" if not min_length >= min_interval >= hop_size:\n",
|
| 307 |
+
" raise ValueError('min_length >= min_interval >= hop_size harus terpenuhi')\n",
|
| 308 |
+
" if not max_sil_kept >= hop_size:\n",
|
| 309 |
+
" raise ValueError('max_sil_kept >= hop_size harus terpenuhi')\n",
|
| 310 |
+
" min_interval = sr * min_interval / 1000\n",
|
| 311 |
+
" self.threshold = 10 ** (threshold/20.)\n",
|
| 312 |
+
" self.hop_size = round(sr * hop_size / 1000)\n",
|
| 313 |
+
" self.win_size = min(round(min_interval), 4 * self.hop_size)\n",
|
| 314 |
+
" self.min_length = round(sr * min_length / 1000 / self.hop_size)\n",
|
| 315 |
+
" self.min_interval = round(min_interval / self.hop_size)\n",
|
| 316 |
+
" self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)\n",
|
| 317 |
+
"\n",
|
| 318 |
+
" def _apply_slice(self, waveform, begin, end):\n",
|
| 319 |
+
" return waveform[begin*self.hop_size: min(len(waveform), end*self.hop_size)]\n",
|
| 320 |
+
"\n",
|
| 321 |
+
" def slice(self, waveform):\n",
|
| 322 |
+
" if len(waveform) <= self.min_length:\n",
|
| 323 |
+
" return [waveform]\n",
|
| 324 |
+
" rms_list = get_rms(waveform, frame_length=self.win_size, hop_length=self.hop_size)\n",
|
| 325 |
+
" sil_tags = []\n",
|
| 326 |
+
" silence_start = None\n",
|
| 327 |
+
" clip_start = 0\n",
|
| 328 |
+
" for i, rms in enumerate(rms_list):\n",
|
| 329 |
+
" if rms < self.threshold:\n",
|
| 330 |
+
" if silence_start is None:\n",
|
| 331 |
+
" silence_start = i\n",
|
| 332 |
+
" continue\n",
|
| 333 |
+
" if silence_start is None:\n",
|
| 334 |
+
" continue\n",
|
| 335 |
+
" is_leading_silence = silence_start == 0 and i > self.max_sil_kept\n",
|
| 336 |
+
" need_slice_middle = i - silence_start >= self.min_interval and i - clip_start >= self.min_length\n",
|
| 337 |
+
" if not is_leading_silence and not need_slice_middle:\n",
|
| 338 |
+
" silence_start = None\n",
|
| 339 |
+
" continue\n",
|
| 340 |
+
" if i - silence_start <= self.max_sil_kept:\n",
|
| 341 |
+
" pos = rms_list[silence_start: i+1].argmin() + silence_start\n",
|
| 342 |
+
" sil_tags.append((0, pos) if silence_start == 0 else (pos, pos))\n",
|
| 343 |
+
" clip_start = pos\n",
|
| 344 |
+
" elif i - silence_start <= self.max_sil_kept * 2:\n",
|
| 345 |
+
" pos = rms_list[i-self.max_sil_kept: silence_start+self.max_sil_kept+1].argmin() + i-self.max_sil_kept\n",
|
| 346 |
+
" pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start\n",
|
| 347 |
+
" pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept\n",
|
| 348 |
+
" sil_tags.append((0, pos_r) if silence_start == 0 else (min(pos_l, pos), max(pos_r, pos)))\n",
|
| 349 |
+
" clip_start = pos_r\n",
|
| 350 |
+
" else:\n",
|
| 351 |
+
" pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start\n",
|
| 352 |
+
" pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept\n",
|
| 353 |
+
" sil_tags.append((0, pos_r) if silence_start == 0 else (pos_l, pos_r))\n",
|
| 354 |
+
" clip_start = pos_r\n",
|
| 355 |
+
" silence_start = None\n",
|
| 356 |
+
" total_frames = len(rms_list)\n",
|
| 357 |
+
" if silence_start is not None and total_frames - silence_start >= self.min_interval:\n",
|
| 358 |
+
" silence_end = min(total_frames, silence_start+self.max_sil_kept)\n",
|
| 359 |
+
" pos = rms_list[silence_start: silence_end+1].argmin() + silence_start\n",
|
| 360 |
+
" sil_tags.append((pos, total_frames+1))\n",
|
| 361 |
+
" if len(sil_tags) == 0:\n",
|
| 362 |
+
" return [waveform]\n",
|
| 363 |
+
" chunks = []\n",
|
| 364 |
+
" if sil_tags[0][0] > 0:\n",
|
| 365 |
+
" chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))\n",
|
| 366 |
+
" for i in range(len(sil_tags)-1):\n",
|
| 367 |
+
" chunks.append(self._apply_slice(waveform, sil_tags[i][1], sil_tags[i+1][0]))\n",
|
| 368 |
+
" if sil_tags[-1][1] < total_frames:\n",
|
| 369 |
+
" chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames))\n",
|
| 370 |
+
" return chunks\n",
|
| 371 |
+
"\n",
|
| 372 |
+
" global_chunk_count = 0\n",
|
| 373 |
+
" for vocal_file in all_vocals_paths:\n",
|
| 374 |
+
" print(f\"Slicing {os.path.basename(vocal_file)}...\")\n",
|
| 375 |
+
" if not os.path.exists(vocal_file):\n",
|
| 376 |
+
" print(f\" Warning: File vokal tidak ditemukan: {vocal_file}. Melewatkan.\")\n",
|
| 377 |
+
" continue\n",
|
| 378 |
+
"\n",
|
| 379 |
+
" audio, sr = librosa.load(vocal_file, sr=None, mono=True)\n",
|
| 380 |
+
" slicer_sr = output_sr if output_sr != 0 else sr\n",
|
| 381 |
+
"\n",
|
| 382 |
+
" slicer = Slicer(sr=slicer_sr, threshold=-40, min_length=5000, min_interval=500, hop_size=10, max_sil_kept=500)\n",
|
| 383 |
+
" chunks = slicer.slice(audio)\n",
|
| 384 |
+
" for chunk in chunks:\n",
|
| 385 |
+
" sf.write(f\"{output_slicer_dir}/split_{global_chunk_count}.{output_format}\", chunk, slicer_sr)\n",
|
| 386 |
+
" global_chunk_count += 1\n",
|
| 387 |
+
" print(f\"\\nSplitting selesai. Total {global_chunk_count} file dibuat.\")\n",
|
| 388 |
+
" except Exception as e:\n",
|
| 389 |
+
" print(f\"Terjadi kesalahan saat splitting: {e}\")\n",
|
| 390 |
+
" raise e\n",
|
| 391 |
+
"\n",
|
| 392 |
+
"# === STEP 4: Copy Hasil ke Google Drive ===\n",
|
| 393 |
+
"print(\"\\n--- Menyalin Hasil ke Google Drive ---\")\n",
|
| 394 |
+
"base_drive_folder = f\"/content/drive/MyDrive/dataset/{project_name}\"\n",
|
| 395 |
+
"vocals_drive_folder = f\"{base_drive_folder}/vocals_only\"\n",
|
| 396 |
+
"sliced_drive_folder = f\"{base_drive_folder}/sliced_mixed\"\n",
|
| 397 |
+
"\n",
|
| 398 |
+
"os.makedirs(vocals_drive_folder, exist_ok=True)\n",
|
| 399 |
+
"os.makedirs(sliced_drive_folder, exist_ok=True)\n",
|
| 400 |
+
"\n",
|
| 401 |
+
"print(f\"Menyalin vokal mentah ke: {vocals_drive_folder}\")\n",
|
| 402 |
+
"for vocal_path in all_vocals_paths:\n",
|
| 403 |
+
" if os.path.exists(vocal_path):\n",
|
| 404 |
+
" shutil.copy(vocal_path, vocals_drive_folder)\n",
|
| 405 |
+
"\n",
|
| 406 |
+
"if mode == \"Splitting\":\n",
|
| 407 |
+
" print(f\"Menyalin dataset yang sudah di-slice ke: {sliced_drive_folder}\")\n",
|
| 408 |
+
" local_sliced_folder = f\"dataset/{project_name}\"\n",
|
| 409 |
+
" for item in os.listdir(local_sliced_folder):\n",
|
| 410 |
+
" s = os.path.join(local_sliced_folder, item)\n",
|
| 411 |
+
" d = os.path.join(sliced_drive_folder, item)\n",
|
| 412 |
+
" if os.path.isdir(s):\n",
|
| 413 |
+
" shutil.copytree(s, d, dirs_exist_ok=True)\n",
|
| 414 |
+
" else:\n",
|
| 415 |
+
" shutil.copy2(s, d)\n",
|
| 416 |
+
"\n",
|
| 417 |
+
"# --- Cleanup ---\n",
|
| 418 |
+
"shutil.rmtree(\"temp_audio_downloads\", ignore_errors=True)\n",
|
| 419 |
+
"shutil.rmtree(\"chunks\", ignore_errors=True)\n",
|
| 420 |
+
"shutil.rmtree(\"separated\", ignore_errors=True)\n",
|
| 421 |
+
"if os.path.exists(\"chunks_list.txt\"):\n",
|
| 422 |
+
" os.remove(\"chunks_list.txt\")\n",
|
| 423 |
+
"\n",
|
| 424 |
+
"print(\"\\nProses selesai!\")"
|
| 425 |
+
],
|
| 426 |
+
"metadata": {
|
| 427 |
+
"id": "0L7br10ouMlL",
|
| 428 |
+
"cellView": "form"
|
| 429 |
+
},
|
| 430 |
+
"execution_count": null,
|
| 431 |
+
"outputs": []
|
| 432 |
+
}
|
| 433 |
+
]
|
| 434 |
+
}
|