glickko commited on
Commit
f7dd86b
·
verified ·
1 Parent(s): 641d0e6

Delete RVC_Datasets_Maker.ipynb

Browse files
Files changed (1) hide show
  1. RVC_Datasets_Maker.ipynb +0 -399
RVC_Datasets_Maker.ipynb DELETED
@@ -1,399 +0,0 @@
1
- {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": [],
7
- "gpuType": "T4"
8
- },
9
- "kernelspec": {
10
- "name": "python3",
11
- "display_name": "Python 3"
12
- },
13
- "language_info": {
14
- "name": "python"
15
- },
16
- "accelerator": "GPU"
17
- },
18
- "cells": [
19
- {
20
- "cell_type": "code",
21
- "source": [
22
- "#@title Mount Google Drive\n",
23
- "from google.colab import drive\n",
24
- "drive.mount('/content/drive')"
25
- ],
26
- "metadata": {
27
- "id": "RkuSSLb7t39L",
28
- "cellView": "form"
29
- },
30
- "execution_count": null,
31
- "outputs": []
32
- },
33
- {
34
- "cell_type": "code",
35
- "source": [
36
- "#@title Parameters\n",
37
- "#@markdown ### **1. General Settings**\n",
38
- "project_name = \"\" #@param {type:\"string\"}\n",
39
- "mode = \"Splitting\" #@param [\"Splitting\", \"Separate\"]\n",
40
- "demucs_model = \"htdemucs\" #@param [\"htdemucs\", \"demucs\", \"htdemucs_ft\", \"demucs_extra\"]\n",
41
- "\n",
42
- "#@markdown ---\n",
43
- "#@markdown ### **2. Input Source**\n",
44
- "dataset_source = \"Youtube\" #@param [\"Youtube\", \"Drive\"]\n",
45
- "#@markdown **If YouTube:** Provide one or more URLs, separated by commas.\n",
46
- "youtube_urls = \"\" #@param {type:\"string\"}\n",
47
- "#@markdown **If Drive:** Provide the full path to the FOLDER containing your audio files.\n",
48
- "google_drive_folder_path = \"\" #@param {type:\"string\"}\n",
49
- "\n",
50
- "#@markdown ---\n",
51
- "#@markdown ### **3. Processing Settings**\n",
52
- "#@markdown **YouTube Trimming (Optional):** Use HH:MM:SS format.\n",
53
- "start_time = \"\" #@param {type:\"string\"}\n",
54
- "end_time = \"\" #@param {type:\"string\"}\n",
55
- "#@markdown **Long Audio Handling:** Split audio longer than this duration before processing with Demucs.\n",
56
- "chunk_duration_in_minutes = \"30 minutes\" #@param [\"10 minutes\", \"15 minutes\", \"20 minutes\", \"30 minutes\", \"45 minutes\", \"60 minutes\"]\n",
57
- "\n",
58
- "#@markdown ---\n",
59
- "#@markdown ### **4. Output Settings**\n",
60
- "#@markdown Sample rate for the output files. `0` uses the original sample rate.\n",
61
- "output_sample_rate = \"48000\" #@param [\"0\", \"8000\", \"16000\", \"22050\", \"32000\", \"44100\", \"48000\"]\n",
62
- "output_format = \"mp3\" #@param [\"wav\", \"mp3\"]\n",
63
- "\n",
64
- "# --- Process Parameters for the next cell ---\n",
65
- "chunk_duration_map = {\n",
66
- " \"10 minutes\": 600,\n",
67
- " \"15 minutes\": 900,\n",
68
- " \"20 minutes\": 1200,\n",
69
- " \"30 minutes\": 1800,\n",
70
- " \"45 minutes\": 2700,\n",
71
- " \"60 minutes\": 3600\n",
72
- "}\n",
73
- "chunk_duration = chunk_duration_map[chunk_duration_in_minutes]\n",
74
- "output_sr = int(output_sample_rate)\n",
75
- "\n",
76
- "# Map new variables to old names for compatibility with the processing script\n",
77
- "url = youtube_urls\n",
78
- "drive_path = google_drive_folder_path\n",
79
- "dataset = dataset_source"
80
- ],
81
- "metadata": {
82
- "id": "23UmiGqUt_0a",
83
- "cellView": "form"
84
- },
85
- "execution_count": null,
86
- "outputs": []
87
- },
88
- {
89
- "cell_type": "code",
90
- "source": [
91
- "#@title Process Dataset\n",
92
- "import os\n",
93
- "import subprocess\n",
94
- "import glob\n",
95
- "import shutil\n",
96
- "\n",
97
- "print(\"Memulai proses...\\n\")\n",
98
- "\n",
99
- "# Pastikan runtime Colab menggunakan GPU\n",
100
- "print(\"GPU Info:\")\n",
101
- "!nvidia-smi\n",
102
- "print(\"\\n\")\n",
103
- "\n",
104
- "# --- Helper Functions ---\n",
105
- "def get_duration(file_path):\n",
106
- " try:\n",
107
- " result = subprocess.run(\n",
108
- " [\"ffprobe\", \"-v\", \"error\", \"-show_entries\", \"format=duration\",\n",
109
- " \"-of\", \"default=noprint_wrappers=1:nokey=1\", file_path],\n",
110
- " stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)\n",
111
- " return float(result.stdout.strip())\n",
112
- " except Exception as e:\n",
113
- " print(f\"Gagal mendapatkan durasi audio untuk {file_path}: {e}\")\n",
114
- " return None\n",
115
- "\n",
116
- "def run_command(command):\n",
117
- " result = subprocess.run(command, shell=True, capture_output=True, text=True)\n",
118
- " if result.stdout:\n",
119
- " print(result.stdout)\n",
120
- " if result.stderr:\n",
121
- " print(result.stderr)\n",
122
- "\n",
123
- "# --- Input Validation ---\n",
124
- "if not project_name:\n",
125
- " raise ValueError(\"Error: Project Name tidak boleh kosong!\")\n",
126
- "if dataset == \"Youtube\" and not url:\n",
127
- " raise ValueError(\"Error: URL tidak boleh kosong untuk dataset Youtube!\")\n",
128
- "if dataset == \"Drive\" and not drive_path:\n",
129
- " raise ValueError(\"Error: Drive Path tidak boleh kosong untuk dataset Drive!\")\n",
130
- "\n",
131
- "# --- Install Dependencies ---\n",
132
- "print(\"Menginstal/memperbarui dependensi (yt_dlp, ffmpeg, demucs, librosa)...\")\n",
133
- "run_command(\"python3 -m pip install --upgrade yt_dlp ffmpeg-python demucs librosa soundfile --quiet\")\n",
134
- "\n",
135
- "# === STEP 1: Gather All Audio Sources ===\n",
136
- "source_audio_paths = []\n",
137
- "temp_download_folder = \"temp_audio_downloads\"\n",
138
- "os.makedirs(temp_download_folder, exist_ok=True)\n",
139
- "\n",
140
- "if dataset == \"Youtube\":\n",
141
- " urls = [u.strip() for u in url.split(',') if u.strip()]\n",
142
- " print(f\"Ditemukan {len(urls)} URL YouTube untuk diproses.\")\n",
143
- " import yt_dlp\n",
144
- " for i, u in enumerate(urls):\n",
145
- " print(f\"\\nDownloading audio ({i+1}/{len(urls)}) dari: {u}\")\n",
146
- " try:\n",
147
- " ydl_opts = {\n",
148
- " 'format': 'bestaudio/best',\n",
149
- " 'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}],\n",
150
- " 'outtmpl': f'{temp_download_folder}/{project_name}_yt_{i+1}.%(ext)s'\n",
151
- " }\n",
152
- " with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
153
- " ydl.download([u])\n",
154
- " downloaded_file = os.path.abspath(f\"{temp_download_folder}/{project_name}_yt_{i+1}.wav\")\n",
155
- "\n",
156
- " # Trimming Logic\n",
157
- " if start_time and end_time:\n",
158
- " print(f\"Melakukan trimming audio dari {start_time} ke {end_time}...\")\n",
159
- " trimmed_file = os.path.abspath(f\"{temp_download_folder}/{project_name}_yt_{i+1}_trimmed.wav\")\n",
160
- " trim_cmd = f'ffmpeg -i \"{downloaded_file}\" -ss {start_time} -to {end_time} -c copy \"{trimmed_file}\"'\n",
161
- " run_command(trim_cmd)\n",
162
- " if os.path.exists(trimmed_file):\n",
163
- " source_audio_paths.append(trimmed_file)\n",
164
- " else:\n",
165
- " print(f\"Warning: Gagal melakukan trimming, file akan diproses penuh.\")\n",
166
- " source_audio_paths.append(downloaded_file)\n",
167
- " else:\n",
168
- " source_audio_paths.append(downloaded_file)\n",
169
- " except Exception as e:\n",
170
- " print(f\"Gagal mendownload atau memproses URL {u}: {e}\")\n",
171
- "elif dataset == \"Drive\":\n",
172
- " print(f\"Mencari file audio di folder: {drive_path}\")\n",
173
- " allowed_extensions = [\"*.wav\", \"*.mp3\", \"*.flac\", \"*.m4a\"]\n",
174
- " for ext in allowed_extensions:\n",
175
- " source_audio_paths.extend(glob.glob(os.path.join(drive_path, ext)))\n",
176
- " print(f\"Ditemukan {len(source_audio_paths)} file audio.\")\n",
177
- "\n",
178
- "if not source_audio_paths:\n",
179
- " raise Exception(\"Tidak ada file audio sumber yang ditemukan. Hentikan proses.\")\n",
180
- "\n",
181
- "# === STEP 2: Process Each Audio Source with Demucs ===\n",
182
- "all_vocals_paths = []\n",
183
- "for idx, audio_input in enumerate(source_audio_paths):\n",
184
- " current_audio_name = os.path.splitext(os.path.basename(audio_input))[0]\n",
185
- " print(f\"\\n--- Memproses file {idx+1}/{len(source_audio_paths)}: {current_audio_name} ---\")\n",
186
- " duration = get_duration(audio_input)\n",
187
- " if duration is None:\n",
188
- " print(f\"Melewatkan file karena tidak bisa mendapatkan durasi.\")\n",
189
- " continue\n",
190
- " print(f\"Durasi audio: {duration:.0f} detik.\")\n",
191
- "\n",
192
- " if duration > chunk_duration:\n",
193
- " print(f\"Audio lebih panjang dari {chunk_duration} detik. Melakukan splitting audio menjadi beberapa chunk...\")\n",
194
- " chunk_folder = f\"chunks/{current_audio_name}\"\n",
195
- " os.makedirs(chunk_folder, exist_ok=True)\n",
196
- " split_cmd = f'ffmpeg -hide_banner -loglevel error -i \"{audio_input}\" -f segment -segment_time {chunk_duration} -c copy \"{chunk_folder}/{current_audio_name}_%03d.wav\"'\n",
197
- " run_command(split_cmd)\n",
198
- " chunk_files = sorted(glob.glob(f\"{chunk_folder}/{current_audio_name}_*.wav\"))\n",
199
- " if not chunk_files:\n",
200
- " print(f\"Warning: Gagal membuat chunk untuk {current_audio_name}. Melewatkan file ini.\")\n",
201
- " continue\n",
202
- "\n",
203
- " print(f\"Memproses {len(chunk_files)} chunk dengan Demucs...\")\n",
204
- " chunk_vocals_files = []\n",
205
- " for chunk_file in chunk_files:\n",
206
- " demucs_cmd = f'python3 -m demucs.separate --two-stems vocals -n {demucs_model} \"{chunk_file}\"'\n",
207
- " run_command(demucs_cmd)\n",
208
- " base = os.path.splitext(os.path.basename(chunk_file))[0]\n",
209
- " vocals_path = f\"separated/{demucs_model}/{base}/vocals.wav\"\n",
210
- " if os.path.exists(vocals_path):\n",
211
- " chunk_vocals_files.append(os.path.abspath(vocals_path))\n",
212
- " else:\n",
213
- " print(f\"Warning: Hasil Demucs untuk {chunk_file} tidak ditemukan.\")\n",
214
- "\n",
215
- " if not chunk_vocals_files:\n",
216
- " print(f\"Warning: Tidak ada vokal yang berhasil diekstrak untuk {current_audio_name}. Melewatkan file ini.\")\n",
217
- " continue\n",
218
- "\n",
219
- " list_file = \"chunks_list.txt\"\n",
220
- " with open(list_file, \"w\") as f:\n",
221
- " for file in chunk_vocals_files:\n",
222
- " f.write(f\"file '{file}'\\n\")\n",
223
- " combined_vocals_path = f\"separated/{demucs_model}/{current_audio_name}_vocals.wav\"\n",
224
- " concat_cmd = f'ffmpeg -hide_banner -loglevel error -f concat -safe 0 -i \"{list_file}\" -c copy \"{combined_vocals_path}\"'\n",
225
- " run_command(concat_cmd)\n",
226
- " print(\"Penggabungan vokal dari chunk selesai.\")\n",
227
- " all_vocals_paths.append(os.path.abspath(combined_vocals_path))\n",
228
- " else:\n",
229
- " print(\"Memproses audio penuh dengan Demucs...\")\n",
230
- " demucs_cmd = f'python3 -m demucs.separate --two-stems vocals -n {demucs_model} -o \"separated\" --filename \"{current_audio_name}/{{stem}}.{{ext}}\" \"{audio_input}\"'\n",
231
- " run_command(demucs_cmd)\n",
232
- " vocals_final = f\"separated/{demucs_model}/{current_audio_name}/vocals.wav\"\n",
233
- " if os.path.exists(vocals_final):\n",
234
- " all_vocals_paths.append(os.path.abspath(vocals_final))\n",
235
- " else:\n",
236
- " print(f\"Warning: Gagal memisahkan vokal untuk {current_audio_name}.\")\n",
237
- " continue\n",
238
- " print(\"Proses pemisahan vokal selesai.\")\n",
239
- "\n",
240
- "# === STEP 3: Splitting Vocals (Jika mode = \"Splitting\") ===\n",
241
- "if mode == \"Splitting\":\n",
242
- " print(\"\\n--- Melakukan Splitting pada Semua Hasil Vokal ---\")\n",
243
- " output_slicer_dir = f\"dataset/{project_name}\"\n",
244
- " os.makedirs(output_slicer_dir, exist_ok=True)\n",
245
- " try:\n",
246
- " import numpy as np\n",
247
- " import librosa\n",
248
- " import soundfile as sf\n",
249
- "\n",
250
- " # (Fungsi Slicer dan get_rms tetap sama, disertakan di sini)\n",
251
- " def get_rms(y, frame_length=2048, hop_length=512, pad_mode=\"constant\"):\n",
252
- " padding = (int(frame_length // 2), int(frame_length // 2))\n",
253
- " y = np.pad(y, padding, mode=pad_mode)\n",
254
- " axis = -1\n",
255
- " out_strides = y.strides + (y.strides[axis],)\n",
256
- " x_shape_trimmed = list(y.shape)\n",
257
- " x_shape_trimmed[axis] -= frame_length - 1\n",
258
- " out_shape = tuple(x_shape_trimmed) + (frame_length,)\n",
259
- " xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)\n",
260
- " if axis < 0:\n",
261
- " target_axis = axis - 1\n",
262
- " else:\n",
263
- " target_axis = axis + 1\n",
264
- " xw = np.moveaxis(xw, -1, target_axis)\n",
265
- " slices = [slice(None)] * xw.ndim\n",
266
- " slices[axis] = slice(0, None, hop_length)\n",
267
- " x = xw[tuple(slices)]\n",
268
- " power = np.mean(np.abs(x)**2, axis=-2, keepdims=True)\n",
269
- " return np.sqrt(power).squeeze(0)\n",
270
- "\n",
271
- " class Slicer:\n",
272
- " def __init__(self, sr, threshold=-40., min_length=5000, min_interval=300, hop_size=20, max_sil_kept=5000):\n",
273
- " if not min_length >= min_interval >= hop_size:\n",
274
- " raise ValueError('min_length >= min_interval >= hop_size harus terpenuhi')\n",
275
- " if not max_sil_kept >= hop_size:\n",
276
- " raise ValueError('max_sil_kept >= hop_size harus terpenuhi')\n",
277
- " min_interval = sr * min_interval / 1000\n",
278
- " self.threshold = 10 ** (threshold/20.)\n",
279
- " self.hop_size = round(sr * hop_size / 1000)\n",
280
- " self.win_size = min(round(min_interval), 4 * self.hop_size)\n",
281
- " self.min_length = round(sr * min_length / 1000 / self.hop_size)\n",
282
- " self.min_interval = round(min_interval / self.hop_size)\n",
283
- " self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)\n",
284
- "\n",
285
- " def _apply_slice(self, waveform, begin, end):\n",
286
- " return waveform[begin*self.hop_size: min(len(waveform), end*self.hop_size)]\n",
287
- "\n",
288
- " def slice(self, waveform):\n",
289
- " if len(waveform) <= self.min_length:\n",
290
- " return [waveform]\n",
291
- " rms_list = get_rms(waveform, frame_length=self.win_size, hop_length=self.hop_size)\n",
292
- " sil_tags = []\n",
293
- " silence_start = None\n",
294
- " clip_start = 0\n",
295
- " for i, rms in enumerate(rms_list):\n",
296
- " if rms < self.threshold:\n",
297
- " if silence_start is None:\n",
298
- " silence_start = i\n",
299
- " continue\n",
300
- " if silence_start is None:\n",
301
- " continue\n",
302
- " is_leading_silence = silence_start == 0 and i > self.max_sil_kept\n",
303
- " need_slice_middle = i - silence_start >= self.min_interval and i - clip_start >= self.min_length\n",
304
- " if not is_leading_silence and not need_slice_middle:\n",
305
- " silence_start = None\n",
306
- " continue\n",
307
- " if i - silence_start <= self.max_sil_kept:\n",
308
- " pos = rms_list[silence_start: i+1].argmin() + silence_start\n",
309
- " sil_tags.append((0, pos) if silence_start == 0 else (pos, pos))\n",
310
- " clip_start = pos\n",
311
- " elif i - silence_start <= self.max_sil_kept * 2:\n",
312
- " pos = rms_list[i-self.max_sil_kept: silence_start+self.max_sil_kept+1].argmin() + i-self.max_sil_kept\n",
313
- " pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start\n",
314
- " pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept\n",
315
- " sil_tags.append((0, pos_r) if silence_start == 0 else (min(pos_l, pos), max(pos_r, pos)))\n",
316
- " clip_start = pos_r\n",
317
- " else:\n",
318
- " pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start\n",
319
- " pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept\n",
320
- " sil_tags.append((0, pos_r) if silence_start == 0 else (pos_l, pos_r))\n",
321
- " clip_start = pos_r\n",
322
- " silence_start = None\n",
323
- " total_frames = len(rms_list)\n",
324
- " if silence_start is not None and total_frames - silence_start >= self.min_interval:\n",
325
- " silence_end = min(total_frames, silence_start+self.max_sil_kept)\n",
326
- " pos = rms_list[silence_start: silence_end+1].argmin() + silence_start\n",
327
- " sil_tags.append((pos, total_frames+1))\n",
328
- " if len(sil_tags) == 0:\n",
329
- " return [waveform]\n",
330
- " chunks = []\n",
331
- " if sil_tags[0][0] > 0:\n",
332
- " chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))\n",
333
- " for i in range(len(sil_tags)-1):\n",
334
- " chunks.append(self._apply_slice(waveform, sil_tags[i][1], sil_tags[i+1][0]))\n",
335
- " if sil_tags[-1][1] < total_frames:\n",
336
- " chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames))\n",
337
- " return chunks\n",
338
- "\n",
339
- " global_chunk_count = 0\n",
340
- " for vocal_file in all_vocals_paths:\n",
341
- " print(f\"Slicing {os.path.basename(vocal_file)}...\")\n",
342
- " if not os.path.exists(vocal_file):\n",
343
- " print(f\" Warning: File vokal tidak ditemukan: {vocal_file}. Melewatkan.\")\n",
344
- " continue\n",
345
- "\n",
346
- " load_sr = None if output_sr == 0 else output_sr\n",
347
- " audio, sr = librosa.load(vocal_file, sr=load_sr, mono=True)\n",
348
- "\n",
349
- " slicer = Slicer(sr=sr, threshold=-40, min_length=5000, min_interval=500, hop_size=10, max_sil_kept=500)\n",
350
- " chunks = slicer.slice(audio)\n",
351
- " for chunk in chunks:\n",
352
- " sf.write(f\"{output_slicer_dir}/split_{global_chunk_count}.{output_format}\", chunk, sr)\n",
353
- " global_chunk_count += 1\n",
354
- " print(f\"\\nSplitting selesai. Total {global_chunk_count} file dibuat.\")\n",
355
- " except Exception as e:\n",
356
- " print(f\"Terjadi kesalahan saat splitting: {e}\")\n",
357
- " raise e\n",
358
- "\n",
359
- "# === STEP 4: Copy Hasil ke Google Drive ===\n",
360
- "print(\"\\n--- Menyalin Hasil ke Google Drive ---\")\n",
361
- "base_drive_folder = f\"/content/drive/MyDrive/dataset/{project_name}\"\n",
362
- "vocals_drive_folder = f\"{base_drive_folder}/vocals_only\"\n",
363
- "sliced_drive_folder = f\"{base_drive_folder}/sliced_mixed\"\n",
364
- "\n",
365
- "os.makedirs(vocals_drive_folder, exist_ok=True)\n",
366
- "os.makedirs(sliced_drive_folder, exist_ok=True)\n",
367
- "\n",
368
- "print(f\"Menyalin vokal mentah ke: {vocals_drive_folder}\")\n",
369
- "for vocal_path in all_vocals_paths:\n",
370
- " if os.path.exists(vocal_path):\n",
371
- " shutil.copy(vocal_path, vocals_drive_folder)\n",
372
- "\n",
373
- "if mode == \"Splitting\":\n",
374
- " print(f\"Menyalin dataset yang sudah di-slice ke: {sliced_drive_folder}\")\n",
375
- " local_sliced_folder = f\"dataset/{project_name}\"\n",
376
- " for item in os.listdir(local_sliced_folder):\n",
377
- " s = os.path.join(local_sliced_folder, item)\n",
378
- " d = os.path.join(sliced_drive_folder, item)\n",
379
- " if os.path.isdir(s):\n",
380
- " shutil.copytree(s, d, dirs_exist_ok=True)\n",
381
- " else:\n",
382
- " shutil.copy2(s, d)\n",
383
- "\n",
384
- "# --- Cleanup ---\n",
385
- "shutil.rmtree(\"temp_audio_downloads\", ignore_errors=True)\n",
386
- "shutil.rmtree(\"chunks\", ignore_errors=True)\n",
387
- "shutil.rmtree(\"separated\", ignore_errors=True)\n",
388
- "\n",
389
- "print(\"\\nProses selesai!\")\n"
390
- ],
391
- "metadata": {
392
- "id": "0L7br10ouMlL",
393
- "cellView": "form"
394
- },
395
- "execution_count": null,
396
- "outputs": []
397
- }
398
- ]
399
- }