glickko committed on
Commit
2eca7f2
·
verified ·
1 Parent(s): f7dd86b

Upload RVC_Datasets_Maker.ipynb

Browse files
Files changed (1) hide show
  1. RVC_Datasets_Maker.ipynb +434 -0
RVC_Datasets_Maker.ipynb ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU"
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "code",
21
+ "source": [
22
+ "#@title Mount Google Drive\n",
23
+ "from google.colab import drive\n",
24
+ "drive.mount('/content/drive')"
25
+ ],
26
+ "metadata": {
27
+ "id": "RkuSSLb7t39L",
28
+ "cellView": "form"
29
+ },
30
+ "execution_count": null,
31
+ "outputs": []
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "source": [
36
+ "#@title Parameters\n",
37
+ "#@markdown ### **1. General Settings**\n",
38
+ "project_name = \"\" #@param {type:\"string\"}\n",
39
+ "mode = \"Splitting\" #@param [\"Splitting\", \"Separate\"]\n",
40
+ "demucs_model = \"htdemucs\" #@param [\"htdemucs\", \"demucs\", \"htdemucs_ft\", \"demucs_extra\"]\n",
41
+ "\n",
42
+ "#@markdown ---\n",
43
+ "#@markdown ### **2. Input Source**\n",
44
+ "dataset_source = \"Youtube\" #@param [\"Youtube\", \"Drive\"]\n",
45
+ "#@markdown **If YouTube:** Provide one or more URLs, separated by commas.\n",
46
+ "youtube_urls = \"\" #@param {type:\"string\"}\n",
47
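+ "#@markdown **If Drive:** Provide the full path to the FOLDER containing your audio files. Only `.wav`, `.mp3`, `.flac` and `.m4a` files directly inside that folder are picked up (subfolders are not searched).\n",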
48
+ "google_drive_folder_path = \"\" #@param {type:\"string\"}\n",
49
+ "\n",
50
+ "#@markdown ---\n",
51
+ "#@markdown ### **3. Processing Settings**\n",
52
+ "#@markdown **YouTube Trimming (Optional):** Use HH:MM:SS format. Trimming is applied only when both the start and end times are set.\n",
53
+ "start_time = \"\" #@param {type:\"string\"}\n",
54
+ "end_time = \"\" #@param {type:\"string\"}\n",
55
+ "#@markdown **Long Audio Handling:** Split audio longer than this duration before processing with Demucs.\n",
56
+ "chunk_duration_in_minutes = \"30 minutes\" #@param [\"10 minutes\", \"15 minutes\", \"20 minutes\", \"30 minutes\", \"45 minutes\", \"60 minutes\"]\n",
57
+ "\n",
58
+ "#@markdown ---\n",
59
+ "#@markdown ### **4. Output Settings**\n",
60
+ "#@markdown Sample rate for the output files. `0` uses the original sample rate.\n",
61
+ "output_sample_rate = \"48000\" #@param [\"0\", \"8000\", \"16000\", \"22050\", \"32000\", \"44100\", \"48000\"]\n",
62
+ "output_format = \"mp3\" #@param [\"wav\", \"mp3\"]\n",
63
+ "\n",
64
+ "# --- Process Parameters for the next cell ---\n",
65
+ "chunk_duration_map = {\n",
66
+ " \"10 minutes\": 600,\n",
67
+ " \"15 minutes\": 900,\n",
68
+ " \"20 minutes\": 1200,\n",
69
+ " \"30 minutes\": 1800,\n",
70
+ " \"45 minutes\": 2700,\n",
71
+ " \"60 minutes\": 3600\n",
72
+ "}\n",
73
+ "chunk_duration = chunk_duration_map[chunk_duration_in_minutes]\n",
74
+ "output_sr = int(output_sample_rate)\n",
75
+ "\n",
76
+ "# Map new variables to old names for compatibility with the processing script\n",
77
+ "url = youtube_urls\n",
78
+ "drive_path = google_drive_folder_path\n",
79
+ "dataset = dataset_source"
80
+ ],
81
+ "metadata": {
82
+ "id": "23UmiGqUt_0a",
83
+ "cellView": "form"
84
+ },
85
+ "execution_count": null,
86
+ "outputs": []
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "source": [
91
+ "#@title Process Dataset\n",
92
+ "import os\n",
93
+ "import subprocess\n",
94
+ "import glob\n",
95
+ "import shutil\n",
96
+ "\n",
97
+ "print(\"Memulai proses...\\n\")\n",
98
+ "\n",
99
+ "# Make sure the Colab runtime is using a GPU\n",
100
+ "print(\"GPU Info:\")\n",
101
+ "!nvidia-smi\n",
102
+ "print(\"\\n\")\n",
103
+ "\n",
104
+ "# --- Helper Functions ---\n",
105
+ "def get_duration(file_path):\n",
106
+ " try:\n",
107
+ " result = subprocess.run(\n",
108
+ " [\"ffprobe\", \"-v\", \"error\", \"-show_entries\", \"format=duration\",\n",
109
+ " \"-of\", \"default=noprint_wrappers=1:nokey=1\", file_path],\n",
110
+ " stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)\n",
111
+ " return float(result.stdout.strip())\n",
112
+ " except Exception as e:\n",
113
+ " print(f\"Gagal mendapatkan durasi audio untuk {file_path}: {e}\")\n",
114
+ " return None\n",
115
+ "\n",
116
+ "def run_command(command):\n",
117
+ " result = subprocess.run(command, shell=True, capture_output=True, text=True)\n",
118
+ " if result.stdout:\n",
119
+ " print(result.stdout)\n",
120
+ " if result.stderr:\n",
121
+ " print(result.stderr)\n",
122
+ "\n",
123
+ "# --- Input Validation ---\n",
124
+ "if not project_name:\n",
125
+ " raise ValueError(\"Error: Project Name tidak boleh kosong!\")\n",
126
+ "if dataset == \"Youtube\" and not url:\n",
127
+ " raise ValueError(\"Error: URL tidak boleh kosong untuk dataset Youtube!\")\n",
128
+ "if dataset == \"Drive\" and not drive_path:\n",
129
+ " raise ValueError(\"Error: Drive Path tidak boleh kosong untuk dataset Drive!\")\n",
130
+ "\n",
131
+ "# --- Install Dependencies ---\n",
132
+ "print(\"Menginstal/memperbarui dependensi (yt_dlp, ffmpeg, demucs, librosa)...\")\n",
133
+ "run_command(\"python3 -m pip install --upgrade yt_dlp ffmpeg-python demucs librosa soundfile --quiet\")\n",
134
+ "\n",
135
+ "# === STEP 1: Gather All Audio Sources ===\n",
136
+ "source_audio_paths = []\n",
137
+ "temp_download_folder = \"temp_audio_downloads\"\n",
138
+ "os.makedirs(temp_download_folder, exist_ok=True)\n",
139
+ "\n",
140
+ "if dataset == \"Youtube\":\n",
141
+ " urls = [u.strip() for u in url.split(',') if u.strip()]\n",
142
+ " print(f\"Ditemukan {len(urls)} URL YouTube untuk diproses.\")\n",
143
+ " import yt_dlp\n",
144
+ " for i, u in enumerate(urls):\n",
145
+ " print(f\"\\nDownloading audio ({i+1}/{len(urls)}) dari: {u}\")\n",
146
+ " try:\n",
147
+ " ydl_opts = {\n",
148
+ " 'format': 'bestaudio/best',\n",
149
+ " 'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}],\n",
150
+ " 'outtmpl': f'{temp_download_folder}/{project_name}_yt_{i+1}.%(ext)s'\n",
151
+ " }\n",
152
+ " with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
153
+ " ydl.download([u])\n",
154
+ " downloaded_file = os.path.abspath(f\"{temp_download_folder}/{project_name}_yt_{i+1}.wav\")\n",
155
+ "\n",
156
+ " # Trimming Logic\n",
157
+ " if start_time and end_time:\n",
158
+ " print(f\"Melakukan trimming audio dari {start_time} ke {end_time}...\")\n",
159
+ " trimmed_file = os.path.abspath(f\"{temp_download_folder}/{project_name}_yt_{i+1}_trimmed.wav\")\n",
160
+ " trim_cmd = f'ffmpeg -i \"{downloaded_file}\" -ss {start_time} -to {end_time} -c copy \"{trimmed_file}\"'\n",
161
+ " run_command(trim_cmd)\n",
162
+ " if os.path.exists(trimmed_file):\n",
163
+ " source_audio_paths.append(trimmed_file)\n",
164
+ " else:\n",
165
+ " print(f\"Warning: Gagal melakukan trimming, file akan diproses penuh.\")\n",
166
+ " source_audio_paths.append(downloaded_file)\n",
167
+ " else:\n",
168
+ " source_audio_paths.append(downloaded_file)\n",
169
+ " except Exception as e:\n",
170
+ " print(f\"Gagal mendownload atau memproses URL {u}: {e}\")\n",
171
+ "elif dataset == \"Drive\":\n",
172
+ " print(f\"Mencari file audio di folder: {drive_path}\")\n",
173
+ " allowed_extensions = [\"*.wav\", \"*.mp3\", \"*.flac\", \"*.m4a\"]\n",
174
+ " for ext in allowed_extensions:\n",
175
+ " source_audio_paths.extend(glob.glob(os.path.join(drive_path, ext)))\n",
176
+ " print(f\"Ditemukan {len(source_audio_paths)} file audio.\")\n",
177
+ "\n",
178
+ "if not source_audio_paths:\n",
179
+ " raise Exception(\"Tidak ada file audio sumber yang ditemukan. Hentikan proses.\")\n",
180
+ "\n",
181
+ "# === STEP 2: Process Each Audio Source with Demucs ===\n",
182
+ "all_vocals_paths = []\n",
183
+ "for idx, audio_input in enumerate(source_audio_paths):\n",
184
+ " current_audio_name = os.path.splitext(os.path.basename(audio_input))[0]\n",
185
+ " print(f\"\\n--- Memproses file {idx+1}/{len(source_audio_paths)}: {current_audio_name} ---\")\n",
186
+ " duration = get_duration(audio_input)\n",
187
+ " if duration is None:\n",
188
+ " print(f\"Melewatkan file karena tidak bisa mendapatkan durasi.\")\n",
189
+ " continue\n",
190
+ " print(f\"Durasi audio: {duration:.0f} detik.\")\n",
191
+ "\n",
192
+ " if duration > chunk_duration:\n",
193
+ " print(f\"Audio lebih panjang dari {chunk_duration} detik. Melakukan splitting audio menjadi beberapa chunk...\")\n",
194
+ " chunk_folder = f\"chunks/{current_audio_name}\"\n",
195
+ " os.makedirs(chunk_folder, exist_ok=True)\n",
196
+ " split_cmd = f'ffmpeg -hide_banner -loglevel error -i \"{audio_input}\" -f segment -segment_time {chunk_duration} -c copy \"{chunk_folder}/{current_audio_name}_%03d.wav\"'\n",
197
+ " run_command(split_cmd)\n",
198
+ " chunk_files = sorted(glob.glob(f\"{chunk_folder}/{current_audio_name}_*.wav\"))\n",
199
+ " if not chunk_files:\n",
200
+ " print(f\"Warning: Gagal membuat chunk untuk {current_audio_name}. Melewatkan file ini.\")\n",
201
+ " continue\n",
202
+ "\n",
203
+ " print(f\"Memproses {len(chunk_files)} chunk dengan Demucs...\")\n",
204
+ " chunk_vocals_files = []\n",
205
+ " for chunk_file in chunk_files:\n",
206
+ " demucs_cmd = f'python3 -m demucs.separate --two-stems vocals -n {demucs_model} \"{chunk_file}\"'\n",
207
+ " run_command(demucs_cmd)\n",
208
+ " base = os.path.splitext(os.path.basename(chunk_file))[0]\n",
209
+ " vocals_path = f\"separated/{demucs_model}/{base}/vocals.wav\"\n",
210
+ " if os.path.exists(vocals_path):\n",
211
+ " chunk_vocals_files.append(os.path.abspath(vocals_path))\n",
212
+ " else:\n",
213
+ " print(f\"Warning: Hasil Demucs untuk {chunk_file} tidak ditemukan.\")\n",
214
+ "\n",
215
+ " if not chunk_vocals_files:\n",
216
+ " print(f\"Warning: Tidak ada vokal yang berhasil diekstrak untuk {current_audio_name}. Melewatkan file ini.\")\n",
217
+ " continue\n",
218
+ "\n",
219
+ " list_file = \"chunks_list.txt\"\n",
220
+ " with open(list_file, \"w\") as f:\n",
221
+ " for file in chunk_vocals_files:\n",
222
+ " # End each entry with a real newline so the ffmpeg concat list has one file per line\n",
223
+ " f.write(f\"file '{file}'\\n\")\n",
224
+ "\n",
225
+ " combined_vocals_path = f\"separated/{demucs_model}/{current_audio_name}_vocals.wav\"\n",
226
+ " concat_cmd = f'ffmpeg -hide_banner -loglevel error -f concat -safe 0 -i \"{list_file}\" -c copy \"{combined_vocals_path}\"'\n",
227
+ " run_command(concat_cmd)\n",
228
+ " print(\"Penggabungan vokal dari chunk selesai.\")\n",
229
+ "\n",
230
+ " # Convert combined vocals to the desired output format and sample rate\n",
231
+ " final_vocals_output_path = os.path.splitext(combined_vocals_path)[0] + f'.{output_format}'\n",
232
+ " print(f\"Mengonversi vokal gabungan ke format {output_format.upper()} (Sample Rate: {output_sr if output_sr != 0 else 'asli'})...\")\n",
233
+ " convert_cmd = f'ffmpeg -hide_banner -loglevel error -i \"{combined_vocals_path}\"'\n",
234
+ " if output_sr != 0:\n",
235
+ " convert_cmd += f' -ar {output_sr}'\n",
236
+ " convert_cmd += f' -y \"{final_vocals_output_path}\"'\n",
237
+ " run_command(convert_cmd)\n",
238
+ "\n",
239
+ " if os.path.exists(final_vocals_output_path):\n",
240
+ " if combined_vocals_path != final_vocals_output_path:\n",
241
+ " os.remove(combined_vocals_path) # Clean up the intermediate .wav\n",
242
+ " all_vocals_paths.append(os.path.abspath(final_vocals_output_path))\n",
243
+ " else:\n",
244
+ " print(f\"Warning: Gagal mengonversi vokal. Menyimpan file .wav asli.\")\n",
245
+ " all_vocals_paths.append(os.path.abspath(combined_vocals_path))\n",
246
+ " else:\n",
247
+ " print(\"Memproses audio penuh dengan Demucs...\")\n",
248
+ " demucs_cmd = f'python3 -m demucs.separate --two-stems vocals -n {demucs_model} -o \"separated\" --filename \"{current_audio_name}/{{stem}}.{{ext}}\" \"{audio_input}\"'\n",
249
+ " run_command(demucs_cmd)\n",
250
+ " vocals_final_wav = f\"separated/{demucs_model}/{current_audio_name}/vocals.wav\"\n",
251
+ "\n",
252
+ " if os.path.exists(vocals_final_wav):\n",
253
+ " # Convert the final vocals to the desired output format and sample rate\n",
254
+ " final_vocals_output_path = os.path.splitext(vocals_final_wav)[0] + f'.{output_format}'\n",
255
+ " print(f\"Mengonversi vokal ke format {output_format.upper()} (Sample Rate: {output_sr if output_sr != 0 else 'asli'})...\")\n",
256
+ " convert_cmd = f'ffmpeg -hide_banner -loglevel error -i \"{vocals_final_wav}\"'\n",
257
+ " if output_sr != 0:\n",
258
+ " convert_cmd += f' -ar {output_sr}'\n",
259
+ " convert_cmd += f' -y \"{final_vocals_output_path}\"'\n",
260
+ " run_command(convert_cmd)\n",
261
+ "\n",
262
+ " if os.path.exists(final_vocals_output_path):\n",
263
+ " if vocals_final_wav != final_vocals_output_path:\n",
264
+ " os.remove(vocals_final_wav) # Clean up the original .wav\n",
265
+ " all_vocals_paths.append(os.path.abspath(final_vocals_output_path))\n",
266
+ " else:\n",
267
+ " print(f\"Warning: Gagal mengonversi vokal. Menyimpan file .wav asli.\")\n",
268
+ " all_vocals_paths.append(os.path.abspath(vocals_final_wav))\n",
269
+ " else:\n",
270
+ " print(f\"Warning: Gagal memisahkan vokal untuk {current_audio_name}.\")\n",
271
+ " continue\n",
272
+ " print(\"Proses pemisahan vokal selesai.\")\n",
273
+ "\n",
274
+ "# === STEP 3: Splitting Vocals (if mode = \"Splitting\") ===\n",
275
+ "if mode == \"Splitting\":\n",
276
+ " print(\"\\n--- Melakukan Splitting pada Semua Hasil Vokal ---\")\n",
277
+ " output_slicer_dir = f\"dataset/{project_name}\"\n",
278
+ " os.makedirs(output_slicer_dir, exist_ok=True)\n",
279
+ " try:\n",
280
+ " import numpy as np\n",
281
+ " import librosa\n",
282
+ " import soundfile as sf\n",
283
+ "\n",
284
+ " def get_rms(y, frame_length=2048, hop_length=512, pad_mode=\"constant\"):\n",
285
+ " padding = (int(frame_length // 2), int(frame_length // 2))\n",
286
+ " y = np.pad(y, padding, mode=pad_mode)\n",
287
+ " axis = -1\n",
288
+ " out_strides = y.strides + (y.strides[axis],)\n",
289
+ " x_shape_trimmed = list(y.shape)\n",
290
+ " x_shape_trimmed[axis] -= frame_length - 1\n",
291
+ " out_shape = tuple(x_shape_trimmed) + (frame_length,)\n",
292
+ " xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)\n",
293
+ " if axis < 0:\n",
294
+ " target_axis = axis - 1\n",
295
+ " else:\n",
296
+ " target_axis = axis + 1\n",
297
+ " xw = np.moveaxis(xw, -1, target_axis)\n",
298
+ " slices = [slice(None)] * xw.ndim\n",
299
+ " slices[axis] = slice(0, None, hop_length)\n",
300
+ " x = xw[tuple(slices)]\n",
301
+ " power = np.mean(np.abs(x)**2, axis=-2, keepdims=True)\n",
302
+ " return np.sqrt(power).squeeze(0)\n",
303
+ "\n",
304
+ " class Slicer:\n",
305
+ " def __init__(self, sr, threshold=-40., min_length=5000, min_interval=300, hop_size=20, max_sil_kept=5000):\n",
306
+ " if not min_length >= min_interval >= hop_size:\n",
307
+ " raise ValueError('min_length >= min_interval >= hop_size harus terpenuhi')\n",
308
+ " if not max_sil_kept >= hop_size:\n",
309
+ " raise ValueError('max_sil_kept >= hop_size harus terpenuhi')\n",
310
+ " min_interval = sr * min_interval / 1000\n",
311
+ " self.threshold = 10 ** (threshold/20.)\n",
312
+ " self.hop_size = round(sr * hop_size / 1000)\n",
313
+ " self.win_size = min(round(min_interval), 4 * self.hop_size)\n",
314
+ " self.min_length = round(sr * min_length / 1000 / self.hop_size)\n",
315
+ " self.min_interval = round(min_interval / self.hop_size)\n",
316
+ " self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)\n",
317
+ "\n",
318
+ " def _apply_slice(self, waveform, begin, end):\n",
319
+ " return waveform[begin*self.hop_size: min(len(waveform), end*self.hop_size)]\n",
320
+ "\n",
321
+ " def slice(self, waveform):\n",
322
+ " if len(waveform) <= self.min_length:\n",
323
+ " return [waveform]\n",
324
+ " rms_list = get_rms(waveform, frame_length=self.win_size, hop_length=self.hop_size)\n",
325
+ " sil_tags = []\n",
326
+ " silence_start = None\n",
327
+ " clip_start = 0\n",
328
+ " for i, rms in enumerate(rms_list):\n",
329
+ " if rms < self.threshold:\n",
330
+ " if silence_start is None:\n",
331
+ " silence_start = i\n",
332
+ " continue\n",
333
+ " if silence_start is None:\n",
334
+ " continue\n",
335
+ " is_leading_silence = silence_start == 0 and i > self.max_sil_kept\n",
336
+ " need_slice_middle = i - silence_start >= self.min_interval and i - clip_start >= self.min_length\n",
337
+ " if not is_leading_silence and not need_slice_middle:\n",
338
+ " silence_start = None\n",
339
+ " continue\n",
340
+ " if i - silence_start <= self.max_sil_kept:\n",
341
+ " pos = rms_list[silence_start: i+1].argmin() + silence_start\n",
342
+ " sil_tags.append((0, pos) if silence_start == 0 else (pos, pos))\n",
343
+ " clip_start = pos\n",
344
+ " elif i - silence_start <= self.max_sil_kept * 2:\n",
345
+ " pos = rms_list[i-self.max_sil_kept: silence_start+self.max_sil_kept+1].argmin() + i-self.max_sil_kept\n",
346
+ " pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start\n",
347
+ " pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept\n",
348
+ " sil_tags.append((0, pos_r) if silence_start == 0 else (min(pos_l, pos), max(pos_r, pos)))\n",
349
+ " clip_start = pos_r\n",
350
+ " else:\n",
351
+ " pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start\n",
352
+ " pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept\n",
353
+ " sil_tags.append((0, pos_r) if silence_start == 0 else (pos_l, pos_r))\n",
354
+ " clip_start = pos_r\n",
355
+ " silence_start = None\n",
356
+ " total_frames = len(rms_list)\n",
357
+ " if silence_start is not None and total_frames - silence_start >= self.min_interval:\n",
358
+ " silence_end = min(total_frames, silence_start+self.max_sil_kept)\n",
359
+ " pos = rms_list[silence_start: silence_end+1].argmin() + silence_start\n",
360
+ " sil_tags.append((pos, total_frames+1))\n",
361
+ " if len(sil_tags) == 0:\n",
362
+ " return [waveform]\n",
363
+ " chunks = []\n",
364
+ " if sil_tags[0][0] > 0:\n",
365
+ " chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))\n",
366
+ " for i in range(len(sil_tags)-1):\n",
367
+ " chunks.append(self._apply_slice(waveform, sil_tags[i][1], sil_tags[i+1][0]))\n",
368
+ " if sil_tags[-1][1] < total_frames:\n",
369
+ " chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames))\n",
370
+ " return chunks\n",
371
+ "\n",
372
+ " global_chunk_count = 0\n",
373
+ " for vocal_file in all_vocals_paths:\n",
374
+ " print(f\"Slicing {os.path.basename(vocal_file)}...\")\n",
375
+ " if not os.path.exists(vocal_file):\n",
376
+ " print(f\" Warning: File vokal tidak ditemukan: {vocal_file}. Melewatkan.\")\n",
377
+ " continue\n",
378
+ "\n",
379
+ " audio, sr = librosa.load(vocal_file, sr=None, mono=True)\n",
380
+ " slicer_sr = output_sr if output_sr != 0 else sr\n",
381
+ "\n",
382
+ " slicer = Slicer(sr=slicer_sr, threshold=-40, min_length=5000, min_interval=500, hop_size=10, max_sil_kept=500)\n",
383
+ " chunks = slicer.slice(audio)\n",
384
+ " for chunk in chunks:\n",
385
+ " sf.write(f\"{output_slicer_dir}/split_{global_chunk_count}.{output_format}\", chunk, slicer_sr)\n",
386
+ " global_chunk_count += 1\n",
387
+ " print(f\"\\nSplitting selesai. Total {global_chunk_count} file dibuat.\")\n",
388
+ " except Exception as e:\n",
389
+ " print(f\"Terjadi kesalahan saat splitting: {e}\")\n",
390
+ " raise e\n",
391
+ "\n",
392
+ "# === STEP 4: Copy Results to Google Drive ===\n",
393
+ "print(\"\\n--- Menyalin Hasil ke Google Drive ---\")\n",
394
+ "base_drive_folder = f\"/content/drive/MyDrive/dataset/{project_name}\"\n",
395
+ "vocals_drive_folder = f\"{base_drive_folder}/vocals_only\"\n",
396
+ "sliced_drive_folder = f\"{base_drive_folder}/sliced_mixed\"\n",
397
+ "\n",
398
+ "os.makedirs(vocals_drive_folder, exist_ok=True)\n",
399
+ "os.makedirs(sliced_drive_folder, exist_ok=True)\n",
400
+ "\n",
401
+ "print(f\"Menyalin vokal mentah ke: {vocals_drive_folder}\")\n",
402
+ "for vocal_path in all_vocals_paths:\n",
403
+ " if os.path.exists(vocal_path):\n",
404
+ " shutil.copy(vocal_path, vocals_drive_folder)\n",
405
+ "\n",
406
+ "if mode == \"Splitting\":\n",
407
+ " print(f\"Menyalin dataset yang sudah di-slice ke: {sliced_drive_folder}\")\n",
408
+ " local_sliced_folder = f\"dataset/{project_name}\"\n",
409
+ " for item in os.listdir(local_sliced_folder):\n",
410
+ " s = os.path.join(local_sliced_folder, item)\n",
411
+ " d = os.path.join(sliced_drive_folder, item)\n",
412
+ " if os.path.isdir(s):\n",
413
+ " shutil.copytree(s, d, dirs_exist_ok=True)\n",
414
+ " else:\n",
415
+ " shutil.copy2(s, d)\n",
416
+ "\n",
417
+ "# --- Cleanup ---\n",
418
+ "shutil.rmtree(\"temp_audio_downloads\", ignore_errors=True)\n",
419
+ "shutil.rmtree(\"chunks\", ignore_errors=True)\n",
420
+ "shutil.rmtree(\"separated\", ignore_errors=True)\n",
421
+ "if os.path.exists(\"chunks_list.txt\"):\n",
422
+ " os.remove(\"chunks_list.txt\")\n",
423
+ "\n",
424
+ "print(\"\\nProses selesai!\")"
425
+ ],
426
+ "metadata": {
427
+ "id": "0L7br10ouMlL",
428
+ "cellView": "form"
429
+ },
430
+ "execution_count": null,
431
+ "outputs": []
432
+ }
433
+ ]
434
+ }