In [None]:
#@title Mount Google Drive
# Attach the user's Google Drive to the Colab filesystem so that later cells
# can read input audio from it and copy results back to it.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
#@title Parameters
# Colab form cell. Each `#@param` annotation turns the assignment on its line
# into a form widget, and each `#@markdown` line is rendered as form text, so
# those annotations must stay on the same line as the value they describe.
#@markdown ### **1. General Settings**
# project_name is embedded in downloaded file names and in the local and
# Google Drive output folder paths by the processing cell.
project_name = "" #@param {type:"string"}
# "Splitting" makes the processing cell also run its vocal-slicing step;
# with "Separate" that step is skipped.
mode = "Splitting" #@param ["Splitting", "Separate"]
# Passed to `python3 -m demucs.separate -n <model>` by the processing cell.
# NOTE(review): confirm every option listed here is a model name the installed
# Demucs version still provides.
demucs_model = "htdemucs" #@param ["htdemucs", "demucs", "htdemucs_ft", "demucs_extra"]

#@markdown ---
#@markdown ### **2. Input Source**
# Selects which of the two inputs below is used.
dataset_source = "Youtube" #@param ["Youtube", "Drive"]
#@markdown **If YouTube:** Provide one or more URLs, separated by commas.
youtube_urls = "" #@param {type:"string"}
#@markdown **If Drive:** Provide the full path to the FOLDER containing your audio files.
google_drive_folder_path = "" #@param {type:"string"}

#@markdown ---
#@markdown ### **3. Processing Settings**
#@markdown **YouTube Trimming (Optional):** Use HH:MM:SS format.
# These values are inserted verbatim into an ffmpeg command line.
start_time = "" #@param {type:"string"}
end_time = "" #@param {type:"string"}
#@markdown **Long Audio Handling:** Split audio longer than this duration before processing with Demucs.
# The label chosen here is translated into seconds further down in this cell.
chunk_duration_in_minutes = "30 minutes" #@param ["10 minutes", "15 minutes", "20 minutes", "30 minutes", "45 minutes", "60 minutes"]

#@markdown ---
#@markdown ### **4. Output Settings**
#@markdown Sample rate for the output files. `0` uses the original sample rate.
# Kept as a string by the form; converted to an int further down in this cell.
output_sample_rate = "48000" #@param ["0", "8000", "16000", "22050", "32000", "44100", "48000"]
output_format = "mp3" #@param ["wav", "mp3"]

# --- Derive processing values from the form selections ---
# Dropdown label -> chunk length in seconds (e.g. "30 minutes" -> 1800).
chunk_duration_map = {
    f"{minutes} minutes": minutes * 60
    for minutes in (10, 15, 20, 30, 45, 60)
}
chunk_duration = chunk_duration_map[chunk_duration_in_minutes]
# The form stores the sample rate as text; the processing cell needs an int
# (0 means "keep the original rate").
output_sr = int(output_sample_rate)

# Short aliases under which the processing cell reads the input settings.
url, drive_path, dataset = youtube_urls, google_drive_folder_path, dataset_source

In [None]:
#@title Process Dataset
import os
import subprocess
import glob
import shutil

# Announce the start of the run in the cell output.
print("Memulai proses...\n")

# Make sure the Colab runtime is using a GPU.
# `!nvidia-smi` is an IPython shell escape, so its report is printed straight
# into the cell output between the two surrounding print calls.
print("GPU Info:")
!nvidia-smi
print("\n")

# --- Helper Functions ---
def get_duration(file_path):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Args:
        file_path: Path of the audio file to inspect.

    Returns:
        float: The duration in seconds, or ``None`` when ffprobe cannot be run
        or its output cannot be parsed as a number (a message is printed in
        that case).
    """
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        file_path,
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True)
        # With these output options ffprobe prints just the bare number.
        seconds = float(probe.stdout.strip())
    except Exception as e:
        print(f"Gagal mendapatkan durasi audio untuk {file_path}: {e}")
        return None
    return seconds

def run_command(command):
    """Run an external command, echo its captured output, and return its exit code.

    The child's stdin is connected to the null device so that a tool which
    would otherwise ask an interactive question (for example ffmpeg's
    "File exists. Overwrite?" prompt) fails immediately instead of blocking
    the notebook cell while waiting for input that can never arrive.

    Args:
        command: Either a shell command line (``str``), which is run through
            the shell exactly as before, or an argument sequence
            (``list``/``tuple``), which is executed directly without a shell
            so that no quoting of the arguments is needed.

    Returns:
        int: The exit status of the process (0 means success). Existing
        callers that ignore the return value are unaffected.
    """
    result = subprocess.run(
        command,
        shell=isinstance(command, str),
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
    )
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr)
    return result.returncode

# --- Input Validation ---
# Fail fast, before anything is installed or downloaded, when a form field
# that the selected input source depends on has been left blank.
if not project_name:
    raise ValueError("Error: Project Name tidak boleh kosong!")
if dataset == "Youtube":
    if not url:
        raise ValueError("Error: URL tidak boleh kosong untuk dataset Youtube!")
elif dataset == "Drive":
    if not drive_path:
        raise ValueError("Error: Drive Path tidak boleh kosong untuk dataset Drive!")

# --- Install Dependencies ---
# Install or upgrade the third-party packages that the steps below import or
# invoke as command-line modules.
print("Menginstal/memperbarui dependensi (yt_dlp, ffmpeg, demucs, librosa)...")
run_command(
    "python3 -m pip install --upgrade "
    "yt_dlp ffmpeg-python demucs librosa soundfile "
    "--quiet"
)

# === STEP 1: Gather All Audio Sources ===
# Builds `source_audio_paths`: one local WAV per YouTube URL (optionally
# trimmed), or every supported audio file found directly in the Drive folder.
source_audio_paths = []
temp_download_folder = "temp_audio_downloads"
os.makedirs(temp_download_folder, exist_ok=True)

if dataset == "Youtube":
    urls = [u.strip() for u in url.split(',') if u.strip()]
    print(f"Ditemukan {len(urls)} URL YouTube untuk diproses.")
    # Imported here rather than at the top of the cell because the package is
    # installed/upgraded by the dependency step that runs just before this one.
    import yt_dlp
    for i, u in enumerate(urls):
        print(f"\nDownloading audio ({i+1}/{len(urls)}) dari: {u}")
        try:
            ydl_opts = {
                'format': 'bestaudio/best',
                'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}],
                'outtmpl': f'{temp_download_folder}/{project_name}_yt_{i+1}.%(ext)s'
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([u])
            downloaded_file = os.path.abspath(f"{temp_download_folder}/{project_name}_yt_{i+1}.wav")
            # Do not queue a path that was never produced; the error is
            # reported by the handler below and the next URL is tried.
            if not os.path.exists(downloaded_file):
                raise FileNotFoundError(f"File hasil download tidak ditemukan: {downloaded_file}")

            # Trimming Logic: either bound may be given on its own. A missing
            # start means "from the beginning", a missing end "to the end".
            if start_time or end_time:
                print(f"Melakukan trimming audio dari {start_time or 'awal'} ke {end_time or 'akhir'}...")
                trimmed_file = os.path.abspath(f"{temp_download_folder}/{project_name}_yt_{i+1}_trimmed.wav")
                # Remove a leftover from an earlier run so that the existence
                # check below reflects the outcome of THIS ffmpeg call.
                if os.path.exists(trimmed_file):
                    os.remove(trimmed_file)
                trim_cmd = f'ffmpeg -hide_banner -loglevel error -y -i "{downloaded_file}"'
                if start_time:
                    trim_cmd += f' -ss {start_time}'
                if end_time:
                    trim_cmd += f' -to {end_time}'
                trim_cmd += f' -c copy "{trimmed_file}"'
                run_command(trim_cmd)
                if os.path.exists(trimmed_file):
                    source_audio_paths.append(trimmed_file)
                else:
                    print(f"Warning: Gagal melakukan trimming, file akan diproses penuh.")
                    source_audio_paths.append(downloaded_file)
            else:
                source_audio_paths.append(downloaded_file)
        except Exception as e:
            # Best effort: one failing URL must not abort the remaining ones.
            print(f"Gagal mendownload atau memproses URL {u}: {e}")
elif dataset == "Drive":
    print(f"Mencari file audio di folder: {drive_path}")
    supported_suffixes = (".wav", ".mp3", ".flac", ".m4a")
    # Listing the folder and filtering by suffix (instead of globbing) keeps
    # this working for folder names containing glob metacharacters such as
    # "[", matches extensions case-insensitively (".WAV", ".Mp3"), and gives a
    # deterministic, name-sorted processing order.
    if os.path.isdir(drive_path):
        for entry in sorted(os.listdir(drive_path)):
            if entry.startswith("."):
                continue  # hidden files were not matched by the glob either
            if os.path.splitext(entry)[1].lower() in supported_suffixes:
                source_audio_paths.append(os.path.join(drive_path, entry))
    print(f"Ditemukan {len(source_audio_paths)} file audio.")

if not source_audio_paths:
    raise Exception("Tidak ada file audio sumber yang ditemukan. Hentikan proses.")

# === STEP 2: Process Each Audio Source with Demucs ===
def _convert_vocals(wav_path, description):
    """Convert a vocals WAV to the configured output format and sample rate.

    Args:
        wav_path: Path of the ``.wav`` file holding the extracted vocals.
        description: Wording used for the file in the progress message.

    Returns:
        str: Absolute path of the file to use from here on. This is the
        converted file on success (the source WAV is then removed or
        replaced), or the untouched source WAV when conversion fails or is
        not needed.
    """
    target_path = os.path.splitext(wav_path)[0] + f'.{output_format}'
    in_place = target_path == wav_path
    if in_place and output_sr == 0:
        # Already a WAV and the original sample rate is requested.
        return os.path.abspath(wav_path)

    # ffmpeg refuses to write onto its own input, so a WAV -> WAV resample is
    # rendered to a sibling file first and swapped in afterwards.
    if in_place:
        render_path = os.path.splitext(wav_path)[0] + f'.converted.{output_format}'
    else:
        render_path = target_path
    # A leftover from an earlier run must not be mistaken for a fresh result.
    if os.path.exists(render_path):
        os.remove(render_path)

    print(f"Mengonversi {description} ke format {output_format.upper()} (Sample Rate: {output_sr if output_sr != 0 else 'asli'})...")
    convert_cmd = f'ffmpeg -hide_banner -loglevel error -i "{wav_path}"'
    if output_sr != 0:
        convert_cmd += f' -ar {output_sr}'
    convert_cmd += f' -y "{render_path}"'
    run_command(convert_cmd)

    if not os.path.exists(render_path):
        print(f"Warning: Gagal mengonversi vokal. Menyimpan file .wav asli.")
        return os.path.abspath(wav_path)
    if in_place:
        os.replace(render_path, target_path)
    else:
        os.remove(wav_path)  # Clean up the intermediate .wav
    return os.path.abspath(target_path)


# Collects the final vocals file of every source that was processed successfully.
all_vocals_paths = []
for idx, audio_input in enumerate(source_audio_paths):
    current_audio_name = os.path.splitext(os.path.basename(audio_input))[0]
    print(f"\n--- Memproses file {idx+1}/{len(source_audio_paths)}: {current_audio_name} ---")
    duration = get_duration(audio_input)
    if duration is None:
        print(f"Melewatkan file karena tidak bisa mendapatkan durasi.")
        continue
    print(f"Durasi audio: {duration:.0f} detik.")

    if duration > chunk_duration:
        # Long input: split it, separate each chunk, then join the vocals.
        print(f"Audio lebih panjang dari {chunk_duration} detik. Melakukan splitting audio menjadi beberapa chunk...")
        chunk_folder = f"chunks/{current_audio_name}"
        # Start from an empty folder so chunks left behind by an aborted
        # earlier run are not picked up by the glob below.
        shutil.rmtree(chunk_folder, ignore_errors=True)
        os.makedirs(chunk_folder, exist_ok=True)
        split_cmd = f'ffmpeg -hide_banner -loglevel error -i "{audio_input}" -f segment -segment_time {chunk_duration} -c copy "{chunk_folder}/{current_audio_name}_%03d.wav"'
        run_command(split_cmd)
        # glob.escape keeps names containing "[", "?" or "*" matchable.
        chunk_files = sorted(glob.glob(f"{glob.escape(chunk_folder)}/{glob.escape(current_audio_name)}_*.wav"))
        if not chunk_files:
            print(f"Warning: Gagal membuat chunk untuk {current_audio_name}. Melewatkan file ini.")
            continue

        print(f"Memproses {len(chunk_files)} chunk dengan Demucs...")
        chunk_vocals_files = []
        for chunk_file in chunk_files:
            demucs_cmd = f'python3 -m demucs.separate --two-stems vocals -n {demucs_model} "{chunk_file}"'
            run_command(demucs_cmd)
            base = os.path.splitext(os.path.basename(chunk_file))[0]
            vocals_path = f"separated/{demucs_model}/{base}/vocals.wav"
            if os.path.exists(vocals_path):
                chunk_vocals_files.append(os.path.abspath(vocals_path))
            else:
                print(f"Warning: Hasil Demucs untuk {chunk_file} tidak ditemukan.")

        if not chunk_vocals_files:
            print(f"Warning: Tidak ada vokal yang berhasil diekstrak untuk {current_audio_name}. Melewatkan file ini.")
            continue

        # Input list for ffmpeg's concat demuxer, one "file '<path>'" per line.
        list_file = "chunks_list.txt"
        with open(list_file, "w", encoding="utf-8") as f:
            for file in chunk_vocals_files:
                # Inside single quotes an apostrophe is written as '\''.
                escaped = file.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        combined_vocals_path = f"separated/{demucs_model}/{current_audio_name}_vocals.wav"
        # Remove any stale result first so the existence check is meaningful.
        if os.path.exists(combined_vocals_path):
            os.remove(combined_vocals_path)
        concat_cmd = f'ffmpeg -hide_banner -loglevel error -y -f concat -safe 0 -i "{list_file}" -c copy "{combined_vocals_path}"'
        run_command(concat_cmd)
        if not os.path.exists(combined_vocals_path):
            print(f"Warning: Gagal menggabungkan chunk vokal untuk {current_audio_name}. Melewatkan file ini.")
            continue
        print("Penggabungan vokal dari chunk selesai.")

        # Convert combined vocals to the desired output format and sample rate
        all_vocals_paths.append(_convert_vocals(combined_vocals_path, "vokal gabungan"))
    else:
        print("Memproses audio penuh dengan Demucs...")
        demucs_cmd = f'python3 -m demucs.separate --two-stems vocals -n {demucs_model} -o "separated" --filename "{current_audio_name}/{{stem}}.{{ext}}" "{audio_input}"'
        run_command(demucs_cmd)
        vocals_final_wav = f"separated/{demucs_model}/{current_audio_name}/vocals.wav"

        if not os.path.exists(vocals_final_wav):
            print(f"Warning: Gagal memisahkan vokal untuk {current_audio_name}.")
            continue
        # Convert the final vocals to the desired output format and sample rate
        all_vocals_paths.append(_convert_vocals(vocals_final_wav, "vokal"))
    print("Proses pemisahan vokal selesai.")

# === STEP 3: Split the vocals on silence (only when mode == "Splitting") ===
def get_rms(y, frame_length=2048, hop_length=512, pad_mode="constant"):
    """Compute the frame-wise root-mean-square level of a signal.

    The signal is padded by ``frame_length // 2`` samples on both sides, cut
    into overlapping frames of ``frame_length`` samples through a strided
    view (no copy), and every ``hop_length``-th frame is kept.

    Args:
        y: NumPy array with time along the last axis.
        frame_length: Number of samples per analysis frame.
        hop_length: Step, in samples, between consecutive kept frames.
        pad_mode: Padding mode forwarded to ``np.pad``.

    Returns:
        For a 1-D input, a 1-D array with one RMS value per kept frame.
    """
    padding = (int(frame_length // 2), int(frame_length // 2))
    y = np.pad(y, padding, mode=pad_mode)
    axis = -1
    # Append a "position inside the frame" axis that reuses the sample stride,
    # so consecutive frames overlap without copying data.
    out_strides = y.strides + (y.strides[axis],)
    x_shape_trimmed = list(y.shape)
    x_shape_trimmed[axis] -= frame_length - 1
    out_shape = tuple(x_shape_trimmed) + (frame_length,)
    xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
    if axis < 0:
        target_axis = axis - 1
    else:
        target_axis = axis + 1
    xw = np.moveaxis(xw, -1, target_axis)
    # Keep only every hop_length-th frame.
    slices = [slice(None)] * xw.ndim
    slices[axis] = slice(0, None, hop_length)
    x = xw[tuple(slices)]
    power = np.mean(np.abs(x)**2, axis=-2, keepdims=True)
    return np.sqrt(power).squeeze(0)


class Slicer:
    """Cut a waveform into clips at sufficiently long silent stretches.

    All durations passed to the constructor are in milliseconds and are
    converted to hop-sized frames; ``threshold`` is a level in dB that is
    converted to a linear amplitude.
    """

    def __init__(self, sr, threshold=-40., min_length=5000, min_interval=300, hop_size=20, max_sil_kept=5000):
        """Store the slicing parameters, converted to frame units.

        Args:
            sr: Sample rate of the waveforms that will be sliced.
            threshold: RMS level in dB below which a frame counts as silent.
            min_length: Minimum clip length in ms.
            min_interval: Minimum silence length in ms that allows a cut.
            hop_size: Analysis hop in ms.
            max_sil_kept: Maximum silence in ms kept around a cut.

        Raises:
            ValueError: If ``min_length >= min_interval >= hop_size`` or
                ``max_sil_kept >= hop_size`` does not hold.
        """
        if not min_length >= min_interval >= hop_size:
            raise ValueError('min_length >= min_interval >= hop_size harus terpenuhi')
        if not max_sil_kept >= hop_size:
            raise ValueError('max_sil_kept >= hop_size harus terpenuhi')
        min_interval = sr * min_interval / 1000  # ms -> samples
        self.threshold = 10 ** (threshold/20.)  # dB -> linear amplitude
        self.hop_size = round(sr * hop_size / 1000)  # samples per frame step
        self.win_size = min(round(min_interval), 4 * self.hop_size)
        self.min_length = round(sr * min_length / 1000 / self.hop_size)  # frames
        self.min_interval = round(min_interval / self.hop_size)  # frames
        self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)  # frames

    def _apply_slice(self, waveform, begin, end):
        """Return the samples between frame indices ``begin`` and ``end``."""
        return waveform[begin*self.hop_size: min(len(waveform), end*self.hop_size)]

    def slice(self, waveform):
        """Split ``waveform`` at silent stretches and return the list of clips.

        Args:
            waveform: 1-D array of audio samples.

        Returns:
            list: The clips in order; ``[waveform]`` unchanged when the input
            is not longer than ``min_length`` or no cut point is found.
        """
        # self.min_length is measured in hop frames, so the sample count is
        # converted to frames (rounded up) before the two are compared.
        if (len(waveform) + self.hop_size - 1) // self.hop_size <= self.min_length:
            return [waveform]
        rms_list = get_rms(waveform, frame_length=self.win_size, hop_length=self.hop_size)
        sil_tags = []  # (first removed frame, first kept frame) per cut
        silence_start = None
        clip_start = 0
        for i, rms in enumerate(rms_list):
            # Silent frame: remember where the silence began and keep going.
            if rms < self.threshold:
                if silence_start is None:
                    silence_start = i
                continue
            # Voiced frame with no pending silence: nothing to decide.
            if silence_start is None:
                continue
            # A silence just ended; discard it if it is too short to cut at or
            # if cutting would leave the current clip shorter than min_length.
            is_leading_silence = silence_start == 0 and i > self.max_sil_kept
            need_slice_middle = i - silence_start >= self.min_interval and i - clip_start >= self.min_length
            if not is_leading_silence and not need_slice_middle:
                silence_start = None
                continue
            # Cut inside the silence, at the quietest frame(s).
            if i - silence_start <= self.max_sil_kept:
                pos = rms_list[silence_start: i+1].argmin() + silence_start
                sil_tags.append((0, pos) if silence_start == 0 else (pos, pos))
                clip_start = pos
            elif i - silence_start <= self.max_sil_kept * 2:
                pos = rms_list[i-self.max_sil_kept: silence_start+self.max_sil_kept+1].argmin() + i-self.max_sil_kept
                pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start
                pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept
                if silence_start == 0:
                    sil_tags.append((0, pos_r))
                    clip_start = pos_r
                else:
                    sil_tags.append((min(pos_l, pos), max(pos_r, pos)))
                    # The next clip starts where the removed span ends, which
                    # is max(pos_r, pos) here and not necessarily pos_r.
                    clip_start = max(pos_r, pos)
            else:
                pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start
                pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept
                sil_tags.append((0, pos_r) if silence_start == 0 else (pos_l, pos_r))
                clip_start = pos_r
            silence_start = None
        # Trailing silence that runs to the end of the signal.
        total_frames = len(rms_list)
        if silence_start is not None and total_frames - silence_start >= self.min_interval:
            silence_end = min(total_frames, silence_start+self.max_sil_kept)
            pos = rms_list[silence_start: silence_end+1].argmin() + silence_start
            sil_tags.append((pos, total_frames+1))
        if len(sil_tags) == 0:
            return [waveform]
        # The clips are the stretches between consecutive removed spans.
        chunks = []
        if sil_tags[0][0] > 0:
            chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))
        for i in range(len(sil_tags)-1):
            chunks.append(self._apply_slice(waveform, sil_tags[i][1], sil_tags[i+1][0]))
        if sil_tags[-1][1] < total_frames:
            chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames))
        return chunks


if mode == "Splitting":
    print("\n--- Melakukan Splitting pada Semua Hasil Vokal ---")
    output_slicer_dir = f"dataset/{project_name}"
    # Slices are numbered from 0 on every run, so anything left over from an
    # earlier run would silently mix into this dataset. Start clean.
    shutil.rmtree(output_slicer_dir, ignore_errors=True)
    os.makedirs(output_slicer_dir, exist_ok=True)
    try:
        # Imported here because these packages are installed/upgraded by the
        # dependency step earlier in this cell.
        import numpy as np
        import librosa
        import soundfile as sf

        global_chunk_count = 0
        for vocal_file in all_vocals_paths:
            print(f"Slicing {os.path.basename(vocal_file)}...")
            if not os.path.exists(vocal_file):
                print(f" Warning: File vokal tidak ditemukan: {vocal_file}. Melewatkan.")
                continue

            # Let librosa resample to the requested rate (or keep the native
            # rate when output_sr is 0). The returned `sr` is then always the
            # true rate of `audio`, so the slices are written with a correct
            # header even when the file on disk was not resampled beforehand.
            audio, sr = librosa.load(vocal_file, sr=(output_sr if output_sr != 0 else None), mono=True)

            slicer = Slicer(sr=sr, threshold=-40, min_length=5000, min_interval=500, hop_size=10, max_sil_kept=500)
            chunks = slicer.slice(audio)
            for chunk in chunks:
                sf.write(f"{output_slicer_dir}/split_{global_chunk_count}.{output_format}", chunk, sr)
                global_chunk_count += 1
        print(f"\nSplitting selesai. Total {global_chunk_count} file dibuat.")
    except Exception as e:
        print(f"Terjadi kesalahan saat splitting: {e}")
        raise  # re-raise with the original traceback intact

# === STEP 4: Copy the results to Google Drive ===
print("\n--- Menyalin Hasil ke Google Drive ---")
base_drive_folder = f"/content/drive/MyDrive/dataset/{project_name}"
vocals_drive_folder = f"{base_drive_folder}/vocals_only"
sliced_drive_folder = f"{base_drive_folder}/sliced_mixed"

os.makedirs(vocals_drive_folder, exist_ok=True)
os.makedirs(sliced_drive_folder, exist_ok=True)

print(f"Menyalin vokal mentah ke: {vocals_drive_folder}")
for vocal_path in all_vocals_paths:
    if not os.path.exists(vocal_path):
        continue
    stem, ext = os.path.splitext(os.path.basename(vocal_path))
    if stem == "vocals":
        # Sources processed in one piece are all stored as
        # separated/<model>/<source name>/vocals.<ext>. Copying them under
        # that shared base name would make each overwrite the previous one,
        # so the source name (the parent folder) is put into the file name.
        stem = f"{os.path.basename(os.path.dirname(vocal_path))}_vocals"
    shutil.copy(vocal_path, os.path.join(vocals_drive_folder, stem + ext))

if mode == "Splitting":
    print(f"Menyalin dataset yang sudah di-slice ke: {sliced_drive_folder}")
    local_sliced_folder = f"dataset/{project_name}"
    for item in os.listdir(local_sliced_folder):
        s = os.path.join(local_sliced_folder, item)
        d = os.path.join(sliced_drive_folder, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, dirs_exist_ok=True)
        else:
            shutil.copy2(s, d)

# --- Cleanup ---
# Remove the working folders so a later run starts without leftovers.
shutil.rmtree("temp_audio_downloads", ignore_errors=True)
shutil.rmtree("chunks", ignore_errors=True)
shutil.rmtree("separated", ignore_errors=True)
if os.path.exists("chunks_list.txt"):
    os.remove("chunks_list.txt")

print("\nProses selesai!")