"""Split an image dataset into ``train`` and ``valid`` folders, class by class.

The source directory must contain one sub-directory per class, each holding
image files. For every class the images are shuffled and copied into
``<target>/train/<class>`` and ``<target>/valid/<class>`` according to the
training ratio. The target directory is deleted and rebuilt on every run.
"""

import os
import shutil
import random
import math
import sys

# Directory holding one sub-folder of images per class.
SOURCE_DIR = 'dataset'
# Output directory; it is wiped and rebuilt on every run.
TARGET_DIR = 'data'
# Fraction of each class's images that goes to the training split.
TRAIN_RATIO = 0.8
# Lower-case file extensions that are treated as images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


class DatasetNotFoundError(FileNotFoundError):
    """Raised when the source directory is missing or has no class folders."""


def _is_within(path, parent):
    """Return True if *path* is *parent* itself or lies somewhere beneath it."""
    path = os.path.normcase(os.path.realpath(path))
    parent = os.path.normcase(os.path.realpath(parent))
    try:
        return os.path.commonpath([path, parent]) == parent
    except ValueError:
        # commonpath refuses e.g. paths on different Windows drives; such
        # paths cannot be nested in each other.
        return False


def find_class_folders(source_dir: str) -> list:
    """Return the sorted names of the class sub-directories of *source_dir*.

    Raises:
        DatasetNotFoundError: if *source_dir* does not exist, is not a
            directory, or contains no sub-directories.
    """
    try:
        entries = os.listdir(source_dir)
    except (FileNotFoundError, NotADirectoryError) as err:
        raise DatasetNotFoundError(
            f"source folder {source_dir!r} not found"
        ) from err

    # Sorted so that the processing order (and therefore a seeded split)
    # does not depend on the arbitrary order returned by os.listdir.
    class_folders = sorted(
        name for name in entries
        if os.path.isdir(os.path.join(source_dir, name))
    )
    if not class_folders:
        raise DatasetNotFoundError(
            f"no class folders found in {source_dir!r}"
        )
    return class_folders


def split_dataset(source_dir: str = SOURCE_DIR, target_dir: str = TARGET_DIR,
                  train_ratio: float = TRAIN_RATIO, seed=None) -> dict:
    """Copy every class of *source_dir* into train/valid folders in *target_dir*.

    Args:
        source_dir: Directory with one sub-directory of images per class.
        target_dir: Output directory. Any existing content is deleted.
        train_ratio: Fraction (0..1) of each class copied to ``train``; the
            remainder goes to ``valid``. The train count is rounded down.
        seed: Optional seed for the shuffle. ``None`` (the default) gives a
            different random split on every run.

    Returns:
        A dict mapping each class name to ``(n_train, n_valid)``.

    Raises:
        ValueError: if *train_ratio* is outside [0, 1], or if one of the two
            directories is the same as, or nested inside, the other.
        DatasetNotFoundError: if *source_dir* is missing or has no class
            folders. The target directory is left untouched in that case.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio!r}")
    # The target is removed with rmtree below, so overlapping directories
    # would either destroy the source or feed old output back in as a class.
    if _is_within(source_dir, target_dir) or _is_within(target_dir, source_dir):
        raise ValueError(
            f"source {source_dir!r} and target {target_dir!r} must not overlap"
        )

    # Validate the source BEFORE touching the target, so a missing dataset
    # never wipes the result of a previous successful run.
    class_folders = find_class_folders(source_dir)
    print(f"Ditemukan {len(class_folders)} kelas: {class_folders}")

    # Start from a clean target so stale files from earlier runs cannot leak
    # into the new split.
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    train_path = os.path.join(target_dir, 'train')
    valid_path = os.path.join(target_dir, 'valid')
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(valid_path, exist_ok=True)

    # A private generator keeps the global random state untouched.
    rng = random.Random(seed)
    counts = {}

    for cls in class_folders:
        source_class_path = os.path.join(source_dir, cls)
        train_class_path = os.path.join(train_path, cls)
        valid_class_path = os.path.join(valid_path, cls)
        os.makedirs(train_class_path, exist_ok=True)
        os.makedirs(valid_class_path, exist_ok=True)

        # Only regular files with an image extension; sorted so that the
        # same seed always yields the same split.
        images = sorted(
            name for name in os.listdir(source_class_path)
            if name.lower().endswith(IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(source_class_path, name))
        )
        rng.shuffle(images)

        split_point = math.floor(len(images) * train_ratio)
        train_images = images[:split_point]
        valid_images = images[split_point:]
        print(f" Kelas '{cls}': {len(train_images)} train, {len(valid_images)} valid")

        for dest_dir, names in ((train_class_path, train_images),
                                (valid_class_path, valid_images)):
            for name in names:
                shutil.copy(os.path.join(source_class_path, name),
                            os.path.join(dest_dir, name))

        counts[cls] = (len(train_images), len(valid_images))

    return counts


def main():
    """Run the split with the module-level settings and report the outcome."""
    print(f"Memulai proses split data dari folder '{SOURCE_DIR}'...")
    try:
        split_dataset(SOURCE_DIR, TARGET_DIR, TRAIN_RATIO)
    except DatasetNotFoundError:
        print(f"!!! ERROR: Folder '{SOURCE_DIR}' tidak ditemukan atau kosong.")
        print("Pastikan Anda sudah mengekstrak dataset Kaggle ke dalam folder tersebut.")
        # Non-zero status so calling shells/pipelines can detect the failure.
        sys.exit(1)

    print("\n--- Proses split data selesai! ---")
    print(f"Folder '{TARGET_DIR}' dengan struktur 'train' dan 'valid' telah berhasil dibuat.")


if __name__ == "__main__":
    main()