|
|
import sys
|
|
|
from pathlib import Path
|
|
|
|
|
|
sys.path.append(str(Path(__file__).resolve().parents[1]))
|
|
|
|
|
|
import torch
|
|
|
import numpy as np
|
|
|
from torch.utils.data import DataLoader, Dataset, random_split, WeightedRandomSampler
|
|
|
from torchvision import datasets, transforms
|
|
|
from src import config
|
|
|
import matplotlib.pyplot as plt
|
|
|
import warnings
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Channel-wise (RGB) normalization statistics — the standard ImageNet
# mean/std expected by torchvision's pretrained backbones.
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
|
|
|
|
|
|
|
|
|
|
|
|
# Training-time augmentation pipeline.
# Ordering matters: PIL-based augmentations run first, ToTensor() converts
# to a CHW float tensor, and tensor-only ops come last.
train_transform = transforms.Compose([
    # Resize slightly larger than the target so RandomCrop has room to move.
    transforms.Resize((config.IMAGE_SIZE + 32, config.IMAGE_SIZE + 32)),
    transforms.RandomCrop((config.IMAGE_SIZE, config.IMAGE_SIZE), padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=5),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.3),
    transforms.TrivialAugmentWide(num_magnitude_bins=31),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD),
    # BUGFIX: RandomErasing accepts tensors only; it previously sat before
    # ToTensor() and raised TypeError on PIL images. It now runs after
    # normalization, which is the placement shown in the torchvision docs.
    transforms.RandomErasing(p=0.2, scale=(0.02, 0.33), ratio=(0.3, 3.3)),
])
|
|
|
|
|
|
|
|
|
# Deterministic evaluation pipeline: no augmentation, just resize to the
# model's input size, convert to tensor, and normalize with the same
# statistics used at train time.
_val_steps = [
    transforms.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD),
]
val_transform = transforms.Compose(_val_steps)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TransformedDataset(Dataset):
    """Dataset wrapper that applies a transform to samples of a Subset.

    Needed after ``random_split``: the train and validation subsets share one
    underlying dataset but require different transform pipelines.
    """

    def __init__(self, subset, transform=None):
        self.subset = subset        # underlying Subset (or any indexable dataset)
        self.transform = transform  # callable applied to the sample's input, or None

    def __getitem__(self, index):
        """Return ``(x, y)`` at ``index``, with ``transform`` applied to ``x``.

        If loading or transforming a sample fails (e.g. a corrupt image), fall
        back to the next index so a single bad file does not crash a DataLoader
        worker.
        """
        # Iterative fallback. The original implementation recursed into
        # __getitem__, which overflows the stack (or loops forever) when many
        # consecutive samples are broken; this version gives up after one full
        # pass over the subset.
        for attempt in range(len(self.subset)):
            probe = (index + attempt) % len(self.subset)
            try:
                x, y = self.subset[probe]
                if self.transform:
                    x = self.transform(x)
                return x, y
            except Exception as e:
                print(f"[Warning] Error pada index {probe}: {e}")
        raise RuntimeError("All samples in the dataset failed to load.")

    def __len__(self):
        return len(self.subset)
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_data_path():
    """Return the dataset root directory as a Path.

    Uses ``config.DATA_PATH`` when it exists; otherwise probes a few common
    alternative folder names under the project root and updates
    ``config.DATA_PATH`` to the first match.

    Raises:
        FileNotFoundError: when no candidate directory exists.
    """
    data_path = Path(config.DATA_PATH)
    if data_path.exists():
        return data_path

    project_root = Path(__file__).resolve().parents[1]
    alt_names = ["Batik_Indonesia_JPG", "Batik-Indonesia", "Batik_Indonesia", "data", "dataset"]
    for name in alt_names:
        candidate = project_root / name
        if candidate.exists() and candidate.is_dir():
            print(f"[Data] config.DATA_PATH '{config.DATA_PATH}' tidak ditemukan. Menggunakan alternatif: {candidate}")
            try:
                # Keep the rest of the project pointed at the fallback path.
                config.DATA_PATH = str(candidate)
            except Exception:
                pass  # best-effort: a read-only config module is not fatal
            return candidate

    raise FileNotFoundError(
        f"config.DATA_PATH='{config.DATA_PATH}' tidak ditemukan. "
        f"Pastikan folder dataset ada atau set config.DATA_PATH ke path yang benar."
    )


def _build_weighted_sampler(train_targets, num_classes):
    """Create a WeightedRandomSampler that rebalances class frequencies.

    Each training sample is drawn with probability inversely proportional to
    the size of its class in the training split.
    """
    # minlength guarantees one count per class even when the highest class
    # index is missing from the train split.
    class_counts = np.bincount(train_targets, minlength=num_classes)
    # A class can end up empty after the random split; clamp to 1 so its
    # weight stays finite instead of 1/0 -> inf.
    class_counts = np.maximum(class_counts, 1)
    class_weights = 1.0 / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_targets]
    return WeightedRandomSampler(
        weights=sample_weights,
        num_samples=len(sample_weights),
        replacement=True
    )


def create_dataloaders():
    """Build and return the training and validation data loaders.

    Returns:
        tuple: ``(train_loader, val_loader, class_names)`` where the train
        loader draws samples through a WeightedRandomSampler to counter
        class imbalance.
    """
    data_path = _resolve_data_path()

    print(f"[Data] Memuat dataset induk dari: {data_path}")
    # Load without transforms; split-specific pipelines are attached later
    # through TransformedDataset.
    full_dataset = datasets.ImageFolder(str(data_path))

    class_names = full_dataset.classes
    num_classes = len(class_names)
    print(f"[Data] Ditemukan {num_classes} kelas: {class_names}")

    print(f"[Data] Membagi dataset 80:20 (seed: {config.RANDOM_SEED})...")
    total_size = len(full_dataset)
    val_size = int(total_size * config.TEST_SPLIT_SIZE)
    train_size = total_size - val_size

    # Seeded generator keeps the split reproducible across runs.
    train_dataset_raw, val_dataset_raw = random_split(
        full_dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(config.RANDOM_SEED)
    )
    print(f"[Data] Ukuran Train: {len(train_dataset_raw)} | Ukuran Validasi: {len(val_dataset_raw)}")

    train_dataset = TransformedDataset(train_dataset_raw, transform=train_transform)
    val_dataset = TransformedDataset(val_dataset_raw, transform=val_transform)

    print("[Data] Menghitung bobot untuk mengatasi ketidakseimbangan kelas...")
    # Targets of the train split only, in split order.
    train_targets = [full_dataset.targets[i] for i in train_dataset_raw.indices]
    train_sampler = _build_weighted_sampler(train_targets, num_classes)
    print("[Data] WeightedRandomSampler berhasil dibuat.")

    train_loader = DataLoader(
        train_dataset,
        batch_size=config.BATCH_SIZE,
        sampler=train_sampler,  # sampler and shuffle are mutually exclusive
        num_workers=2,
        pin_memory=False,
        shuffle=False
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=config.BATCH_SIZE,
        num_workers=2,
        pin_memory=False,
        shuffle=False
    )

    print("[Data] Data loader untuk Train dan Validasi siap.")

    return train_loader, val_loader, class_names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: build the loaders, pull one batch, and display a sample.
    print("Menjalankan pengujian data_loader.py...")

    train_loader, val_loader, class_names = create_dataloaders()

    print(f"\nTotal kelas: {len(class_names)}")

    print("\nMengambil 1 batch dari train_loader (untuk tes)...")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            images, labels = next(iter(train_loader))
            print(f" > Ukuran batch gambar: {images.shape}")
            print(f" > Ukuran batch label: {labels.shape}")
            print(f" > Contoh 5 label di batch ini: {labels[:5]}")

            # Move channels last and undo the MEAN/STD normalization so the
            # image renders with natural colors.
            sample = images[0].permute(1, 2, 0).numpy()
            sample = STD * sample + MEAN
            sample = np.clip(sample, 0, 1)

            plt.imshow(sample)
            plt.title(f"Contoh Gambar (Label: {class_names[labels[0]]})")
            plt.axis('off')
            plt.show()

            print("\n[Sukses] data_loader.py berfungsi dengan baik!")
        except Exception as e:
            # Boundary of the smoke test: report the failure, don't crash.
            print(f"\n[Error] Gagal menguji data loader: {e}")