"""Export every image of a Hugging Face image dataset into per-class JPG folders.

The script loads the ``train`` split of ``dataset_name``, creates one
sub-directory of ``output_dir`` per class label, and writes each image there
as ``<label>_<n>.jpg``.  Rows that cannot be read or saved are counted and
reported at the end instead of aborting the run.
"""

import os

from datasets import load_dataset
from PIL import Image, UnidentifiedImageError

# Hub identifier of the dataset to export.
dataset_name = "muhammadsalmanalfaridzi/Batik-Indonesia"

# Root directory that receives one sub-directory per class label.
output_dir = "Batik_Indonesia_JPG"

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(output_dir, exist_ok=True)

print("Memuat dataset...")
dataset = load_dataset(dataset_name, split='train')
print("Dataset dimuat.")

# Class names, positioned so that an item's integer 'label' indexes into them.
labels = dataset.features['label'].names

print("Memulai proses ekstraksi gambar...")
skipped_files = 0

# Next file number to try for each class directory.  Seeded once from the
# directory's current entry count so numbering continues after earlier runs
# without re-listing the directory for every single image.
next_number_by_dir = {}

# Rows are fetched by index *inside* the try block: a failure while a row is
# being read must only skip that row.  With ``for item in dataset`` such an
# error is raised by the iterator itself, outside the try, and ends the loop.
for row_index in range(len(dataset)):
    try:
        item = dataset[row_index]
        gambar: Image.Image = item['image']
        label_name = labels[item['label']]

        class_dir = os.path.join(output_dir, label_name)
        if class_dir not in next_number_by_dir:
            os.makedirs(class_dir, exist_ok=True)
            next_number_by_dir[class_dir] = len(os.listdir(class_dir)) + 1

        file_stem = label_name.replace(' ', '_')
        file_number = next_number_by_dir[class_dir]
        save_path = os.path.join(class_dir, f"{file_stem}_{file_number}.jpg")
        # Never overwrite a file that is already there (e.g. when the
        # directory's numbering has gaps); move on to the next free number.
        while os.path.exists(save_path):
            file_number += 1
            save_path = os.path.join(
                class_dir, f"{file_stem}_{file_number}.jpg"
            )

        # Normalise every non-RGB image (palette, alpha, ...) before saving.
        if gambar.mode != 'RGB':
            gambar = gambar.convert('RGB')
        gambar.save(save_path)

        # Advance only after a successful save.
        next_number_by_dir[class_dir] = file_number + 1
    except UnidentifiedImageError:
        # Image data that Pillow cannot identify: count it and carry on.
        skipped_files += 1
    except Exception as e:
        # Deliberate best-effort boundary: report the problem, keep exporting.
        skipped_files += 1
        print(f"WARNING: Terjadi error lain ({e}). Melewati file...")

print("Ekstraksi selesai!")
print(f"Total file yang dilewati (rusak/error): {skipped_files}")