|
|
import os
|
|
|
import shutil
|
|
|
|
|
|
|
|
|
# Root of the source images; files are read from <root>/<category>/<filename>.
dataset_resized_path = "C:\\Users\\srira\\Downloads\\dataset-resized\\dataset-resized"

# Root of the output tree; files are written to <root>/<split>/<category>/<filename>.
structured_dataset_path = "C:\\Users\\srira\\OneDrive\\Desktop\\AI_PROJ"

# Names of the dataset splits; each one becomes a sub-folder of the output root.
splits = ["train", "val", "test"]

# Category folder names; each one becomes a sub-folder of every split folder.
categories = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]
# Pre-create the <output root>/<split>/<category> folder tree.
# exist_ok=True makes re-running the script safe when folders already exist.
for split in splits:
    split_path = os.path.join(structured_dataset_path, split)
    # Created explicitly so the split folder exists even if `categories` is empty.
    os.makedirs(split_path, exist_ok=True)
    for category in categories:
        os.makedirs(os.path.join(split_path, category), exist_ok=True)
# Maps each split name to the text file listing that split's images.
# Each listing is read by organize_dataset(), which expects two
# whitespace-separated fields per line: "<filename> <numeric label>".
split_files = {
    "train": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_train.txt",
    "val": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_val.txt",
    "test": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_test.txt",
}
# Numeric label (second field of a listing line) -> category folder name.
# Built once at import time instead of once per listing line.
_LABEL_TO_CATEGORY = {
    "1": "glass",
    "2": "paper",
    "3": "cardboard",
    "4": "plastic",
    "5": "metal",
    "6": "trash",
}


def organize_dataset(split, file_path, source_root=None, dest_root=None):
    """Copy the images named in a listing file into one split's category folders.

    Each line of ``file_path`` must hold exactly two whitespace-separated
    fields, ``<filename> <label>``. The label is translated to a category
    name, and ``<source_root>/<category>/<filename>`` is copied to
    ``<dest_root>/<split>/<category>/<filename>``.

    Lines are skipped, without raising, when they do not have exactly two
    fields, when the label is not one of "1".."6", or when the source file
    does not exist.

    Args:
        split: Name of the split sub-folder to copy into (e.g. "train").
        file_path: Path of the listing file to read.
        source_root: Folder holding the per-category source images. Defaults
            to the module-level ``dataset_resized_path``.
        dest_root: Folder under which the split folders live. Defaults to
            the module-level ``structured_dataset_path``.

    Returns:
        The number of files actually copied.
    """
    # Resolve the defaults lazily so the module-level paths are only needed
    # when the caller does not supply explicit roots.
    if source_root is None:
        source_root = dataset_resized_path
    if dest_root is None:
        dest_root = structured_dataset_path

    copied = 0
    with open(file_path, "r") as listing:
        for line in listing:
            parts = line.split()
            if len(parts) != 2:
                # Blank or malformed line.
                continue

            filename, label = parts
            category = _LABEL_TO_CATEGORY.get(label)
            if category is None:
                # Unknown label.
                continue

            src_path = os.path.join(source_root, category, filename)
            if not os.path.exists(src_path):
                # Listed image is not present in the source tree.
                continue

            # Make sure the target folder exists so the copy cannot fail just
            # because the folder tree was not created beforehand.
            dest_dir = os.path.join(dest_root, split, category)
            os.makedirs(dest_dir, exist_ok=True)
            shutil.copy(src_path, os.path.join(dest_dir, filename))
            copied += 1

    return copied
# Populate every split from its listing file.
for split, file_path in split_files.items():
    organize_dataset(split, file_path)

# Printed once all listings have been processed.
print("Dataset successfully organized into structured directories.")