import os
import shutil
# Define dataset paths
dataset_resized_path = "C:\\Users\\srira\\Downloads\\dataset-resized\\dataset-resized"
structured_dataset_path = "C:\\Users\\srira\\OneDrive\\Desktop\\AI_PROJ"
# Define dataset splits
splits = ["train", "val", "test"]
categories = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]
# Create structured dataset directories
for split in splits:
    split_path = os.path.join(structured_dataset_path, split)
    os.makedirs(split_path, exist_ok=True)
    for category in categories:
        os.makedirs(os.path.join(split_path, category), exist_ok=True)
# Define dataset split files
split_files = {
    "train": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_train.txt",
    "val": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_val.txt",
    "test": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_test.txt"
}
# Function to organize dataset based on split files
def organize_dataset(split, file_path):
    # Map numeric labels used in the split files to category names
    label_map = {
        "1": "glass",
        "2": "paper",
        "3": "cardboard",
        "4": "plastic",
        "5": "metal",
        "6": "trash"
    }
    with open(file_path, "r") as f:
        for line in f:
            # Each line is expected to contain "<filename> <numeric label>"
            parts = line.strip().split()
            if len(parts) != 2:
                continue  # Skip invalid lines
            filename, label = parts
            if label not in label_map:
                continue
            category = label_map[label]
            src_path = os.path.join(dataset_resized_path, category, filename)
            dest_path = os.path.join(structured_dataset_path, split, category, filename)
            if os.path.exists(src_path):
                shutil.copy(src_path, dest_path)
# Process dataset splits
for split, file_path in split_files.items():
    organize_dataset(split, file_path)
print("Dataset successfully organized into structured directories.")