# File size: 2,079 Bytes
import os
import shutil

# Where the source images live and where the split layout is written
dataset_resized_path = "C:\\Users\\srira\\Downloads\\dataset-resized\\dataset-resized"
structured_dataset_path = "C:\\Users\\srira\\OneDrive\\Desktop\\AI_PROJ"

# Split names and the per-category sub-folders created inside each split
splits = ["train", "val", "test"]
categories = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]

# Build <structured_dataset_path>/<split>/<category> for every combination.
# The split folder itself is created first, then each category beneath it.
for split in splits:
    split_path = os.path.join(structured_dataset_path, split)
    targets = [split_path] + [
        os.path.join(split_path, category) for category in categories
    ]
    for target in targets:
        os.makedirs(target, exist_ok=True)

# Listing file for each split; every line pairs a filename with a label
split_files = {
    "train": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_train.txt",
    "val": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_val.txt",
    "test": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_test.txt",
}

# Function to organize dataset based on split files
def organize_dataset(split, file_path, source_root=None, dest_root=None):
    """Copy the images listed in a split file into the split's category folders.

    Each usable line of the split file holds two whitespace-separated fields:
    an image filename and a label ("1" to "6"). Lines with a different number
    of fields, or with a label outside that set, are ignored. A listed image
    is read from ``<source_root>/<category>/<filename>`` and copied to
    ``<dest_root>/<split>/<category>/<filename>``. Listed images that do not
    exist at the source are skipped and reported in one summary line.

    Args:
        split: Name of the split sub-directory to fill (e.g. "train").
        file_path: Path of the text file listing ``<filename> <label>`` pairs.
        source_root: Directory containing one sub-directory of images per
            category. Defaults to the module-level ``dataset_resized_path``.
        dest_root: Directory under which the split folders are written.
            Defaults to the module-level ``structured_dataset_path``.

    Returns:
        The number of files copied.

    Raises:
        OSError: If the split file cannot be opened, a destination folder
            cannot be created, or a copy fails.
    """
    # Fall back to the script-wide locations when no override is given, so
    # existing two-argument calls behave as before.
    if source_root is None:
        source_root = dataset_resized_path
    if dest_root is None:
        dest_root = structured_dataset_path

    # Label-to-category table; built once per call instead of once per line.
    label_map = {
        "1": "glass",
        "2": "paper",
        "3": "cardboard",
        "4": "plastic",
        "5": "metal",
        "6": "trash",
    }

    copied = 0
    missing = 0
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            parts = line.split()
            if len(parts) != 2:
                continue  # Skip blank or malformed lines

            filename, label = parts
            category = label_map.get(label)
            if category is None:
                continue  # Skip labels that have no category

            src_path = os.path.join(source_root, category, filename)
            if not os.path.exists(src_path):
                # Keep going, but remember it so the skip is not silent.
                missing += 1
                continue

            # Create the target folder on demand so the function does not
            # depend on the directories having been prepared beforehand.
            dest_dir = os.path.join(dest_root, split, category)
            os.makedirs(dest_dir, exist_ok=True)
            shutil.copy(src_path, os.path.join(dest_dir, filename))
            copied += 1

    if missing:
        print(
            f"Warning: {missing} file(s) listed in {file_path} "
            f"were not found under {source_root} and were skipped."
        )

    return copied

# Run the organizer once per split, pairing each split with its listing file
for split_name, listing_path in split_files.items():
    organize_dataset(split_name, listing_path)

print("Dataset successfully organized into structured directories.")