import os
import shutil

# Root of the raw (resized) dataset: one sub-folder per category.
dataset_resized_path = "C:\\Users\\srira\\Downloads\\dataset-resized\\dataset-resized"
# Destination root for the structured train/val/test layout.
structured_dataset_path = "C:\\Users\\srira\\OneDrive\\Desktop\\AI_PROJ"

# Dataset splits and category names.
splits = ["train", "val", "test"]
categories = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]

# Split listing files: each line is "<filename> <numeric label>".
split_files = {
    "train": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_train.txt",
    "val": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_val.txt",
    "test": "C:\\Users\\srira\\Downloads\\one-indexed-files-notrash_test.txt"
}

# Numeric label -> category name.  Hoisted to module level: the original
# rebuilt this dict for every line of every split file inside the read loop.
LABEL_MAP = {
    "1": "glass",
    "2": "paper",
    "3": "cardboard",
    "4": "plastic",
    "5": "metal",
    "6": "trash",
}


def organize_dataset(split, file_path, src_root=None, dest_root=None):
    """Copy the images listed in *file_path* into the *split* directory tree.

    Each line of the listing file must be "<filename> <label>".  Malformed
    lines, unknown labels, and missing source files are silently skipped
    (best-effort, matching the original behavior).

    Args:
        split: Split name ("train", "val", or "test") — used as the
            destination sub-directory.
        file_path: Path to the split listing file.
        src_root: Root of the raw dataset; defaults to the module-level
            ``dataset_resized_path`` (backward compatible).
        dest_root: Root of the structured dataset; defaults to the
            module-level ``structured_dataset_path`` (backward compatible).
    """
    if src_root is None:
        src_root = dataset_resized_path
    if dest_root is None:
        dest_root = structured_dataset_path
    with open(file_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 2:
                continue  # Skip invalid lines
            filename, label = parts
            # Single dict lookup instead of membership test + indexing.
            category = LABEL_MAP.get(label)
            if category is None:
                continue  # Unknown label
            src_path = os.path.join(src_root, category, filename)
            dest_path = os.path.join(dest_root, split, category, filename)
            # Missing source files are skipped rather than raising.
            if os.path.exists(src_path):
                shutil.copy(src_path, dest_path)


def main():
    """Create the split/category directory tree, then populate it."""
    for split in splits:
        split_path = os.path.join(structured_dataset_path, split)
        os.makedirs(split_path, exist_ok=True)
        for category in categories:
            os.makedirs(os.path.join(split_path, category), exist_ok=True)
    for split, file_path in split_files.items():
        organize_dataset(split, file_path)
    print("Dataset successfully organized into structured directories.")


# Entry-point guard so importing this module no longer triggers filesystem
# side effects against the hard-coded absolute paths.
if __name__ == "__main__":
    main()