"""Normalize shape folder names inside a dataset directory.

Sub-folders whose names match a standard shape only when compared
case-insensitively (for example ``Heart``) are merged into the lowercase
standard folder (``heart``) and then removed.
"""

import os
import shutil

dataset_path = "/home/codernotme/Projects/Github/katariaoptics/dataset"
standard_shapes = ["heart", "oblong", "oval", "round", "square"]


def _unique_destination(directory, file_name):
    """Return a path in *directory* for *file_name* that is not yet taken.

    If ``file_name`` is free it is used as-is; otherwise ``_1``, ``_2``, ...
    is appended to the stem until an unused name is found.
    """
    candidate = os.path.join(directory, file_name)
    # lexists (not exists) so a dangling symlink also counts as "taken".
    if not os.path.lexists(candidate):
        return candidate

    base, ext = os.path.splitext(file_name)
    counter = 1
    while True:
        candidate = os.path.join(directory, f"{base}_{counter}{ext}")
        if not os.path.lexists(candidate):
            return candidate
        counter += 1


def normalize_dataset(root=None):
    """Merge case-variant shape folders into their lowercase standard folder.

    Args:
        root: Directory containing the shape folders. Defaults to the
            module-level ``dataset_path`` when omitted or ``None``.

    Returns:
        None. Progress is reported with ``print``; if *root* does not exist
        a message is printed and nothing is changed.

    Raises:
        OSError: Propagated from the underlying filesystem operations.
    """
    if root is None:
        root = dataset_path

    if not os.path.exists(root):
        print(f"Dataset path {root} does not exist.")
        return

    # Create standard folders if they don't exist.
    for shape in standard_shapes:
        os.makedirs(os.path.join(root, shape), exist_ok=True)

    # Sorted snapshot: directories are removed while we loop, and a fixed
    # order makes the numbering of renamed duplicates reproducible.
    for item in sorted(os.listdir(root)):
        item_path = os.path.join(root, item)
        if not os.path.isdir(item_path):
            continue

        lower_name = item.lower()
        # Only folders that match a standard shape with different casing.
        if lower_name not in standard_shapes or item == lower_name:
            continue

        target_dir = os.path.join(root, lower_name)

        # On a case-insensitive filesystem "Heart" and "heart" are the same
        # directory; moving files "into" it would only rename them and the
        # final rmdir would fail. A case-only rename is all that is needed.
        if os.path.samefile(item_path, target_dir):
            os.rename(item_path, target_dir)
            print(f"Renamed {item} to {lower_name}")
            continue

        print(f"Merging {item} into {lower_name}...")
        for file_name in os.listdir(item_path):
            src_file = os.path.join(item_path, file_name)
            # Never overwrite: pick the first free "<base>_<n><ext>" name.
            dst_file = _unique_destination(target_dir, file_name)
            shutil.move(src_file, dst_file)

        # Remove the now-empty directory.
        os.rmdir(item_path)
        print(f"Removed {item}")


if __name__ == "__main__":
    normalize_dataset()