# kataria_opticals_api / dataset_prep.py
# Author: codernotme (commit a5a6a2e, verified)
import os
import shutil
# Root directory of the face-shape image dataset on the local machine.
dataset_path = "/home/codernotme/Projects/Github/katariaoptics/dataset"
# Canonical (lowercase) face-shape class names; one folder per shape.
standard_shapes = ["heart", "oblong", "oval", "round", "square"]
def normalize_dataset(path=None, shapes=None):
    """Normalize the face-shape dataset folder layout under *path*.

    Ensures one lowercase directory exists per canonical shape, then merges
    any case-variant duplicate folders (e.g. ``Heart``) into the lowercase
    folder, renaming files on name collisions, and removes the emptied
    variant directory.

    Args:
        path: Dataset root directory. Defaults to the module-level
            ``dataset_path``.
        shapes: Iterable of canonical (lowercase) shape names. Defaults to
            the module-level ``standard_shapes``.
    """
    if path is None:
        path = dataset_path
    if shapes is None:
        shapes = standard_shapes

    if not os.path.exists(path):
        print(f"Dataset path {path} does not exist.")
        return

    # Make sure every canonical lowercase folder exists up front.
    for shape in shapes:
        os.makedirs(os.path.join(path, shape), exist_ok=True)

    shape_set = set(shapes)
    for item in os.listdir(path):
        item_path = os.path.join(path, item)
        if not os.path.isdir(item_path):
            continue
        lower_name = item.lower()
        # Merge only case-variant duplicates of a canonical shape
        # (e.g. "Heart" -> "heart"); canonical folders are left alone.
        if lower_name in shape_set and item != lower_name:
            target_dir = os.path.join(path, lower_name)
            print(f"Merging {item} into {lower_name}...")
            for file_name in os.listdir(item_path):
                src_file = os.path.join(item_path, file_name)
                dst_file = os.path.join(target_dir, file_name)
                # On a name collision, probe base_1, base_2, ... until a
                # free name is found. (The original renamed exactly once,
                # so an existing "base_1" file was silently overwritten by
                # shutil.move -> os.rename on POSIX.)
                if os.path.exists(dst_file):
                    base, ext = os.path.splitext(file_name)
                    counter = 1
                    while os.path.exists(dst_file):
                        dst_file = os.path.join(target_dir, f"{base}_{counter}{ext}")
                        counter += 1
                shutil.move(src_file, dst_file)
            # All entries were moved out, so the variant dir is now empty.
            os.rmdir(item_path)
            print(f"Removed {item}")
if __name__ == "__main__":
    # Script entry point: normalize the dataset folder layout in place.
    normalize_dataset()