"""Resize a class-per-folder image dataset and write one augmented copy per image."""

import argparse
import os
import random
from glob import glob

from PIL import Image, ImageEnhance

# Lower-case file extensions (with leading dot) that are treated as images.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Size passed to ``Image.resize`` for every output image: (width, height).
TARGET_SIZE = (64, 64)


def augment_image(img):
    """Return a randomly perturbed version of *img*.

    Three steps are each applied independently with probability 0.5:
    a left-right flip, a rotation by an angle drawn uniformly from
    [-5, 5] degrees, and a brightness enhancement with a factor drawn
    uniformly from [0.8, 1.2]. When no step is selected, *img* itself is
    returned unchanged.
    """
    if random.random() < 0.5:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)
    if random.random() < 0.5:
        img = img.rotate(random.uniform(-5, 5))
    if random.random() < 0.5:
        img = ImageEnhance.Brightness(img).enhance(random.uniform(0.8, 1.2))
    return img


def _find_images(input_dir):
    """Return the sorted paths of all image files found below *input_dir*.

    Extensions are compared case-insensitively so that files such as
    ``photo.JPG`` are not skipped on case-sensitive filesystems.
    """
    candidates = glob(os.path.join(input_dir, "**", "*"), recursive=True)
    return sorted(
        path
        for path in candidates
        if os.path.splitext(path)[1].lower() in IMAGE_EXTENSIONS
        and os.path.isfile(path)
    )


def _process_image(img_path, output_dir, size):
    """Write a resized copy and an augmented copy of *img_path*.

    Both files go to ``<output_dir>/<class_name>/``, where ``class_name``
    is the name of the folder that directly contains *img_path*. The
    augmented copy is named ``aug_<original file name>``.

    NOTE: two inputs that share both the parent folder name and the file
    name map to the same output paths, so the later one overwrites the
    earlier one.
    """
    # class = parent folder name
    class_name = os.path.basename(os.path.dirname(img_path))
    out_class_dir = os.path.join(output_dir, class_name)
    os.makedirs(out_class_dir, exist_ok=True)

    base_name = os.path.basename(img_path)

    # The context manager closes the source file as soon as the resized
    # copy (an independent image) has been produced.
    with Image.open(img_path) as img:
        img_resized = img.resize(size)

    img_resized.save(os.path.join(out_class_dir, base_name))
    aug_img = augment_image(img_resized)
    aug_img.save(os.path.join(out_class_dir, f"aug_{base_name}"))


def main():
    """Parse the command line and prepare the dataset.

    Every image found recursively under ``--data`` is resized to
    ``TARGET_SIZE`` and saved, together with one augmented variant, under
    ``--output_data``. A failure on one image is reported and does not
    stop the run.
    """
    parser = argparse.ArgumentParser(
        description="Resize images and write one augmented copy of each."
    )
    # Both paths are mandatory: without them the script would only fail
    # later with an unhelpful TypeError on a None path.
    parser.add_argument(
        "--data",
        required=True,
        help="input directory, searched recursively for images",
    )
    parser.add_argument(
        "--output_data",
        required=True,
        help="directory that receives the processed images",
    )
    args = parser.parse_args()

    input_dir = args.data
    output_dir = args.output_data

    print("Input:", input_dir)
    print("Output:", output_dir)

    os.makedirs(output_dir, exist_ok=True)

    # recursively search for images
    all_images = _find_images(input_dir)
    print(f"Total images found: {len(all_images)}")

    if not all_images:
        print("⚠ No images found. Check dataset structure.")
        return

    failed = 0
    for img_path in all_images:
        try:
            _process_image(img_path, output_dir, TARGET_SIZE)
        except Exception as e:
            # Best effort by design: report the bad file and keep going.
            failed += 1
            print("ERROR:", img_path, e)

    if failed:
        print(f"Images failed: {failed} of {len(all_images)}")
    print("✔ Data prep completed.")


if __name__ == "__main__":
    main()