File size: 1,974 Bytes
ae51a24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import argparse
from glob import glob
from PIL import Image, ImageEnhance
import random

def augment_image(img):
    """Return a randomly perturbed version of ``img``.

    Three transforms are considered in a fixed order: a left-right flip,
    a rotation by an angle drawn uniformly from [-5, 5], and a brightness
    scaling by a factor drawn uniformly from [0.8, 1.2]. Each transform is
    applied only when its own ``random.random()`` draw is below 0.5, so the
    input may come back unchanged.

    Args:
        img: Image object exposing ``transpose`` and ``rotate`` and accepted
            by ``ImageEnhance.Brightness``.

    Returns:
        The image after whichever transforms were selected.
    """

    def mirror(picture):
        # Horizontal flip.
        return picture.transpose(Image.FLIP_LEFT_RIGHT)

    def tilt(picture):
        # Small random rotation.
        angle = random.uniform(-5, 5)
        return picture.rotate(angle)

    def relight(picture):
        # Random brightness scaling around the original level (1.0).
        brightness = ImageEnhance.Brightness(picture)
        factor = random.uniform(0.8, 1.2)
        return brightness.enhance(factor)

    # Each step's coin flip is drawn before that step's own parameters, so
    # the sequence of random draws depends on which steps fire.
    for step in (mirror, tilt, relight):
        if random.random() < 0.5:
            img = step(img)
    return img

def main():
    """Resize a folder tree of images and write each one plus an augmented copy.

    Command-line arguments:
        --data: Root directory that is searched recursively for ``*.jpg``,
            ``*.jpeg`` and ``*.png`` files.
        --output_data: Directory that receives the results; created if it
            does not exist.

    For every image found, the name of its parent folder is used as the
    class name. The image is resized to 64x64 and saved as
    ``<output_data>/<class>/<file name>``; a randomly augmented copy of the
    resized image is saved next to it as ``aug_<file name>``.

    Failures on individual images are reported and skipped so that one bad
    file does not abort the whole run.

    NOTE: the output path is built only from the parent folder name and the
    file name, so two inputs sharing both will overwrite each other.
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them the path handling below would
    # fail later with an opaque TypeError on None.
    parser.add_argument("--data", required=True)
    parser.add_argument("--output_data", required=True)
    args = parser.parse_args()

    input_dir = args.data
    output_dir = args.output_data
    size = (64, 64)

    print("Input:", input_dir)
    print("Output:", output_dir)

    os.makedirs(output_dir, exist_ok=True)

    # recursively search for images
    all_images = []
    for ext in ("*.jpg", "*.jpeg", "*.png"):
        all_images.extend(glob(os.path.join(input_dir, "**", ext), recursive=True))

    print(f"Total images found: {len(all_images)}")

    if not all_images:
        print("⚠ No images found. Check dataset structure.")
        return

    failed = 0
    for img_path in all_images:
        try:
            # class = parent folder name (AzureML-friendly)
            class_name = os.path.basename(os.path.dirname(img_path))

            out_class_dir = os.path.join(output_dir, class_name)
            os.makedirs(out_class_dir, exist_ok=True)

            # Close the source file as soon as the pixels have been read;
            # the resized image is an independent copy.
            with Image.open(img_path) as img:
                img_resized = img.resize(size)

            base_name = os.path.basename(img_path)
            out_path = os.path.join(out_class_dir, base_name)

            img_resized.save(out_path)

            aug_img = augment_image(img_resized)
            aug_out_path = os.path.join(out_class_dir, f"aug_{base_name}")
            aug_img.save(aug_out_path)

        except Exception as e:
            # Best-effort batch job: report the failure and keep going.
            failed += 1
            print("ERROR:", img_path, e)

    if failed:
        print(f"⚠ {failed} of {len(all_images)} images could not be processed.")
    print("✔ Data prep completed.")

# Run the data-preparation pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()