# Alessia2004's picture
# Upload folder using huggingface_hub
# ae51a24 verified
import os
import argparse
from glob import glob
from PIL import Image, ImageEnhance
import random
def augment_image(img):
    """Return a randomly perturbed version of *img*.

    Three independent augmentations are considered in a fixed order, each
    applied with probability 0.5:

    1. horizontal mirror (``Image.FLIP_LEFT_RIGHT``),
    2. rotation by an angle drawn uniformly from [-5, 5],
    3. brightness scaling by a factor drawn uniformly from [0.8, 1.2].

    If no augmentation is selected, the very same object that was passed in
    is returned.
    """

    def coin_flip():
        # One draw from the module-level RNG per augmentation decision.
        return random.random() < 0.5

    result = img

    if coin_flip():
        result = result.transpose(Image.FLIP_LEFT_RIGHT)

    if coin_flip():
        angle = random.uniform(-5, 5)
        result = result.rotate(angle)

    if coin_flip():
        brightness = ImageEnhance.Brightness(result)
        factor = random.uniform(0.8, 1.2)
        result = brightness.enhance(factor)

    return result
# File extensions (compared case-insensitively) that count as input images.
_IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png")


def _is_within(path, root):
    """Return True when *path* is *root* itself or lies beneath it.

    *root* must already be an absolute, case-normalised path.
    """
    candidate = os.path.normcase(os.path.abspath(path))
    try:
        return os.path.commonpath([candidate, root]) == root
    except ValueError:
        # commonpath() raises when the paths live on different drives.
        return False


def _find_images(input_dir, output_dir):
    """Return the sorted image files under *input_dir*.

    Files located inside *output_dir* are skipped when it is a strict
    sub-directory of *input_dir*, so a re-run does not pick up its own
    previous results.
    """
    # Local import: the file itself only imports ``glob.glob``.
    from glob import escape

    in_root = os.path.normcase(os.path.abspath(input_dir))
    out_root = os.path.normcase(os.path.abspath(output_dir))
    # Processing a directory in place (output == input) keeps working as before.
    skip_output = out_root != in_root

    # escape() keeps characters such as "[" in the directory name literal.
    pattern = os.path.join(escape(input_dir), "**", "*")
    return sorted(
        path
        for path in glob(pattern, recursive=True)
        if path.lower().endswith(_IMAGE_SUFFIXES)
        and os.path.isfile(path)
        and not (skip_output and _is_within(path, out_root))
    )


def main():
    """Resize every image under ``--data`` and write it plus one augmented copy.

    Command-line arguments:
        --data         directory that is searched recursively for
                       .jpg/.jpeg/.png files (any letter case).
        --output_data  directory that receives the results; created if missing.

    For each image the name of its parent folder is used as the class name.
    The image is resized to 64x64 and saved as ``<output>/<class>/<name>``;
    a randomly augmented copy is saved next to it as ``aug_<name>``.

    A failure on a single image is reported and does not stop the run.
    Returns ``None``.
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them os.makedirs(None) would raise a
    # TypeError instead of argparse printing a usage message.
    parser.add_argument("--data", required=True)
    parser.add_argument("--output_data", required=True)
    args = parser.parse_args()

    input_dir = args.data
    output_dir = args.output_data
    size = (64, 64)

    print("Input:", input_dir)
    print("Output:", output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # recursively search for images
    all_images = _find_images(input_dir, output_dir)
    print(f"Total images found: {len(all_images)}")
    if not all_images:
        print("⚠ No images found. Check dataset structure.")
        return

    failures = 0
    for img_path in all_images:
        try:
            # class = parent folder name (AzureML-friendly)
            class_name = os.path.basename(os.path.dirname(img_path))
            out_class_dir = os.path.join(output_dir, class_name)
            os.makedirs(out_class_dir, exist_ok=True)

            # The context manager releases the file handle deterministically.
            with Image.open(img_path) as source:
                if source.mode == "P":
                    # Palette images are resized with nearest-neighbour
                    # sampling only and cannot be blended by ImageEnhance,
                    # so expand them to true colour first.
                    has_alpha = "transparency" in source.info
                    img = source.convert("RGBA" if has_alpha else "RGB")
                else:
                    img = source
                img_resized = img.resize(size)

            base_name = os.path.basename(img_path)
            img_resized.save(os.path.join(out_class_dir, base_name))

            aug_img = augment_image(img_resized)
            aug_img.save(os.path.join(out_class_dir, f"aug_{base_name}"))
        except Exception as e:
            # Deliberate best-effort boundary: one unreadable file must not
            # abort the whole dataset preparation.
            failures += 1
            print("ERROR:", img_path, e)

    if failures:
        print(f"⚠ {failures} of {len(all_images)} images could not be processed.")
    print("✔ Data prep completed.")
# Run the data preparation only when this file is executed as a script;
# importing it as a module just makes the functions available.
if __name__ == "__main__":
    main()