| """ |
| A basic script that augments a dataset by applying random rotations to each image. |
| It reads labels from the xml/ folder and images from images/, and writes |
| the augmented dataset to images_augmented/ together with a CSV file, train.csv, |
| containing the x, y, width, height and angle of each car in each image. |
| """ |
| import os |
| import xml.etree.ElementTree as ET |
| import pandas as pd |
| import numpy as np |
| import cv2 |
| from tqdm import tqdm |
|
|
# One dict per kept bounding box, appended by the augmentation loop and
# written out as train.csv at the end of the script.
dicts = []
# 1-based running index used to name the augmented images (image_0001, ...).
idx = 1
|
|
| |
def resize_and_pad(image, target_size=(512, 512)):
    """Scale ``image`` to fit inside ``target_size`` and pad it with black borders.

    The aspect ratio is preserved: a single scale factor is applied so the
    image fits within the target, and the remaining space is filled with a
    constant black border split as evenly as possible between both sides.

    Args:
        image: Image array exposing ``shape`` as ``(height, width, ...)``,
            e.g. the result of ``cv2.imread``.
        target_size: ``(width, height)`` of the output canvas in pixels.

    Returns:
        Tuple ``(padded_image, scale, left, top)``: the padded image, the scale
        factor that was applied, and the widths of the left and top borders.
        Callers can map a source point ``(x, y)`` to the padded image with
        ``(x * scale + left, y * scale + top)``.

    Raises:
        ValueError: If ``image`` is ``None`` or has a zero-sized dimension.
    """
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        raise ValueError("image is None (was the file read successfully?)")

    original_height, original_width = image.shape[:2]
    if original_height <= 0 or original_width <= 0:
        raise ValueError(
            f"image has an empty dimension: {original_width}x{original_height}"
        )
    target_width, target_height = target_size

    # Largest uniform scale at which the whole image still fits the canvas.
    scale = min(target_width / original_width, target_height / original_height)

    # Round instead of truncating: floating-point error can put the product a
    # hair below the intended integer (e.g. 196 * (512 / 196) < 512), which
    # truncation turns into a lost pixel. Clamp so a very thin image never
    # collapses to a zero-sized dimension, which cv2.resize rejects.
    new_width = min(target_width, max(1, int(round(original_width * scale))))
    new_height = min(target_height, max(1, int(round(original_height * scale))))

    resized_image = cv2.resize(image, (new_width, new_height))

    # Split the leftover space between both sides; any odd pixel goes to the
    # bottom / right border.
    delta_w = target_width - new_width
    delta_h = target_height - new_height
    top, left = delta_h // 2, delta_w // 2
    bottom, right = delta_h - top, delta_w - left
    padded_image = cv2.copyMakeBorder(
        resized_image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0]
    )

    return padded_image, scale, left, top
|
|
| |
# Column order of train.csv; passing it explicitly also guarantees a header
# row when no bounding box survives.
CSV_COLUMNS = ["name", "img_width", "img_height", "x", "y", "w", "l", "angle"]

# Canvas size (width, height) that every source image is resized and padded to.
target_size = (512, 512)

# Set to True to preview every augmented image with its boxes drawn on top.
debug = False


def _draw_debug_box(canvas, car):
    """Draw the rotated rectangle described by one CSV row onto ``canvas``."""
    cos_angle = np.cos(car["angle"])
    sin_angle = np.sin(car["angle"])
    rot = np.array([[cos_angle, sin_angle], [-sin_angle, cos_angle]])

    half_w = float(car["w"]) / 2
    half_l = float(car["l"]) / 2

    def corner(dx, dy):
        # Rotate the corner offset, then translate it to the box centre.
        offset = rot @ np.array([dx, dy])
        return int(car["x"] + offset[0]), int(car["y"] + offset[1])

    br = corner(half_w, half_l)
    tr = corner(half_w, -half_l)
    tl = corner(-half_w, -half_l)
    bl = corner(-half_w, half_l)

    thickness = 3
    # The br-tr edge gets its own colour so the box orientation is visible.
    cv2.line(canvas, br, tr, (0, 220, 0), thickness)
    cv2.line(canvas, br, bl, (220, 220, 0), thickness)
    cv2.line(canvas, tl, bl, (220, 220, 0), thickness)
    cv2.line(canvas, tl, tr, (220, 220, 0), thickness)


# cv2.imwrite reports a missing directory only through its return value, so
# make sure the output folder exists before writing anything.
os.makedirs("images_augmented", exist_ok=True)

for label_path in tqdm(sorted(os.listdir("xml"))):
    # Skip stray non-label files (e.g. .DS_Store) instead of failing to parse them.
    if not label_path.lower().endswith(".xml"):
        continue

    root = ET.parse(os.path.join("xml", label_path)).getroot()

    path_node = root.find("path")
    if path_node is None or not path_node.text:
        raise ValueError(f"{label_path}: missing <path> element")
    img = cv2.imread(path_node.text)
    if img is None:
        # cv2.imread returns None rather than raising on unreadable files.
        raise FileNotFoundError(
            f"{label_path}: cannot read image {path_node.text!r}"
        )

    img, scale, offset_x, offset_y = resize_and_pad(img, target_size)
    img_height, img_width = img.shape[:2]
    canvas_size = (img_width, img_height)
    image_center = (img_width / 2, img_height / 2)

    # Nine random rotations in degrees, plus the unrotated image.
    rand_rotations = np.random.uniform(low=-180, high=180, size=(9,))
    rand_rotations = np.append(rand_rotations, [0.0])

    for rot_angle in rand_rotations:
        rot_mat = cv2.getRotationMatrix2D(image_center, rot_angle, 1.0)
        img_rotated = cv2.warpAffine(
            img, rot_mat, canvas_size, flags=cv2.INTER_LINEAR
        )

        name = f"image_{idx:04d}"
        first_box = len(dicts)  # rows from here on belong to this image

        for robndbox in root.findall("object/robndbox"):
            # Map the label into the resized and padded image.
            x = float(robndbox.find("cx").text) * scale + offset_x
            y = float(robndbox.find("cy").text) * scale + offset_y
            w = float(robndbox.find("w").text) * scale
            h = float(robndbox.find("h").text) * scale
            angle = float(robndbox.find("angle").text)

            # Apply the 2x3 affine matrix to the homogeneous centre. Unpacking
            # into scalars avoids converting 1-element arrays with int(),
            # which NumPy has deprecated.
            cx, cy = rot_mat @ np.array([x, y, 1.0])

            # Drop boxes whose centre was rotated out of the frame.
            if not (0 <= cx < img_width and 0 <= cy < img_height):
                continue

            # pi/2 minus the label angle (presumably radians), plus the
            # augmentation rotation converted from degrees to radians.
            angle = np.pi / 2 - angle + np.pi / 180 * rot_angle

            # Wrap once into (-pi, pi].
            if angle > np.pi:
                angle -= 2 * np.pi
            elif angle <= -np.pi:
                angle += 2 * np.pi

            # Explicit check instead of assert, which is stripped under -O.
            if not -np.pi <= angle <= np.pi:
                raise ValueError(
                    f"{label_path}: box angle {angle!r} is outside [-pi, pi]"
                )

            dicts.append(
                {
                    "name": name,
                    "img_width": img_width,
                    "img_height": img_height,
                    "x": int(cx),
                    "y": int(cy),
                    # The label's h is exported as "w" and its w as "l".
                    "w": f"{h:.2f}",
                    "l": f"{w:.2f}",
                    "angle": angle,
                }
            )

        if not cv2.imwrite(f"images_augmented/{name}.png", img_rotated):
            raise OSError(f"Failed to write images_augmented/{name}.png")
        idx += 1

        if debug:
            # Draws directly on img_rotated; the file on disk is already
            # written, so the saved image stays clean.
            for car in dicts[first_box:]:
                print(car)
                _draw_debug_box(img_rotated, car)

            cv2.imshow("Img", img_rotated)
            k = cv2.waitKey(0)

            # NOTE(review): the source indentation was lost; Esc (27) is taken
            # to close the preview and skip the remaining rotations of this
            # image. Confirm against the original script.
            if k == 27:
                cv2.destroyAllWindows()
                break


df = pd.DataFrame(dicts, columns=CSV_COLUMNS)
df.to_csv("train.csv", index=False)
|
|