Oriented_Barcode_Centernet / gptgenerated_barcode_augment.py
RivianG's picture
Upload 7 files
e08f471 verified
"""
A basic script to augment your dataset by applying random rotations to each image.
This reads labels from the xml/ folder, loads each image from the <path> element
stored in its label file, resizes and pads it to 512x512, and writes the augmented
dataset to images_augmented/ together with a csv file train.csv containing the
image name and size plus x, y, w, l and angle for each annotated object in the image
"""
import os
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import cv2
from tqdm import tqdm
# One record (dict) per bounding box kept after augmentation; dumped to train.csv at the end
dicts = []
# Running counter used to number the augmented output images (image_0001, image_0002, ...)
idx = 1
# Function to resize image while preserving aspect ratio and padding
def resize_and_pad(image, target_size=(512, 512)):
    """Resize ``image`` to fit inside ``target_size`` and pad it with black borders.

    The aspect ratio is preserved: the image is scaled by a single factor so
    that it fits within the target, then centred on a black canvas.

    Args:
        image: Image array whose first two dimensions are (height, width).
        target_size: Output size as ``(width, height)``.

    Returns:
        A tuple ``(padded_image, scale, left, top)`` where ``scale`` is the
        resize factor that was applied and ``left`` / ``top`` are the number
        of padding pixels added on the left and top sides. A point ``(x, y)``
        in the original image therefore maps to
        ``(x * scale + left, y * scale + top)`` in the padded image.
    """
    original_height, original_width = image.shape[:2]
    target_width, target_height = target_size

    # A single scaling factor keeps the aspect ratio intact
    scale = min(target_width / original_width, target_height / original_height)

    # Truncation can yield 0 for very thin images (cv2.resize rejects empty
    # sizes), so clamp each side to at least one pixel and at most the target.
    new_width = min(target_width, max(1, int(original_width * scale)))
    new_height = min(target_height, max(1, int(original_height * scale)))

    resized_image = cv2.resize(image, (new_width, new_height))

    # Split the remaining space evenly; any odd pixel goes to the bottom/right
    delta_w = target_width - new_width
    delta_h = target_height - new_height
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)

    padded_image = cv2.copyMakeBorder(
        resized_image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0]
    )
    return padded_image, scale, left, top
# Loop-invariant settings, defined once instead of on every iteration
target_size = (512, 512)
debug = False  # Set to True to debug it

# cv2.imwrite does not create missing directories (it just reports failure),
# so make sure the output folder exists before writing anything.
os.makedirs("images_augmented", exist_ok=True)

# Loop over xml files
for label_path in tqdm(sorted(os.listdir("xml"))):
    mytree = ET.parse(os.path.join("xml", label_path))
    root = mytree.getroot()

    # Read image with OpenCV; imread signals failure by returning None
    image_path = root.find("path").text
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f"Could not read image '{image_path}' referenced by '{label_path}'")

    # Letterbox the image; scale/offsets are needed to map the labels as well
    padded_image, scale, offset_x, offset_y = resize_and_pad(img, target_size)
    img = padded_image
    img_height, img_width = img.shape[:2]
    image_center = tuple(np.array(img.shape[1::-1]) / 2)

    # Generate random rotation (identity + 9 rotations)
    rand_rotations = np.random.uniform(low=-180, high=180, size=(9,))
    rand_rotations = np.append(rand_rotations, [0.0])

    for rot_angle in rand_rotations:
        rot_mat = cv2.getRotationMatrix2D(image_center, rot_angle, 1.0)
        # Rotate image around image center
        img_rotated = cv2.warpAffine(
            img, rot_mat, (img_width, img_height), flags=cv2.INTER_LINEAR
        )

        # Records belonging to the image currently being generated
        image_records = []

        # Parse xml file
        for robndbox in root.findall("object/robndbox"):
            # Map the label into the resized + padded image
            x = float(robndbox.find("cx").text) * scale + offset_x
            y = float(robndbox.find("cy").text) * scale + offset_y
            w = float(robndbox.find("w").text) * scale
            h = float(robndbox.find("h").text) * scale
            angle = float(robndbox.find("angle").text)  # expected between 0 and 2*PI

            # Correct bbox position by applying rotation matrix.
            # Unpacking into scalars avoids converting 1-element arrays with
            # int(), which is deprecated in recent NumPy releases.
            center_x, center_y = rot_mat @ np.array([x, y, 1.0])

            # It can happen that this random rotation pushes objects out of bounds
            if not (0 <= center_x < img_width and 0 <= center_y < img_height):
                continue

            # Add rotation angle
            angle = np.pi / 2 - angle + np.pi / 180 * rot_angle
            if angle > np.pi:
                angle -= 2 * np.pi
            elif angle <= -np.pi:
                angle += 2 * np.pi
            # Resulting angle must be between -PI and +PI; use an explicit
            # check because assert statements are stripped under "python -O".
            if not -np.pi <= angle <= np.pi:
                raise ValueError(
                    f"Angle {angle} out of range [-pi, pi] for a box in '{label_path}'"
                )

            # Convert width of labelImg2 to length of the vehicle
            # Convert height of labelImg2 to "width" of the vehicle
            image_records.append(
                {
                    "name": f"image_{idx:04d}",
                    "img_width": img_width,
                    "img_height": img_height,
                    "x": int(center_x),
                    "y": int(center_y),
                    "w": f"{h:.2f}",
                    "l": f"{w:.2f}",
                    "angle": angle,
                }
            )

        dicts.extend(image_records)

        # imwrite returns False instead of raising when the file cannot be written
        if not cv2.imwrite(f"images_augmented/image_{idx:04d}.png", img_rotated):
            raise OSError(f"Could not write images_augmented/image_{idx:04d}.png")
        idx += 1

        if debug:
            # Draw every box of the image that was just written
            for car in image_records:
                print(car)
                cos_angle = np.cos(car["angle"])
                sin_angle = np.sin(car["angle"])
                rot = np.array([[cos_angle, sin_angle], [-sin_angle, cos_angle]])
                half_w = float(car["w"]) / 2
                half_l = float(car["l"]) / 2

                # Corner offsets relative to the box centre, rotated by the box angle
                bottom_right = rot @ np.array([half_w, half_l])
                top_right = rot @ np.array([half_w, -half_l])
                top_left = rot @ np.array([-half_w, -half_l])
                bottom_left = rot @ np.array([-half_w, half_l])

                br = (int(car["x"] + bottom_right[0]), int(car["y"] + bottom_right[1]))
                tr = (int(car["x"] + top_right[0]), int(car["y"] + top_right[1]))
                tl = (int(car["x"] + top_left[0]), int(car["y"] + top_left[1]))
                bl = (int(car["x"] + bottom_left[0]), int(car["y"] + bottom_left[1]))

                thickness = 3
                # Draw front of the vehicle in another color
                cv2.line(img_rotated, br, tr, (0, 220, 0), thickness)
                cv2.line(img_rotated, br, bl, (220, 220, 0), thickness)
                cv2.line(img_rotated, tl, bl, (220, 220, 0), thickness)
                cv2.line(img_rotated, tl, tr, (220, 220, 0), thickness)

            cv2.imshow("Img", img_rotated)
            k = cv2.waitKey(0)
            if k == 27:  # ESC: stop showing the remaining rotations of this image
                cv2.destroyAllWindows()
                break

# Save targets to csv file
df = pd.DataFrame(dicts)
df.to_csv("train.csv", index=False)