Oriented_Barcode_Centernet / gptgenerated_barcode_augment.py

Upload 7 files

e08f471 verified 11 months ago

5.74 kB

	"""
	A basic script to augment your dataset by applying random rotation for each image
	This will read labels from xml/ folder and images from images/ and will generate
	an augmented dataset to images_augmented/ with a csv file train.csv
	containing x, y, width, height and angle for each car in the image
	"""
	import os
	import xml.etree.ElementTree as ET
	import pandas as pd
	import numpy as np
	import cv2
	from tqdm import tqdm

	dicts = []
	idx = 1

	# Function to resize image while preserving aspect ratio and padding
	def resize_and_pad(image, target_size=(512, 512)):
	original_height, original_width = image.shape[:2]
	target_width, target_height = target_size

	# Calculate the scaling factor
	scale = min(target_width / original_width, target_height / original_height)

	# Calculate new dimensions
	new_width = int(original_width * scale)
	new_height = int(original_height * scale)

	# Resize the image
	resized_image = cv2.resize(image, (new_width, new_height))

	# Pad the image to the target size
	delta_w = target_width - new_width
	delta_h = target_height - new_height
	top, bottom = delta_h // 2, delta_h - (delta_h // 2)
	left, right = delta_w // 2, delta_w - (delta_w // 2)
	padded_image = cv2.copyMakeBorder(resized_image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])

	return padded_image, scale, left, top

	# Loop over xml files
	for label_path in tqdm(sorted(os.listdir("xml"))):
	mytree = ET.parse(os.path.join("xml", label_path))
	root = mytree.getroot()

	target_size = (512,512)

	# Read image with OpenCV
	img = cv2.imread(root.find("path").text)

	padded_image, scale, offset_x, offset_y = resize_and_pad(img, target_size)
	img = padded_image

	# Generate random rotation (identity + 9 rotations)
	rand_rotations = np.random.uniform(low=-180, high=180, size=(9,))
	rand_rotations = np.append(rand_rotations, [0.0])

	for rot_angle in rand_rotations:
	image_center = tuple(np.array(img.shape[1::-1]) / 2)
	rot_mat = cv2.getRotationMatrix2D(image_center, rot_angle, 1.0)

	# Rotate image around image center
	img_rotated = cv2.warpAffine(img, rot_mat, img.shape[1::-1], flags=cv2.INTER_LINEAR)

	# Parse xml file
	for robndbox in root.findall("object/robndbox"):
	x = float(robndbox.find("cx").text) * scale + offset_x
	y = float(robndbox.find("cy").text) * scale + offset_y
	w = float(robndbox.find("w").text) * scale
	h = float(robndbox.find("h").text) * scale
	angle = float(robndbox.find("angle").text) # between 0 and 2*PI

	# Correct bbox position by applying rotation matrix
	correct = np.dot(rot_mat, np.array([x, y, 1]).reshape(3, 1))

	# It can happen that this random rotation pushes objects out of bounds
	if not 0 <= correct[0] < img.shape[1] or not 0 <= correct[1] < img.shape[0]:
	continue

	# Add rotation angle
	angle = np.pi / 2 - angle + np.pi / 180 * rot_angle

	if angle > np.pi:
	angle -= 2 * np.pi
	elif angle <= -np.pi:
	angle += 2 * np.pi

	# Resulting angle is between -PI and +PI
	assert -np.pi <= angle <= np.pi

	# Convert width of labelImg2 to length of the vehicle
	# Convert height of labelImg2 to "width" of the vehicle
	dicts.append(
	{
	"name": f"image_{idx:04d}",
	"img_width": img.shape[1], # Corrected to get width
	"img_height": img.shape[0], # Corrected to get height
	"x": int(correct[0]),
	"y": int(correct[1]),
	"w": f"{h:.2f}",
	"l": f"{w:.2f}",
	"angle": angle,
	}
	)

	cv2.imwrite(f"images_augmented/image_{idx:04d}.png", img_rotated)
	idx += 1

	debug = False # Set to True to debug it
	if debug:
	for car in dicts:
	if car["name"] == f"image_{idx-1:04d}":
	print(car)
	cos_angle = np.cos(car["angle"])
	sin_angle = np.sin(car["angle"])
	rot = np.array([[cos_angle, sin_angle], [-sin_angle, cos_angle]])

	w = float(car["w"])
	l = float(car["l"])

	bottom_right = np.dot(rot, np.array([w / 2, l / 2]).reshape(2, 1)).reshape(2)
	top_right = np.dot(rot, np.array([w / 2, -l / 2]).reshape(2, 1)).reshape(2)
	top_left = np.dot(rot, np.array([-w / 2, -l / 2]).reshape(2, 1)).reshape(2)
	bottom_left = np.dot(rot, np.array([-w / 2, l / 2]).reshape(2, 1)).reshape(2)

	br = (int(car["x"] + bottom_right[0]), int(car["y"] + bottom_right[1]))
	tr = (int(car["x"] + top_right[0]), int(car["y"] + top_right[1]))
	tl = (int(car["x"] + top_left[0]), int(car["y"] + top_left[1]))
	bl = (int(car["x"] + bottom_left[0]), int(car["y"] + bottom_left[1]))

	thickness = 3
	cv2.line(img_rotated, br, tr, (0, 220, 0), thickness) # Draw front of the vehicle in another color
	cv2.line(img_rotated, br, bl, (220, 220, 0), thickness)
	cv2.line(img_rotated, tl, bl, (220, 220, 0), thickness)
	cv2.line(img_rotated, tl, tr, (220, 220, 0), thickness)

	cv2.imshow("Img", img_rotated)
	k = cv2.waitKey(0)

	if k == 27:
	cv2.destroyAllWindows()
	break

	# Save targets to csv file
	df = pd.DataFrame(dicts)
	df.to_csv("train.csv", index=False)