import hashlib
import os
from pathlib import Path
from time import localtime, strftime, time

import numpy as np
import requests
import torch
from matplotlib import cm as colormap
from matplotlib import pyplot as plt
from matplotlib.colors import Normalize
from PIL import Image, ImageDraw, ImageFont

YOLOV3_WEIGHTS_PATH = 'https://pjreddie.com/media/files/yolov3.weights'
YOLOV3_WEIGHTS_MD5 = 'c84e5b99d0e52cd466ae710cadf6d84c'


def md5_hash(path):
    '''Returns the md5 checksum of the file at path as a hex string.'''
    with open(path, "rb") as f:
        content = f.read()
    return hashlib.md5(content).hexdigest()


def check_if_file_exists_else_download(path, chunk_size=1024):
    path = Path(path)

    if not path.exists() or (md5_hash(path) != YOLOV3_WEIGHTS_MD5):
        print(path, 'does not exist or its md5sum is incorrect; downloading...')
        path.parent.mkdir(exist_ok=True, parents=True)

        with requests.get(YOLOV3_WEIGHTS_PATH, stream=True) as r:
            total_size = int(r.headers.get('content-length', 0))
            with open(path, 'wb') as f:
                for data in r.iter_content(chunk_size=chunk_size):
                    if data:
                        f.write(data)

        print('downloaded from', YOLOV3_WEIGHTS_PATH, 'md5 of the file:', md5_hash(path))

    return path
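
# Usage sketch (the 'weights/yolov3.weights' path is an assumption; any writable
# path works). The file is downloaded on the first call and verified afterwards:
# >>> weights_path = check_if_file_exists_else_download('weights/yolov3.weights')
# >>> md5_hash(weights_path) == YOLOV3_WEIGHTS_MD5
# True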


def parse_cfg(file):
    '''
    Parses the original darknet .cfg file.

    Argument
    --------
    file: str
        A path to the cfg file.

    Output
    ------
    layers: list
        A list of dicts with the config for each layer.
        Note: the 0th element of the list contains the config for the network itself.
    '''
    layers = []
    layer = {}

    with open(file, 'r') as readf:
        lines = readf.read().split('\n')
        # skip commented lines
        lines = [line for line in lines if not line.startswith('#')]
        # skip empty lines
        lines = [line for line in lines if not len(line) == 0]
        # remove all whitespaces
        lines = [line.replace(' ', '') for line in lines]

        for line in lines:
            # if the line is the name of a layer (they are of the form: [*])
            if line.startswith('[') and line.endswith(']'):
                # save the previous layer, as the next lines contain info for the next layer
                if len(layer) > 0:
                    layers.append(layer)
                    layer = {}
                # add the layer's name/type
                layer['name'] = line.replace('[', '').replace(']', '')
            # if not the name, then parse the arguments
            else:
                # all arguments follow the pattern: 'key=value'
                key, value = line.split('=')
                # add info to the layer
                layer[key] = value

        # append the last layer
        layers.append(layer)

    return layers
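
# Example of the parsed structure (a sketch, assuming a standard darknet
# yolov3.cfg is available at the given path; all values are kept as strings):
# >>> layers = parse_cfg('cfg/yolov3.cfg')
# >>> layers[0]['name']            # the 0th entry describes the network itself
# 'net'
# >>> layers[1]['name'], layers[1]['filters']
# ('convolutional', '32')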


def get_center_coords(bboxes):
    '''
    Transforms bboxes given in top-left coordinates (top_left_x, top_left_y, w, h)
    into center coordinates (center_x, center_y, w, h).

    Argument
    --------
    bboxes: torch.FloatTensor
        A tensor of size (P, D) where D should contain info about the coords
        in the following order: (top_left_x, top_left_y, width, height).
        Note: D can be higher than 4.

    Output
    ------
    bboxes: torch.FloatTensor
        A tensor similar to the input but with center coordinates
        in the 0th and 1st columns.
    '''
    # true division (not floor division) to stay consistent with get_corner_coords
    bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] / 2
    bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] / 2
    return bboxes
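
# Quick sanity check of the top-left -> center transform (toy numbers):
# >>> get_center_coords(torch.tensor([[6., 4., 4., 4.]]))
# tensor([[8., 6., 4., 4.]])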


def get_corner_coords(bboxes):
    '''
    Transforms the bounding box coordinates from (center_x, center_y, w, h) into
    (top_left_x, top_left_y, bottom_right_x, bottom_right_y),
    i.e. into corner coordinates.

    Argument
    --------
    bboxes: torch.FloatTensor
        A tensor of size (P, D) where D should contain info about the coords
        in the following order: (center_x, center_y, width, height).
        Note: D can be higher than 4.

    Outputs
    -------
    top_left_x, top_left_y, bottom_right_x, bottom_right_y: torch.FloatTensors
        Transformed coordinates for the bboxes: top-left corner coordinates for x and y
        and bottom-right corner coordinates for x and y, respectively.
    '''
    top_left_x = bboxes[:, 0] - bboxes[:, 2] / 2
    top_left_y = bboxes[:, 1] - bboxes[:, 3] / 2
    bottom_right_x = bboxes[:, 0] + bboxes[:, 2] / 2
    bottom_right_y = bboxes[:, 1] + bboxes[:, 3] / 2
    return top_left_x, top_left_y, bottom_right_x, bottom_right_y
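
# Quick sanity check of the center -> corner transform (toy numbers),
# the inverse of the example above:
# >>> get_corner_coords(torch.tensor([[8., 6., 4., 4.]]))
# (tensor([6.]), tensor([4.]), tensor([10.]), tensor([8.]))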


def iou_vectorized(bboxes1, bboxes2, without_center_coords=False):
    '''
    Calculates intersection over union between every bbox in bboxes1 and
    every bbox in bboxes2, i.e. the Cartesian product of both sets.

    Arguments
    ---------
    bboxes1: torch.FloatTensor
        (M, 4 + *) shaped tensor with M bboxes with 4 bbox coordinates (cx, cy, w, h, *).
    bboxes2: torch.FloatTensor
        (N, 4 + *) shaped tensor with N bboxes with 4 bbox coordinates (cx, cy, w, h, *).
    without_center_coords: bool
        True: IoU is calculated using only width and height (no center coordinates).
        It is useful during training when the best bbox is selected to replace the gt bbox.
        Note: bboxes1 and bboxes2 are then expected to have shapes (M, 2 + *) and
        (N, 2 + *), respectively.

    Output
    ------
    : torch.FloatTensor
        (M, N) shaped tensor where element (i, j) is the IoU between the i-th bbox
        from bboxes1 and the j-th bbox from bboxes2.
    '''
    # The pixel shift is 0 if we calculate without center coordinates and 1 otherwise.
    # Why? Say we want the number of pixels the width of a box covers, given two pixel
    # x coordinates: 0 and 5. The side is 6 pixels long but 5 - 0 = 5, so we add 1.
    # However, we don't need to do it when we don't have center coordinates,
    # i.e. without_center_coords = True.
    px_shift = 1

    # add 'fake' center coordinates. Any value works; we use zeros
    if without_center_coords:
        bboxes1 = torch.cat([torch.zeros_like(bboxes1[:, :2]), bboxes1], dim=1)
        bboxes2 = torch.cat([torch.zeros_like(bboxes2[:, :2]), bboxes2], dim=1)
        px_shift = 0

    M, D = bboxes1.shape
    N, D = bboxes2.shape

    # transform coords of the 1st bboxes (y=0 is at the top and increases downwards)
    top_left_x1, top_left_y1, bottom_right_x1, bottom_right_y1 = get_corner_coords(bboxes1)
    # transform coords of the 2nd bboxes
    top_left_x2, top_left_y2, bottom_right_x2, bottom_right_y2 = get_corner_coords(bboxes2)

    # broadcasting the 1st bboxes
    top_left_x1 = top_left_x1.view(M, 1)
    top_left_y1 = top_left_y1.view(M, 1)
    bottom_right_x1 = bottom_right_x1.view(M, 1)
    bottom_right_y1 = bottom_right_y1.view(M, 1)
    # broadcasting the 2nd bboxes
    top_left_x2 = top_left_x2.view(1, N)
    top_left_y2 = top_left_y2.view(1, N)
    bottom_right_x2 = bottom_right_x2.view(1, N)
    bottom_right_y2 = bottom_right_y2.view(1, N)

    # calculate the coords of the intersection
    inner_top_left_x = torch.max(top_left_x1, top_left_x2)
    inner_top_left_y = torch.max(top_left_y1, top_left_y2)
    inner_bottom_right_x = torch.min(bottom_right_x1, bottom_right_x2)
    inner_bottom_right_y = torch.min(bottom_right_y1, bottom_right_y2)

    # area = side_a * side_b
    # clamp(x, min=0) = max(x, 0)
    # we make sure the area is 0 if the size of a side is negative, which happens
    # when inner_top_left_x > inner_bottom_right_x, i.e. the boxes do not overlap.
    # Note: adding one (px_shift) because the coordinates start at 0.
    a = torch.clamp(inner_bottom_right_x - inner_top_left_x + px_shift, min=0)
    b = torch.clamp(inner_bottom_right_y - inner_top_left_y + px_shift, min=0)
    inner_area = a * b

    # finally, we calculate the union for each pair of bboxes
    out_area1 = (bottom_right_x1 - top_left_x1 + px_shift) * (bottom_right_y1 - top_left_y1 + px_shift)
    out_area2 = (bottom_right_x2 - top_left_x2 + px_shift) * (bottom_right_y2 - top_left_y2 + px_shift)
    out_area = out_area1 + out_area2 - inner_area

    return inner_area / out_area
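
# Toy example of the Cartesian-product behaviour: one box against two boxes,
# one identical to it (IoU = 1) and one disjoint from it (IoU = 0):
# >>> a = torch.tensor([[5., 5., 10., 10.]])                          # (cx, cy, w, h)
# >>> b = torch.tensor([[5., 5., 10., 10.], [100., 100., 10., 10.]])
# >>> iou_vectorized(a, b)
# tensor([[1., 0.]])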


def objectness_filter_and_nms(predictions, classes, obj_thresh=0.8, nms_thresh=0.4):
    '''
    Performs filtering according to the objectness score and non-maximum suppression
    on the predictions.

    Arguments
    ---------
    predictions: torch.FloatTensor
        A tensor of size (B, P, 5+classes) with predictions.
        B -- batch size; P -- number of predictions for an image,
        i.e. 3 scales with 3 anchor boxes each.
        For example: P = (13*13 + 26*26 + 52*52) * 3 = 10647;
        5 + classes -- (cx, cy, w, h, obj_score, {prob_class}).
    classes: int
        The number of classes to detect.
    obj_thresh: float
        The lowest objectness score the detector allows.
    nms_thresh: float
        The highest IoU the detector allows.

    Output
    ------
    predictions: torch.FloatTensor or None
        Predictions after objectness filtering and non-maximum suppression (same size
        as the predictions in the arguments but with a different P). Returns None when
        no detections are found.
    '''
    # iterate over the images in a batch
    for i, prediction in enumerate(predictions):
        ## objectness thresholding
        # if a prediction's (bbox's) objectness score is higher than obj_thresh, keep it.
        # The fourth (fifth) element is the objectness score; if there are no
        # detections with an obj score higher than obj_thresh, return None
        objectness_mask = (prediction[:, 4] > obj_thresh)

        if len(torch.nonzero(objectness_mask)) == 0:
            return None

        prediction = prediction[objectness_mask]

        # if no object is found on an image, continue with the next image
        if prediction.size(0) == 0:
            continue

        ## non-maximum suppression
        # The idea is as follows. If a prediction "survived" objectness filtering,
        # it is considered meaningful. Since we may have multiple detections of
        # one object on an image, we need to filter out those predictions that have
        # substantial (more than nms_thresh) overlap, or IoU, with the box with the
        # highest class score. Also note that one image might contain more than one
        # object of the same class. As those don't have a high IoU with the box with
        # the highest class score, they will be kept in the list of predictions.

        # for each prediction we save the class with the maximum class score
        pred_score, pred_classes = torch.max(prediction[:, 5:5+classes], dim=-1)

        # we are going to iterate through classes, so, first, we select the set of unique classes
        unique_classes = pred_classes.unique().float()

        # initialize the list of filtered detections
        detections_after_nms = []

        for cls in unique_classes:
            # select only the entries for a specific class.
            # pred_classes is of torch.LongTensor type but we need torch.FloatTensor
            prediction_4_cls = prediction[pred_classes.float() == cls]
            # then we sort the predictions for a specific class by objectness score (high -> low)
            sort_pred_idxs = torch.sort(prediction_4_cls[:, 4], descending=True)[1]
            prediction_4_cls = prediction_4_cls[sort_pred_idxs]

            # next we want to fill detections_after_nms with only those objects
            # that have a unique position, i.e. a low IoU with the other predictions.
            # The idea is to append (save) the first prediction in prediction_4_cls
            # and calculate IoUs between it and the rest of the ordered list.
            # The predictions with a high IoU with that first prediction are discarded.
            # On the next iteration, the first prediction is the one with the highest
            # obj score among those that are left.
            # Exit the loop when there are no predictions left after the nms
            while len(prediction_4_cls) > 0:
                # we append the first prediction for a specific class to the list of predictions.
                # We can do this because we ordered prediction_4_cls beforehand.
                detections_after_nms.append(prediction_4_cls[0].unsqueeze(0))

                # also stop when this is the last prediction in prediction_4_cls
                if len(prediction_4_cls) == 1:
                    break

                # calculate IoUs between the first pred in prediction_4_cls and the rest of them
                ious = iou_vectorized(prediction_4_cls[0, :5].unsqueeze(0), prediction_4_cls[1:, :5])
                # when iou_vectorized takes two tensors, ious.shape is (N, M) but here N = 1
                # and the mask [ious < nms_thresh] should be one-dimensional
                ious = ious.reshape(-1)

                # filter out the first prediction (1:) and the ones with a high IoU with the 0th pred
                prediction_4_cls = prediction_4_cls[1:][ious < nms_thresh]

        # as detections_after_nms is a list, we concatenate its elements into a tensor
        predictions = torch.cat(detections_after_nms)

    return predictions
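
# Toy example (a sketch): a batch with one image, two confident near-duplicate
# boxes of the same class and one low-objectness box, with 2 classes.
# NMS keeps a single detection of 7 = 5 + 2 values:
# >>> preds = torch.tensor([[[50., 50., 20., 20., 0.90, 0.8, 0.2],
# ...                        [51., 51., 20., 20., 0.85, 0.7, 0.3],
# ...                        [10., 10.,  5.,  5., 0.10, 0.5, 0.5]]])
# >>> objectness_filter_and_nms(preds, classes=2).shape
# torch.Size([1, 7])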


def scale_numbers(num1, num2, largest_num_target):
    '''
    Scales two numbers (for example, dimensions) keeping the aspect ratio.

    Arguments
    ---------
    num1: float or int
        The 1st number (dim1).
    num2: float or int
        The 2nd number (dim2).
    largest_num_target: int
        The expected size of the largest of the two numbers.

    Outputs
    -------
    (int, int, float)
        The two scaled numbers such that the largest equals largest_num_target
        while maintaining the same aspect ratio as num1 and num2 in the input,
        plus the scaling coefficient.
        Note: the two scaled numbers are returned as rounded ints.

    Examples
    --------
    scale_numbers(832, 832, 416) -> (416, 416, 0.5)
    scale_numbers(223, 111, 416) -> (416, 207, 1.865...)
    scale_numbers(100, 200, 416) -> (208, 416, 2.08)
    scale_numbers(200, 832, 416) -> (100, 416, 0.5)
    '''
    # make sure the arguments are of correct types
    assert isinstance(largest_num_target, int), 'largest_num_target should be "int"'

    # to make the largest number equal to largest_num_target while keeping the aspect
    # ratio, we, first, estimate by how much the largest number is smaller (larger)
    # than largest_num_target and, second, scale both numbers by this ratio.

    # select the maximum of the two numbers
    max_num = max(num1, num2)
    # calculate the scaling coefficient
    scale_coeff = largest_num_target / max_num
    # scale both numbers
    num1 = num1 * scale_coeff
    num2 = num2 * scale_coeff

    return round(num1), round(num2), scale_coeff


def letterbox_pad(img, color=127.5):
    '''
    Adds letterbox padding to an image, following the original darknet implementation.
    Specifically, it pads the image to a square of size (max_side x max_side),
    where max_side is the larger of the image's height and width.

    Arguments
    ---------
    img: numpy.ndarray
        An image to pad.
    color: (float or int) in [0, 255]
        The intensity to pad the image with (applied to all channels).

    Output
    ------
    img: numpy.ndarray
        The padded image.
    pad_sizes: (int, int, int, int)
        The sizes of the paddings. Used in the show_prediction module where we need to
        shift predictions by the size of the padding. Order: top, bottom, left, right.
    '''
    # make sure the arguments are of correct types
    assert isinstance(img, np.ndarray), '"img" should have numpy.ndarray type'
    assert isinstance(color, (int, float)), '"color" should be an int or float'

    H, W, C = img.shape
    max_side_len = max(H, W)

    # if the width is larger than the height then, to make a square-shaped image,
    # we need to pad the height; otherwise, we pad the width.
    if W > H:
        # how much to pad "on top": half of the difference
        # between the target size and the current height
        pad_top = (max_side_len - H) // 2
        # the other half is added to the bottom
        pad_bottom = max_side_len - (H + pad_top)
        pad_left = 0
        pad_right = 0
    else:
        pad_top = 0
        pad_bottom = 0
        # how much to pad "on the left": half of the difference
        # between the target size and the current width
        pad_left = (max_side_len - W) // 2
        pad_right = max_side_len - (W + pad_left)

    # pad_widths should contain three pairs (because of 3 dims) of padding sizes:
    # the first pair adds rows (top and bottom),
    # the second adds columns (left and right),
    # the third adds nothing because we pad only spatially, not channel-wise
    pad_widths = [[pad_top, pad_bottom], [pad_left, pad_right], [0, 0]]
    # for each padding we specify a color (the constant_values parameter)
    color = [[color, color], [color, color], [0, 0]]
    # perform the padding
    img = np.pad(img, pad_widths, 'constant', constant_values=color)
    # save the padding sizes
    pad_sizes = (pad_top, pad_bottom, pad_left, pad_right)

    return img, pad_sizes
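
# Toy example: a wide image gets equal top/bottom padding to become square:
# >>> padded, pads = letterbox_pad(np.zeros((100, 200, 3)))
# >>> padded.shape, pads          # pads order: top, bottom, left, right
# ((200, 200, 3), (50, 50, 0, 0))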


def fix_orientation_if_needed(pil_img, orientation):
    '''
    Motivation: sometimes when a user uploads a photo from their phone, the
    photo is rotated by 90 degrees even though it looks fine on the phone. This
    function fixes this problem by correcting the orientation using info
    from EXIF. For more info regarding this issue, please see:
    https://magnushoff.com/jpeg-orientation.html

    Argument
    --------
    pil_img: PIL.Image.Image
        The target image.
    orientation: str
        Orientation which the front-end tries to extract from the EXIF of an image.
        Can be 'undefined' or some integer which can be used to orient the image.

    Output
    ------
    pil_img: PIL.Image.Image
        The original image with fixed orientation, or the same image if
        no EXIF info is available.
    '''
    # expand=True resizes the canvas to fit the rotated image
    # (with expand=False the dimensions of the image would remain the same)
    if orientation == '3':
        pil_img = pil_img.rotate(180, expand=True)
    elif orientation == '6':
        pil_img = pil_img.rotate(270, expand=True)
    elif orientation == '8':
        pil_img = pil_img.rotate(90, expand=True)

    return pil_img
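
# Sketch of how the orientation string could be obtained on the Python side
# (an assumption; in this app the front-end supplies it). EXIF tag 274 is the
# standard orientation tag:
# >>> pil_img = Image.open('photo.jpg')
# >>> orientation = str(pil_img.getexif().get(274, 'undefined'))
# >>> pil_img = fix_orientation_if_needed(pil_img, orientation)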


# TODO: test for different devices
def predict_and_save(source_img, model, device, labels_path, font_path, orientation, show=False, save=True):
    '''
    Performs inference on an image and saves the image with the bounding boxes drawn on it.

    Arguments
    ---------
    source_img: PIL.Image.Image
        The image to perform inference on.
    model: Darknet
        The model which will be used for inference.
    device: torch.device or str
        Device for calculations.
    labels_path: str
        The path to the object names.
    font_path: str
        The path to the font which is going to be used to tag the bounding boxes.
    orientation: str
        Orientation which the front-end tries to extract from the EXIF of an image.
        Can be 'undefined' or some integer which can be used to orient the image.
        Used in fix_orientation_if_needed().
    show: bool
        Whether to show the output image with bounding boxes, for example, in a jupyter notebook.
    save: bool
        Whether to save the output image with bounding boxes.

    Outputs
    -------
    machine_readable_preds: str or None
        A csv-formatted string (wrapped in a markdown code block) with one row per
        detected object: class, confidence and bbox coordinates normalized by the
        image size. None when no object has been detected on the image.
    source_img: PIL.Image.Image
        The source image with the detections drawn on it.
    '''
    assert isinstance(labels_path, (str, Path)), '"labels_path" should be str or Path'
    assert isinstance(device, (torch.device, str)), 'device should be either torch.device or str'
    assert isinstance(show, bool), 'show should be boolean'

    # parameters of the visualization: color palette, figsize to show,
    # label parameters, thresholds
    norm = Normalize(vmin=0, vmax=model.classes)
    color_map = colormap.tab10
    figsize = (15, 15)
    line_thickness = 2
    obj_thresh = 0.8
    nms_thresh = 0.4

    # make a dict {class_number: class_name} if we have more than 1 class
    if model.classes > 1:
        # replace '\n' with a whitespace so that the class name and the confidence
        # are separated in the bbox tag text; the space is stripped later for the csv output
        names = [name.replace('\n', ' ') for name in open(labels_path, 'r').readlines()]
        num2name = {num: name for num, name in enumerate(names)}
    else:
        # we don't need class names if the number of classes is 1
        num2name = {0: ''}

    source_img = fix_orientation_if_needed(source_img, orientation)
    W, H = source_img.size

    # resize, add letterbox padding and save the pad sizes and the scaling coefficient
    # to use them later when drawing bboxes on the original image
    H_new, W_new, scale = scale_numbers(H, W, model.model_width)
    img = source_img.resize((W_new, H_new))
    img = np.array(img)
    img, pad_sizes = letterbox_pad(img)

    # HWC -> CHW, scale intensities to [0, 1], send to pytorch, add the 'batch' dimension
    img = img.astype(np.float32)
    img = img.transpose(2, 0, 1)
    img = img / 255
    img = torch.from_numpy(img)
    img = img.unsqueeze(0)
    img = img.to(device)

    # make the prediction
    prediction, loss = model(img, device=device)
    # and apply objectness filtering and nms. If it returns None, draw a box that states it
    prediction = objectness_filter_and_nms(prediction, model.classes, obj_thresh, nms_thresh)

    # if show, initialize a figure environment
    if show:
        plt.figure(figsize=figsize)

    ### if no objects have been detected, draw one black tag in the middle of the
    # source_img with text saying that no objects were found. For comments on this
    # if-branch please see the for-loop below
    if prediction is None:
        text = "Couldn't find any objects that I was trained to detect :-("
        font = ImageFont.truetype(str(font_path), 20)
        # note: font.getsize was removed in Pillow 10; font.getbbox can be used there
        text_size = font.getsize(text)
        top_left_coords = ((W - text_size[0]) // 2, H // 2)
        black = (0, 0, 0)
        # create a black tag (the background for the text) and paste it onto the source image
        tag = Image.new('RGB', text_size, black)
        source_img.paste(tag, top_left_coords)
        # create a draw object for the source image
        tag_draw = ImageDraw.Draw(source_img)
        # add the text
        tag_draw.text(top_left_coords, text, font=font)

        if show:
            plt.imshow(source_img)
        if save:
            source_img.save('output.jpg', 'JPEG')

        return None, source_img

    ###
    # since the predictions are made for a resized and padded image,
    # the bounding boxes have to be scaled and shifted back
    pad_top, pad_bottom, pad_left, pad_right = pad_sizes
    prediction[:, 0] = (prediction[:, 0] - pad_left) / scale
    prediction[:, 1] = (prediction[:, 1] - pad_top) / scale
    prediction[:, 2] = prediction[:, 2] / scale
    prediction[:, 3] = prediction[:, 3] / scale

    # then, transform the coordinates (cx, cy, w, h) into corner coordinates:
    # (top_left_x, top_left_y, bottom_right_x, bottom_right_y)
    top_left_x, top_left_y, bottom_right_x, bottom_right_y = get_corner_coords(prediction)

    # detach the values from the computation graph, take the int part and transform to np.ndarray
    top_left_x = top_left_x.cpu().detach().int().numpy()
    top_left_y = top_left_y.cpu().detach().int().numpy()
    bottom_right_x = bottom_right_x.cpu().detach().int().numpy()
    bottom_right_y = bottom_right_y.cpu().detach().int().numpy()

    # draw each prediction on the image and caption it with its class
    machine_readable_preds = []
    machine_readable_preds.append('class,confidence,bx,by,bw,bh')

    for i in range(len(prediction)):
        ## ADD BBOXES
        # first, extract the coords for both the top-left and bottom-right corners.
        # note: sometimes the corner coordinates lie outside of the image itself,
        # hence we need to keep them on the image -> min and max
        top_left_coords = max(0, top_left_x[i]), max(0, top_left_y[i])
        bottom_right_coords = min(W, bottom_right_x[i]), min(H, bottom_right_y[i])

        # predicted class number
        # todo dim (also see NMS with batch dim)
        class_score, class_int = torch.max(prediction[i, 5:5+model.classes], dim=-1)
        class_score, class_int = float(class_score), int(class_int)

        # select the color for the class according to its label number and scale it to [0, 255]
        bbox_color = color_map(class_int / model.classes)[:3]
        bbox_color = tuple(map(lambda x: int(x * 255), bbox_color))

        ## ADD A LABEL FOR EACH BBOX INSIDE A FILLED RECTANGLE OF THE SAME COLOR
        ## AS THE BBOX ITSELF
        # predicted class name to put on the bbox
        class_name = num2name[class_int]
        # text for the tag: the class name and the probability in percent
        text = f'{class_name}{(class_score * 100):.0f}%'
        font = ImageFont.truetype(str(font_path), 14)
        text_size = font.getsize(text)
        # create a tag object and paste it onto the source image just above the bbox
        tag = Image.new('RGB', text_size, bbox_color)
        top_left_coords_tag = top_left_coords[0], max(0, top_left_coords[1] - text_size[1])
        source_img.paste(tag, top_left_coords_tag)
        # create a draw object and draw the rectangle on the source image
        bbox_draw = ImageDraw.Draw(source_img)
        bbox_draw.rectangle((top_left_coords, bottom_right_coords),
                            width=line_thickness, outline=bbox_color)
        # add the class label with the confidence
        bbox_draw.text(top_left_coords_tag, text, font=font)

        # add the prediction to the list of machine-readable predictions as a csv row
        machine_readable_preds.append(
            ','.join([
                f'{class_name.strip()}',
                f'{class_score:.2f}',
                f'{prediction[i, 0].item() / W:.2f}',
                f'{prediction[i, 1].item() / H:.2f}',
                f'{prediction[i, 2].item() / W:.2f}',
                f'{prediction[i, 3].item() / H:.2f}',
            ])
        )

    # enclose the list of machine-readable predictions into a markdown code block
    machine_readable_preds = '\n'.join(machine_readable_preds)
    machine_readable_preds = f'```\n{machine_readable_preds}\n```'

    # if show, then show the image
    if show:
        plt.imshow(source_img)
    if save:
        source_img.save('output.jpg', 'JPEG')

    return machine_readable_preds, source_img
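
# Self-contained round-trip sketch for the bbox re-mapping step inside
# predict_and_save: a point in the network frame is shifted by the padding and
# divided by the scale to land in the original image frame (toy numbers):
# >>> H, W = 480, 640
# >>> H_new, W_new, scale = scale_numbers(H, W, 416)                # (312, 416, 0.65)
# >>> _, (pad_top, _, pad_left, _) = letterbox_pad(np.zeros((H_new, W_new, 3)))
# >>> (208. - pad_left) / scale   # net-frame center x -> original-frame center x
# 320.0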


def show_image_w_bboxes_for_server(
        img_path: str,
        out_path: str,
        archive_path: str,
        labels_path: str,
        font_path: str,
        model: torch.nn.Module,
        device: torch.device,
        orientation: str) -> None:
    '''
    Reads an image from the disk and applies the detection algorithm specified in model.

    Arguments
    ---------
    img_path: str
        A path to an image.
    out_path: str
        A path where to save the resulting image with detections. This image
        is sent back to the user.
    archive_path: str
        Another path where the resulting image is saved (the archive).
        Since `out_path` is always the same, we also use the archive path.
    labels_path: str
        A path to the model labels (COCO).
    font_path: str
        A path to the font-face used to draw the prediction labels.
    model: Darknet
        The model to apply to the image.
    device: str
        PyTorch device. Use this argument to control 'cuda' vs 'cpu'.
    orientation: str
        Orientation which the front-end tries to extract from the EXIF of an image.
        Can be 'undefined' or some integer which can be used to orient the image.
        Used in predict_and_save().
    '''
    # we want to log the processing time for each image
    start = time()

    # make sure the arguments are of correct types
    assert out_path is None or isinstance(out_path, str), '"out_path" should be either NoneType or str'
    assert isinstance(img_path, (str, Path)), '"img_path" should be str or Path'

    # read the image
    source_img = Image.open(img_path).convert('RGB')

    # predict_and_save returns both the machine-readable predictions and
    # the image with the predictions drawn on it
    with torch.no_grad():
        predictions, img = predict_and_save(
            source_img, model, device, labels_path, font_path, orientation, show=False
        )

    # select a name for the archived file
    filename = f'{strftime("%y-%m-%dT%H-%M-%S", localtime())}.jpg'
    archive_full_path = os.path.join(archive_path, filename)
    img.save(archive_full_path, 'JPEG')
    img.save(out_path, 'JPEG')

    # calculate the elapsed time and print it to the flask console
    elapsed_time = round(time() - start, 2)
    print(f'Processing time of {filename}: {elapsed_time} sec.')
    print('=' * 70)
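
# Server-side usage sketch (all paths and the `model` instance are assumptions;
# the Darknet model class lives elsewhere in this repo):
# >>> show_image_w_bboxes_for_server(
# ...     'uploads/photo.jpg', 'static/output.jpg', 'archive', 'data/coco.names',
# ...     'data/FreeMono.ttf', model, torch.device('cpu'), 'undefined')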


### SOME CODE FOR WIDER DATASET HANDLING
'''
The dataset folder is expected to have the following structure:

    ./Submission_example/
        0--Parade/
            0_Parade_marchingband_1_20.txt
    ./wider_face_split/
        wider_face_test.mat
        wider_face_train.mat
        wider_face_test_filelist.txt
        wider_face_val.mat
        wider_face_train_bbx_gt.txt
        readme.txt
        wider_face_val_bbx_gt.txt
    ./WIDER_train/
        images/
            0--Parade/
                0_Parade_marchingband_1_100.jpg
                ...
            1--Handshaking/
                1_Handshaking_Handshaking_1_102.jpg
                ...
            ...
    ./WIDER_val/
        (similar to ./WIDER_train/)
    ./WIDER_test/
        (similar to ./WIDER_train/)
'''


def read_meta_from_file(data_root_path):
    '''
    Parses the WIDER ground truth data.

    Argument
    --------
    data_root_path: str
        A path to the root of the dataset (see the expected folder structure above).
        The ground truth itself is read from the '.txt' meta file inside it.

    Output
    ------
    meta: dict
        A map between a file path and the ground truth bounding box coordinates with some
        attributes (x1, y1, w, h, blur, expression, illumination, invalid, occlusion, pose)
        stored as a list of lists. For more information about the attributes see readme.txt.
    '''
    split_path = os.path.join(data_root_path, 'wider_face_split')
    train_data_path = os.path.join(data_root_path, 'WIDER_train/images')
    train_meta_path = os.path.join(split_path, 'wider_face_train_bbx_gt.txt')

    meta = {}

    with open(train_meta_path, 'r') as rfile:
        while True:
            short_file_path = rfile.readline()
            bbox_count = rfile.readline()

            # stop at the end of the file (the with-statement closes it)
            if short_file_path == '' or bbox_count == '':
                break

            short_file_path = short_file_path.replace('\n', '')
            bbox_count = int(bbox_count.replace('\n', ''))
            full_file_path = os.path.join(train_data_path, short_file_path)

            gt_bboxes = []

            for _ in range(bbox_count):
                attributes = rfile.readline()
                attributes = attributes.replace('\n', '').split(' ')
                attributes = [int(att) for att in attributes if len(att) > 0]
                gt_bboxes.append(attributes)

            meta[full_file_path] = gt_bboxes

    return meta
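
# Usage sketch (assumes the WIDER layout above is unpacked under './WIDER'):
# >>> meta = read_meta_from_file('./WIDER')
# >>> img_path, gt_bboxes = next(iter(meta.items()))
# >>> gt_bboxes[0][:4]            # x1, y1, w, h of the first face on that image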