| | |
| | """Utility functions for visualizing results on html page.""" |
| |
|
| | import base64 |
| | import os.path |
| | import cv2 |
| | import numpy as np |
| |
|
# Names exported by a star-import of this module.
__all__ = [
    'get_grid_shape',
    'get_blank_image',
    'load_image',
    'save_image',
    'resize_image',
    'add_text_to_image',
    'fuse_images',
    'HtmlPageVisualizer',
    'VideoReader',
    'VideoWriter',
    'adjust_pixel_range',
]
| |
|
| |
|
def adjust_pixel_range(images, min_val=-1.0, max_val=1.0, channel_order='NCHW'):
    """Maps a batch of images to `uint8` pixels within range [0, 255].

    The input batch is expected to be with shape [batch_size, channel, height,
    width] if `channel_order = NCHW`, or with shape [batch_size, height, width,
    channel] if `channel_order = NHWC`. Values are linearly mapped such that
    `min_val` becomes 0 and `max_val` becomes 255, then rounded and clipped.

    NOTE: The returned images are ALWAYS with shape [batch_size, height, width,
    channel] (i.e., `NHWC`), regardless of the channel order of the input.

    Args:
        images: Input images (`numpy.ndarray`) to adjust pixel range.
        min_val: Min value of the input images. (default: -1.0)
        max_val: Max value of the input images. (default: 1.0)
        channel_order: Channel order of the input array, case-insensitive.
            (default: NCHW)

    Returns:
        The postprocessed images with dtype `numpy.uint8`, range [0, 255], and
            shape [batch_size, height, width, channel].

    Raises:
        ValueError: If the input `images` are not with type `numpy.ndarray`, the
            shape is invalid according to `channel_order`, or `min_val` equals
            `max_val` (the linear mapping would divide by zero).
    """
    if not isinstance(images, np.ndarray):
        raise ValueError('Images should be with type `numpy.ndarray`!')

    channel_order = channel_order.upper()
    if channel_order not in ['NCHW', 'NHWC']:
        raise ValueError(f'Invalid channel order `{channel_order}`!')

    if images.ndim != 4:
        raise ValueError(f'Input images are expected to be with shape `NCHW` or '
                         f'`NHWC`, but `{images.shape}` is received!')
    if channel_order == 'NCHW' and images.shape[1] not in [1, 3]:
        raise ValueError('Input images should have 1 or 3 channels under `NCHW` '
                         'channel order!')
    if channel_order == 'NHWC' and images.shape[3] not in [1, 3]:
        raise ValueError('Input images should have 1 or 3 channels under `NHWC` '
                         'channel order!')

    # A zero-width input range cannot be mapped; fail loudly instead of
    # producing NaN/inf values that get cast to arbitrary pixels.
    if max_val == min_val:
        raise ValueError(f'`max_val` and `min_val` should be different, but both '
                         f'are `{min_val}`!')

    images = images.astype(np.float32)
    images = (images - min_val) * 255 / (max_val - min_val)
    # Adding 0.5 before the truncating cast rounds to the nearest integer.
    images = np.clip(images + 0.5, 0, 255).astype(np.uint8)
    if channel_order == 'NCHW':
        # Move the channel axis to the end so the output is always `NHWC`.
        images = images.transpose(0, 2, 3, 1)

    return images
| |
|
| |
|
def get_grid_shape(size, row=0, col=0, is_portrait=False):
    """Computes a `(height, width)` grid that holds exactly `size` cells.

    If `row` and `col` are both positive but their product differs from `size`,
    both are ignored. Otherwise, a positive `row` (checked first) or `col` that
    divides `size` evenly fixes that side of the grid and the other side is
    derived from it.

    In all remaining cases the grid is made as close to square as possible:
    e.g., `size = 16` gives `(4, 4)` and `size = 15` gives `(3, 5)`. With
    `is_portrait = False` the height never exceeds the width; with
    `is_portrait = True` the two sides are swapped.

    Args:
        size: Size (height * width) of the target grid.
        row: Desired number of rows. Non-positive means "not set". (default: 0)
        col: Desired number of columns. Non-positive means "not set".
            (default: 0)
        is_portrait: Whether to return a portrait shape or a landscape shape.
            Only used when the shape is determined automatically.
            (default: False)

    Returns:
        A two-element tuple, representing height and width respectively.
    """
    for value in (size, row, col):
        assert isinstance(value, int)

    if size == 0:
        return (0, 0)

    # An explicit but inconsistent request is dropped entirely.
    if row > 0 and col > 0 and row * col != size:
        row, col = 0, 0

    if row > 0 and size % row == 0:
        return (row, size // row)
    if col > 0 and size % col == 0:
        return (size // col, col)

    # Largest divisor not exceeding sqrt(size); 1 always qualifies.
    short_side = next(candidate
                      for candidate in range(int(np.sqrt(size)), 0, -1)
                      if size % candidate == 0)
    long_side = size // short_side

    if is_portrait:
        return (long_side, short_side)
    return (short_side, long_side)
| |
|
| |
|
def get_blank_image(height, width, channels=3, is_black=True):
    """Creates a uniformly black or white image.

    NOTE: The returned image is with dtype `numpy.uint8`, hence pixel range
    [0, 255]. Every channel shares the same value, so the result is valid under
    any channel order.

    Args:
        height: Height of the returned image.
        width: Width of the returned image.
        channels: Number of channels. (default: 3)
        is_black: Whether to return a black image (all 0) or a white image
            (all 255). (default: True)

    Returns:
        A `numpy.ndarray` with shape [height, width, channels].
    """
    fill_value = 0 if is_black else 255
    return np.full((height, width, channels), fill_value, dtype=np.uint8)
| |
|
| |
|
def load_image(path):
    """Loads an image from disk.

    NOTE: The image is read with `cv2.imread()` and its last (channel) axis is
    reversed before returning, i.e., the `BGR` result from OpenCV is converted
    to `RGB` channel order.

    Args:
        path: Path to load the image from.

    Returns:
        An image with type `numpy.ndarray`, or `None` if input `path` does not
            exist or the file cannot be decoded as an image.
    """
    if not os.path.isfile(path):
        return None

    image = cv2.imread(path)
    # `cv2.imread()` signals a decoding failure by returning `None` instead of
    # raising, so guard before indexing.
    if image is None:
        return None
    return image[:, :, ::-1]
| |
|
| |
|
def save_image(path, image):
    """Writes an image to disk with `cv2.imwrite()`.

    NOTE: The input image (if colorful) is assumed to be with `RGB` channel
    order and pixel range [0, 255]. The channel axis is reversed right before
    writing. Nothing is written if `image` is `None`.

    Args:
        path: Path to save the image to.
        image: Image to save. Should be with shape [height, width, channels],
            where `channels` is either 1 or 3.
    """
    if image is None:
        return

    has_valid_shape = len(image.shape) == 3 and image.shape[2] in [1, 3]
    assert has_valid_shape
    channel_reversed = image[:, :, ::-1]
    cv2.imwrite(path, channel_reversed)
| |
|
| |
|
def resize_image(image, *args, **kwargs):
    """Resizes an image while keeping it three-dimensional.

    This is a thin wrapper of `cv2.resize()`: all extra positional and keyword
    arguments are forwarded as they are.

    NOTE: The channel order of the input image will not be changed.

    Args:
        image: Image to resize. Should be with shape [height, width, channels],
            where `channels` is either 1 or 3. `None` is passed through.
        *args: Positional arguments forwarded to `cv2.resize()`.
        **kwargs: Keyword arguments forwarded to `cv2.resize()`.

    Returns:
        The resized image with three dimensions, or `None` if `image` is `None`.
    """
    if image is None:
        return None

    assert image.ndim == 3 and image.shape[2] in [1, 3]
    resized = cv2.resize(image, *args, **kwargs)
    # Restore the channel axis in case the resized result comes back as 2-D.
    return resized[:, :, np.newaxis] if resized.ndim == 2 else resized
| |
|
| |
|
def add_text_to_image(image,
                      text='',
                      position=None,
                      font=cv2.FONT_HERSHEY_TRIPLEX,
                      font_size=1.0,
                      line_type=cv2.LINE_8,
                      line_width=1,
                      color=(255, 255, 255)):
    """Overlays text on given image.

    NOTE: The input image is assumed to be with `RGB` channel order. The text
    is drawn with `cv2.putText()` directly onto `image`.

    Args:
        image: The image to overlay text on.
        text: Text content to overlay on the image. (default: '')
        position: Target position, `(x, y)`, of the bottom-left corner of the
            text. If not set, center of the image will be used by default.
            (default: None)
        font: Font of the text added. (default: cv2.FONT_HERSHEY_TRIPLEX)
        font_size: Font size of the text added. (default: 1.0)
        line_type: Line type used to depict the text. (default: cv2.LINE_8)
        line_width: Line width used to depict the text. (default: 1)
        color: Color of the text added in `RGB` channel order. (default:
            (255, 255, 255))

    Returns:
        An image with target text overlayed on. The input is returned untouched
            if it is `None` or `text` is empty.
    """
    if image is None or not text:
        return image

    # `cv2.putText()` requires a concrete origin, so resolve the documented
    # default (image center) here instead of forwarding `None`.
    if position is None:
        position = (image.shape[1] // 2, image.shape[0] // 2)

    cv2.putText(img=image,
                text=text,
                org=position,
                fontFace=font,
                fontScale=font_size,
                color=color,
                thickness=line_width,
                lineType=line_type,
                bottomLeftOrigin=False)

    return image
| |
|
| |
|
def fuse_images(images,
                image_size=None,
                row=0,
                col=0,
                is_row_major=True,
                is_portrait=False,
                row_spacing=0,
                col_spacing=0,
                border_left=0,
                border_right=0,
                border_top=0,
                border_bottom=0,
                black_background=True):
    """Fuses a collection of images into an entire image.

    The images are laid out on a grid whose shape is decided by
    `get_grid_shape()`, optionally resized, and pasted onto a blank canvas
    created by `get_blank_image()`.

    Args:
        images: A collection of images to fuse. Should be with shape [num,
            height, width, channels].
        image_size: Int or two-element list/tuple, `(width, height)`. This field
            is used to resize each image before fusing. `None` disables
            resizing. (default: None)
        row: Number of rows used for image fusion. If not set, this field will
            be automatically assigned based on `col` and total number of
            images. (default: 0)
        col: Number of columns used for image fusion. If not set, this field
            will be automatically assigned based on `row` and total number of
            images. (default: 0)
        is_row_major: Whether the input images should be arranged row-major or
            column-major. (default: True)
        is_portrait: Only active when both `row` and `col` should be assigned
            automatically. (default: False)
        row_spacing: Space between rows. (default: 0)
        col_spacing: Space between columns. (default: 0)
        border_left: Width of left border. (default: 0)
        border_right: Width of right border. (default: 0)
        border_top: Width of top border. (default: 0)
        border_bottom: Width of bottom border. (default: 0)
        black_background: Whether the spacing and borders are filled with black
            or with white. (default: True)

    Returns:
        The fused image, or `None` if `images` is `None`.

    Raises:
        ValueError: If the input `images` is not with shape [num, height, width,
            channels].
    """
    if images is None:
        return images

    if images.ndim != 4:
        raise ValueError(f'Input `images` should be with shape [num, height, '
                         f'width, channels], but {images.shape} is received!')

    num, image_height, image_width, channels = images.shape
    if image_size is not None:
        if isinstance(image_size, int):
            image_size = (image_size, image_size)
        assert isinstance(image_size, (list, tuple)) and len(image_size) == 2
        width, height = image_size
    else:
        height, width = image_height, image_width
    row, col = get_grid_shape(num, row=row, col=col, is_portrait=is_portrait)

    # `max(..., 0)` keeps the spacing term non-negative for an empty grid.
    fused_height = (height * row + row_spacing * max(row - 1, 0) +
                    border_top + border_bottom)
    fused_width = (width * col + col_spacing * max(col - 1, 0) +
                   border_left + border_right)
    fused_image = get_blank_image(
        fused_height, fused_width, channels=channels, is_black=black_background)

    if is_row_major:
        images = images.reshape(row, col, image_height, image_width, channels)
    else:
        # Column-major: consecutive inputs fill a column first, so group them
        # by column and then swap the two grid axes. This keeps the array
        # indexable as `[row_idx, col_idx]` even for non-square grids.
        images = images.reshape(col, row, image_height, image_width, channels)
        images = images.transpose(1, 0, 2, 3, 4)

    for i in range(row):
        y = border_top + i * (height + row_spacing)
        for j in range(col):
            x = border_left + j * (width + col_spacing)
            image = images[i, j]
            if image_size is not None:
                image = cv2.resize(image, (width, height))
                # Restore the channel axis if the resized result is 2-D, so it
                # fits the 3-D slot on the canvas.
                if image.ndim == 2:
                    image = image[:, :, np.newaxis]
            fused_image[y:y + height, x:x + width] = image

    return fused_image
| |
|
| |
|
def get_sortable_html_header(column_name_list, sort_by_ascending=False):
    """Builds the leading part of a sortable html page.

    The page consists of one table whose rows can be re-ordered by clicking a
    column head. This function emits everything up to (and including) the
    opening `<tbody>` tag: the sorting script, the style sheet, and the table
    head with one clickable `<th>` per column.

    Example:

        column_name_list = [name_1, name_2, name_3]
        header = get_sortable_html_header(column_name_list)
        footer = get_sortable_html_footer()
        sortable_table = ...
        html_page = header + sortable_table + footer

    Args:
        column_name_list: List of column header names. Names are inserted into
            the page as they are.
        sort_by_ascending: Initial value of the script's sorting-order flag,
            which is toggled after every sort. (default: False)

    Returns:
        A string, which represents for the header for a sortable html page.
    """
    ascending_literal = str(sort_by_ascending).lower()
    static_lines = (
        '<script type="text/javascript">',
        'var column_idx;',
        f'var sort_by_ascending = {ascending_literal};',
        '',
        'function sorting(tbody, column_idx){',
        ' this.column_idx = column_idx;',
        ' Array.from(tbody.rows)',
        ' .sort(compareCells)',
        ' .forEach(function(row) { tbody.appendChild(row); })',
        ' sort_by_ascending = !sort_by_ascending;',
        '}',
        '',
        'function compareCells(row_a, row_b) {',
        ' var val_a = row_a.cells[column_idx].innerText;',
        ' var val_b = row_b.cells[column_idx].innerText;',
        ' var flag = sort_by_ascending ? 1 : -1;',
        ' return flag * (val_a > val_b ? 1 : -1);',
        '}',
        '</script>',
        '',
        '<html>',
        '',
        '<head>',
        '<style>',
        ' table {',
        ' border-spacing: 0;',
        ' border: 1px solid black;',
        ' }',
        ' th {',
        ' cursor: pointer;',
        ' }',
        ' th, td {',
        ' text-align: left;',
        ' vertical-align: middle;',
        ' border-collapse: collapse;',
        ' border: 0.5px solid black;',
        ' padding: 8px;',
        ' }',
        ' tr:nth-child(even) {',
        ' background-color: #d2d2d2;',
        ' }',
        '</style>',
        '</head>',
        '',
        '<body>',
        '',
        '<table>',
        '<thead>',
        '<tr>',
        '',  # Makes the joined text end with a newline.
    )
    column_cells = [
        f' <th onclick="sorting(tbody, {idx})">{column_name}</th>\n'
        for idx, column_name in enumerate(column_name_list)
    ]
    table_body_opening = '</tr>\n</thead>\n<tbody id="tbody">\n'

    return '\n'.join(static_lines) + ''.join(column_cells) + table_body_opening
| |
|
| |
|
def get_sortable_html_footer():
    """Builds the trailing part of a sortable html page.

    The returned string closes the table body, the table, the page body, and
    the page itself, i.e., everything opened by `get_sortable_html_header()`.

    Returns:
        A string, which represents for the footer for a sortable html page.
    """
    closing_lines = ('</tbody>', '</table>', '', '</body>', '</html>', '')
    return '\n'.join(closing_lines)
| |
|
| |
|
def encode_image_to_html_str(image, image_size=None):
    """Encodes an image to html language.

    The image is JPEG-encoded with `cv2.imencode()`, base64-encoded, and
    embedded into an `<img>` tag as a data URI.

    Args:
        image: The input image to encode. Should be with `RGB` channel order
            and shape [height, width, channels], where `channels` is either 1
            or 3.
        image_size: Int or two-element list/tuple, `(width, height)`. This field
            is used to resize the image before encoding. `None` disables
            resizing. (default: None)

    Returns:
        A string which represents the encoded image, or an empty string if
            `image` is `None`.
    """
    if image is None:
        return ''

    assert len(image.shape) == 3 and image.shape[2] in [1, 3]

    # Change channel order to `BGR`, which is what `cv2` expects.
    image = image[:, :, ::-1]

    # Resize the image if needed.
    if image_size is not None:
        if isinstance(image_size, int):
            image_size = (image_size, image_size)
        assert isinstance(image_size, (list, tuple)) and len(image_size) == 2
        image = cv2.resize(image, tuple(image_size))

    # Encode the image to an html-format string. `tobytes()` replaces the
    # deprecated `tostring()` alias and returns the same raw bytes.
    encoded_image = cv2.imencode('.jpg', image)[1].tobytes()
    encoded_image_base64 = base64.b64encode(encoded_image).decode('utf-8')
    html_str = f'<img src="data:image/jpeg;base64, {encoded_image_base64}"/>'

    return html_str
| |
|
| |
|
class HtmlPageVisualizer(object):
    """Defines the html page visualizer.

    This class collects text and images in a table and dumps the table as a
    sortable html page, relying on the helper functions
    `get_sortable_html_header()`, `get_sortable_html_footer()`, and
    `encode_image_to_html_str()`. The table layout is described by:

    (1) num_rows: Number of rows of the table (header-row exclusive).
    (2) num_cols: Number of columns of the table.
    (3) header contents (optional): Title of each column.

    NOTE: A positive `grid_size` lets `get_grid_shape()` decide `num_rows` and
    `num_cols` (taking the given values as hints).

    Example:

        html = HtmlPageVisualizer(num_rows, num_cols)
        html.set_headers([...])
        for i in range(num_rows):
            for j in range(num_cols):
                html.set_cell(i, j, text=..., image=...)
        html.save('visualize.html')
    """

    def __init__(self,
                 num_rows=0,
                 num_cols=0,
                 grid_size=0,
                 is_portrait=False,
                 viz_size=None):
        """Initializes an empty table.

        Args:
            num_rows: Number of rows (header-row exclusive). (default: 0)
            num_cols: Number of columns. (default: 0)
            grid_size: If positive, total number of cells, from which the table
                shape is derived. (default: 0)
            is_portrait: Forwarded to `get_grid_shape()` when `grid_size` is
                used. (default: False)
            viz_size: Size passed to `encode_image_to_html_str()` for every
                image added through `set_cell()`. (default: None)
        """
        if grid_size > 0:
            num_rows, num_cols = get_grid_shape(
                grid_size, row=num_rows, col=num_cols, is_portrait=is_portrait)
        assert num_rows > 0 and num_cols > 0

        self.num_rows = num_rows
        self.num_cols = num_cols
        self.viz_size = viz_size
        self.headers = [''] * self.num_cols
        # Every cell owns its own dict, so editing one never affects another.
        self.cells = [
            [{'text': '', 'image': ''} for _ in range(self.num_cols)]
            for _ in range(self.num_rows)
        ]

    def set_header(self, column_idx, content):
        """Sets the content of a particular header by column index."""
        self.headers[column_idx] = content

    def set_headers(self, contents):
        """Sets the contents of all headers.

        Args:
            contents: List or tuple with one entry per column. A single string
                is treated as a one-element list.
        """
        if isinstance(contents, str):
            contents = [contents]
        assert isinstance(contents, (list, tuple))
        assert len(contents) == self.num_cols
        for column_idx, content in enumerate(contents):
            self.set_header(column_idx, content)

    def set_cell(self, row_idx, column_idx, text='', image=None):
        """Sets the content of a particular cell.

        Basically, a cell contains some text as well as an image. Both text and
        image can be empty.

        Args:
            row_idx: Row index of the cell to edit.
            column_idx: Column index of the cell to edit.
            text: Text to add into the target cell.
            image: Image to show in the target cell. Should be with `RGB`
                channel order.
        """
        cell = self.cells[row_idx][column_idx]
        cell['text'] = text
        cell['image'] = encode_image_to_html_str(image, self.viz_size)

    def save(self, save_path):
        """Saves the html page.

        Args:
            save_path: Path of the html file to write.
        """
        pieces = []
        for i in range(self.num_rows):
            pieces.append('<tr>\n')
            for j in range(self.num_cols):
                cell = self.cells[i][j]
                text, image = cell['text'], cell['image']
                # Text, if any, sits above the image, separated by a blank line.
                caption = f'{text}<br><br>' if text else ''
                pieces.append(f' <td>{caption}{image}</td>\n')
            pieces.append('</tr>\n')
        html = ''.join(pieces)

        header = get_sortable_html_header(self.headers)
        footer = get_sortable_html_footer()

        with open(save_path, 'w') as f:
            f.write(header + html + footer)
| |
|
| |
|
class VideoReader(object):
    """Defines the video reader.

    This class can be used to read frames from a given video.
    """

    def __init__(self, path):
        """Initializes the video reader by loading the video from disk.

        Args:
            path: Path to the video file.

        Raises:
            ValueError: If `path` does not point to an existing file.
        """
        if not os.path.isfile(path):
            raise ValueError(f'Video `{path}` does not exist!')

        self.path = path
        self.video = cv2.VideoCapture(path)
        assert self.video.isOpened()
        # Index of the frame that the next sequential `read()` is expected to
        # return.
        self.position = 0

        self.length = int(self.video.get(cv2.CAP_PROP_FRAME_COUNT))
        self.frame_height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.frame_width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.fps = self.video.get(cv2.CAP_PROP_FPS)

    def __del__(self):
        """Releases the opened video."""
        # `__init__()` may raise before `self.video` is assigned; the finalizer
        # still runs in that case and must not fail on the missing attribute.
        video = getattr(self, 'video', None)
        if video is not None:
            video.release()

    def read(self, position=None):
        """Reads a certain frame.

        NOTE: The channel axis of the frame is reversed before returning, so
        that the returned frame is with `RGB` channel order.

        Args:
            position: Optional. If set and smaller than the video length, the
                reader will jump to that frame index before reading. Otherwise,
                the reader will read the next frame. (default: None)

        Returns:
            The frame read, or `None` if reading fails.
        """
        if position is not None and position < self.length:
            self.video.set(cv2.CAP_PROP_POS_FRAMES, position)
            self.position = position

        success, frame = self.video.read()
        self.position = self.position + 1

        return frame[:, :, ::-1] if success else None
| |
|
| |
|
class VideoWriter(object):
    """Defines the video writer.

    This class can be used to create a video.

    NOTE: `.avi` and `DIVX` is the most recommended codec format since it does
    not rely on other dependencies.
    """

    def __init__(self, path, frame_height, frame_width, fps=24, codec='DIVX'):
        """Creates the video writer.

        Args:
            path: Path of the video file to create.
            frame_height: Height of each frame.
            frame_width: Width of each frame.
            fps: Frames per second. (default: 24)
            codec: Four-character codec code. (default: DIVX)
        """
        self.path = path
        self.frame_height = frame_height
        self.frame_width = frame_width
        self.fps = fps
        self.codec = codec

        # NOTE: OpenCV takes the frame size as (width, height).
        self.video = cv2.VideoWriter(filename=path,
                                     fourcc=cv2.VideoWriter_fourcc(*codec),
                                     fps=fps,
                                     frameSize=(frame_width, frame_height))

    def __del__(self):
        """Releases the opened video."""
        # `__init__()` may raise before `self.video` is assigned; the finalizer
        # still runs in that case and must not fail on the missing attribute.
        video = getattr(self, 'video', None)
        if video is not None:
            video.release()

    def write(self, frame):
        """Writes a target frame.

        NOTE: The input frame is assumed to be with `RGB` channel order. Its
        channel axis is reversed right before writing.

        Args:
            frame: Frame to append to the video.
        """
        self.video.write(frame[:, :, ::-1])
| |
|