Spaces:
Runtime error
Runtime error
| # Lint as: python2, python3 | |
| # Copyright 2020 The TensorFlow Authors. All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ============================================================================== | |
| """Common utility for object detection tf.train.SequenceExamples.""" | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import numpy as np | |
| import tensorflow.compat.v1 as tf | |
def context_float_feature(ndarray):
  """Converts a numpy float array to a context float feature.

  Args:
    ndarray: A numpy float array.

  Returns:
    A context float feature (`tf.train.Feature` with a populated float_list).
  """
  feature = tf.train.Feature()
  # Bulk-extend the repeated field instead of appending element by element.
  feature.float_list.value.extend(ndarray)
  return feature
def context_int64_feature(ndarray):
  """Converts a numpy array to a context int64 feature.

  Args:
    ndarray: A numpy int64 array.

  Returns:
    A context int64 feature (`tf.train.Feature` with a populated int64_list).
  """
  feature = tf.train.Feature()
  # Bulk-extend the repeated field instead of appending element by element.
  feature.int64_list.value.extend(ndarray)
  return feature
def context_bytes_feature(ndarray):
  """Converts a numpy bytes array to a context bytes feature.

  Args:
    ndarray: A numpy bytes array.

  Returns:
    A context bytes feature (`tf.train.Feature` with a populated bytes_list).
  """
  feature = tf.train.Feature()
  values = feature.bytes_list.value
  for item in ndarray:
    # Nested numpy arrays must be converted to plain lists before encoding.
    converted = item.tolist() if isinstance(item, np.ndarray) else item
    values.append(tf.compat.as_bytes(converted))
  return feature
def sequence_float_feature(ndarray):
  """Converts a numpy float array to a sequence float feature.

  Args:
    ndarray: A numpy float array.

  Returns:
    A sequence float feature (`tf.train.FeatureList`, one entry per row).
  """
  feature_list = tf.train.FeatureList()
  for row in ndarray:
    entry = feature_list.feature.add()
    # Empty rows still get an (empty) entry so frame alignment is preserved.
    if row.size:
      entry.float_list.value.extend(row)
  return feature_list
def sequence_int64_feature(ndarray):
  """Converts a numpy int64 array to a sequence int64 feature.

  Args:
    ndarray: A numpy int64 array.

  Returns:
    A sequence int64 feature (`tf.train.FeatureList`, one entry per row).
  """
  feature_list = tf.train.FeatureList()
  for row in ndarray:
    entry = feature_list.feature.add()
    # Empty rows still get an (empty) entry so frame alignment is preserved.
    if row.size:
      entry.int64_list.value.extend(row)
  return feature_list
def sequence_bytes_feature(ndarray):
  """Converts a bytes float array to a sequence bytes feature.

  Args:
    ndarray: A numpy bytes array.

  Returns:
    A sequence bytes feature (`tf.train.FeatureList`, one entry per row).
  """
  feature_list = tf.train.FeatureList()
  for row in ndarray:
    # Nested numpy arrays must be converted to plain lists before encoding.
    if isinstance(row, np.ndarray):
      row = row.tolist()
    entry = feature_list.feature.add()
    # Empty rows still get an (empty) entry so frame alignment is preserved.
    if row:
      entry.bytes_list.value.extend(tf.compat.as_bytes(item) for item in row)
  return feature_list
def boxes_to_box_components(bboxes):
  """Converts a list of numpy arrays (boxes) to box components.

  Args:
    bboxes: A numpy array of bounding boxes.

  Returns:
    Bounding box component lists (ymin, xmin, ymax, xmax), one float32
    1-D array per frame in each list.
  """
  # Boxes are stored as [ymin, xmin, ymax, xmax] columns; collect each
  # column across frames into its own list.
  components = ([], [], [], [])
  for bbox in bboxes:
    frame_boxes = np.array(bbox).astype(np.float32)
    columns = np.split(frame_boxes, 4, axis=1)
    for component_list, column in zip(components, columns):
      component_list.append(np.reshape(column, [-1]))
  return components
def make_sequence_example(dataset_name,
                          video_id,
                          encoded_images,
                          image_height,
                          image_width,
                          image_format=None,
                          image_source_ids=None,
                          timestamps=None,
                          is_annotated=None,
                          bboxes=None,
                          label_strings=None,
                          detection_bboxes=None,
                          detection_classes=None,
                          detection_scores=None):
  """Constructs tf.SequenceExamples.

  Args:
    dataset_name: String with dataset name.
    video_id: String with video id.
    encoded_images: A [num_frames] list (or numpy array) of encoded image
      frames.
    image_height: Height of the images.
    image_width: Width of the images.
    image_format: Format of encoded images.
    image_source_ids: (Optional) A [num_frames] list of unique string ids for
      each image.
    timestamps: (Optional) A [num_frames] list (or numpy array) array with
      image timestamps. Defaults to sequential frame indices.
    is_annotated: (Optional) A [num_frames] list (or numpy array) array in
      which each element indicates whether the frame has been annotated (1)
      or not (0). Only written when `bboxes` is provided; defaults to all
      ones in that case.
    bboxes: (Optional) A list (with num_frames elements) of [num_boxes_i, 4]
      numpy float32 arrays holding boxes for each frame.
    label_strings: (Optional) A list (with num_frames_elements) of
      [num_boxes_i] numpy string arrays holding object string labels for each
      frame.
    detection_bboxes: (Optional) A list (with num_frames elements) of
      [num_boxes_i, 4] numpy float32 arrays holding prediction boxes for each
      frame.
    detection_classes: (Optional) A list (with num_frames_elements) of
      [num_boxes_i] numpy int64 arrays holding predicted classes for each
      frame.
    detection_scores: (Optional) A list (with num_frames_elements) of
      [num_boxes_i] numpy float32 arrays holding predicted object scores for
      each frame.

  Returns:
    A tf.train.SequenceExample.
  """
  num_frames = len(encoded_images)
  if timestamps is None:
    timestamps = np.arange(num_frames)
  # Each per-frame scalar becomes a length-1 row so the sequence helpers can
  # emit one feature entry per frame.
  frame_timestamps = np.expand_dims(timestamps, axis=-1)

  # Context (per-clip) features.
  context_dict = {
      'example/dataset_name': context_bytes_feature([dataset_name]),
      'clip/start/timestamp': context_int64_feature([frame_timestamps[0][0]]),
      'clip/end/timestamp': context_int64_feature([frame_timestamps[-1][0]]),
      'clip/frames': context_int64_feature([num_frames]),
      'image/channels': context_int64_feature([3]),
      'image/height': context_int64_feature([image_height]),
      'image/width': context_int64_feature([image_width]),
      'clip/media_id': context_bytes_feature([video_id]),
  }
  if image_format is not None:
    context_dict['image/format'] = context_bytes_feature([image_format])

  # Sequence (per-frame) features.
  feature_list = {
      'image/encoded': sequence_bytes_feature(
          np.expand_dims(encoded_images, axis=-1)),
      'image/timestamp': sequence_int64_feature(frame_timestamps),
  }
  if image_source_ids is not None:
    feature_list['image/source_id'] = sequence_bytes_feature(image_source_ids)
  if bboxes is not None:
    ymin, xmin, ymax, xmax = boxes_to_box_components(bboxes)
    feature_list['region/bbox/xmin'] = sequence_float_feature(xmin)
    feature_list['region/bbox/xmax'] = sequence_float_feature(xmax)
    feature_list['region/bbox/ymin'] = sequence_float_feature(ymin)
    feature_list['region/bbox/ymax'] = sequence_float_feature(ymax)
    # Annotation mask only accompanies ground-truth boxes; default is that
    # every frame is annotated.
    if is_annotated is None:
      is_annotated = np.ones(num_frames, dtype=np.int64)
    feature_list['region/is_annotated'] = sequence_int64_feature(
        np.expand_dims(is_annotated, axis=-1))
  if label_strings is not None:
    feature_list['region/label/string'] = sequence_bytes_feature(label_strings)
  if detection_bboxes is not None:
    det_ymin, det_xmin, det_ymax, det_xmax = boxes_to_box_components(
        detection_bboxes)
    feature_list['predicted/region/bbox/xmin'] = sequence_float_feature(
        det_xmin)
    feature_list['predicted/region/bbox/xmax'] = sequence_float_feature(
        det_xmax)
    feature_list['predicted/region/bbox/ymin'] = sequence_float_feature(
        det_ymin)
    feature_list['predicted/region/bbox/ymax'] = sequence_float_feature(
        det_ymax)
  if detection_classes is not None:
    feature_list['predicted/region/label/index'] = sequence_int64_feature(
        detection_classes)
  if detection_scores is not None:
    feature_list['predicted/region/label/confidence'] = sequence_float_feature(
        detection_scores)

  return tf.train.SequenceExample(
      context=tf.train.Features(feature=context_dict),
      feature_lists=tf.train.FeatureLists(feature_list=feature_list))