Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| Copyright (c) 2020 Carleton University Biomedical Informatics Collaboratory | |
| This source code is licensed under the MIT license found in the | |
| LICENSE file in the root directory of this source tree. | |
| """ | |
| from typing import List | |
| from types import SimpleNamespace | |
| import argparse, os, json, shutil | |
| from tqdm import tqdm | |
| import os.path as path | |
| import numpy as np | |
| from PIL import Image | |
| LABEL_CLASS_INDICES = { | |
| "250": 0, | |
| ".25": 1, | |
| "500": 2, | |
| ".5": 3, | |
| "1000": 4, | |
| "1": 5, | |
| "1K": 6, | |
| "2000": 7, | |
| "2": 8, | |
| "2K": 9, | |
| "4000": 10, | |
| "4": 11, | |
| "4K": 12, | |
| "8000": 13, | |
| "8": 14, | |
| "8K": 15, | |
| "0": 16, | |
| "20": 17, | |
| "40": 18, | |
| "60": 19, | |
| "80": 20, | |
| "100": 21, | |
| "120": 22 | |
| } | |
| def extract_labels(annotation: dict, image: Image) -> List[tuple]: | |
| """Extracts the bounding boxes of labels into a tuple compatible | |
| the YOLOv5 format. | |
| Parameters | |
| ---------- | |
| annotation : dict | |
| A dictionary containing the annotations for the audiograms in a report. | |
| image : Image | |
| The image in PIL format corresponding to the annotation. | |
| Returns | |
| ------- | |
| tuple | |
| A tuple of the form | |
| (class index, x_center, y_center, width, height) where all coordinates | |
| and dimensions are normalized to the width/height of the image. | |
| """ | |
| label_label_tuples = [] | |
| image_width, image_height = image.size | |
| for audiogram in annotation: | |
| for label in audiogram["labels"]: | |
| bounding_box = label["boundingBox"] | |
| x_center = (bounding_box["x"] + bounding_box["width"] / 2) / image_width | |
| y_center = (bounding_box["y"] + bounding_box["height"] / 2) / image_height | |
| box_width = bounding_box["width"] / image_width | |
| box_height = bounding_box["height"] / image_width | |
| try: | |
| label_label_tuples.append((LABEL_CLASS_INDICES[label["value"]], x_center, y_center, box_width, box_height)) | |
| except: | |
| continue | |
| return label_label_tuples | |
| def create_directory_structure(data_dir: str): | |
| try: | |
| shutil.rmtree(path.join(data_dir)) | |
| except: | |
| pass | |
| os.mkdir(path.join(data_dir)) | |
| os.mkdir(path.join(data_dir, "images")) | |
| os.mkdir(path.join(data_dir, "images", "train")) | |
| os.mkdir(path.join(data_dir, "images", "validation")) | |
| os.mkdir(path.join(data_dir, "labels")) | |
| os.mkdir(path.join(data_dir, "labels", "train")) | |
| os.mkdir(path.join(data_dir, "labels", "validation")) | |
| def create_yolov5_file(bboxes: List[tuple], filename: str): | |
| # Turn the bounding boxes into a string with a bounding box | |
| # on each line | |
| file_content = "\n".join([ | |
| f"{bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]} {bbox[4]}" | |
| for bbox in bboxes | |
| ]) | |
| # Save to a file | |
| with open(filename, "w") as output_file: | |
| output_file.write(file_content) | |
| def all_labels_valid(labels: List[tuple]): | |
| for label in labels: | |
| for value in label[1:]: | |
| if value < 0 or value > 1: | |
| return False | |
| return True | |
| def main(args: SimpleNamespace): | |
| # Find all the JSON files in the input directory | |
| report_ids = [ | |
| filename.rstrip(".json") | |
| for filename in os.listdir(path.join(args.annotations_dir)) | |
| if filename.endswith(".json") | |
| and path.exists(path.join(args.images_dir, filename.rstrip(".json") + ".jpg")) | |
| ] | |
| # Shuffle | |
| np.random.seed(seed=42) # for reproducibility of the shuffle | |
| np.random.shuffle(report_ids) | |
| # Create the directory structure in which the images and annotations | |
| # are to be stored | |
| create_directory_structure(args.data_dir) | |
| # Iterate through the report ids, extract the annotations in YOLOv5 format | |
| # and place the file in the correct directory, and the image in the correct | |
| # directory. | |
| for i, report_id in enumerate(tqdm(report_ids)): | |
| # Decide if the image is going into the training set or validation set | |
| directory = ( | |
| "train" if i < args.train_frac * len(report_ids) else "validation" | |
| ) | |
| # Load the annotation` | |
| annotation_content = open( | |
| path.join(args.annotations_dir, f"{report_id}.json") | |
| ) | |
| image = Image.open(os.path.join(args.images_dir, f"{report_id}.jpg")) | |
| annotation = json.load(annotation_content) | |
| bounding_boxes = extract_labels(annotation, image) | |
| if not all_labels_valid(bounding_boxes): | |
| continue | |
| # Open the corresponding image to get its dimensions | |
| image = Image.open(os.path.join(args.images_dir, f"{report_id}.jpg")) | |
| create_yolov5_file( | |
| bounding_boxes, | |
| path.join(args.data_dir, "labels", directory, f"{report_id}.txt") | |
| ) | |
| image.save( | |
| path.join(args.data_dir, "images", directory, f"{report_id}.jpg") | |
| ) | |
| if __name__ == "__main__": | |
| import argparse | |
| parser = argparse.ArgumentParser(description=( | |
| "Script that formats the training set for transfer learning of labels detection via " | |
| "the YOLOv5 model." | |
| )) | |
| parser.add_argument("-d", "--data_dir", type=str, required=True, help=( | |
| "Path to the directory where the training set should be created." | |
| )) | |
| parser.add_argument("-a", "--annotations_dir", type=str, required=True, help=( | |
| "Path to the directory containing the annotations in the JSON format." | |
| )) | |
| parser.add_argument("-i", "--images_dir", type=str, required=True, help=( | |
| "Path to the directory containing the images." | |
| )) | |
| parser.add_argument("-f", "--train_frac", type=float, required=True, help=( | |
| "Fraction of images to be used for training. (e.g. 0.8)" | |
| )) | |
| args = parser.parse_args() | |
| main(args) | |