stm32-modelzoo-app / object_detection /tf /src /datasets /utils /dataset_converters.py
FBAGSTM's picture
STM32 AI Experimentation Hub
747451d
# /*---------------------------------------------------------------------------------------------
# * Copyright (c) 2025 STMicroelectronics.
# * All rights reserved.
# * This software is licensed under terms that can be found in the LICENSE file in
# * the root directory of this software component.
# * If no LICENSE file comes with this software, it is provided AS-IS.
# *--------------------------------------------------------------------------------------------*/
import os
import sys
import json
import hydra
import shutil
import argparse
from hydra.core.hydra_config import HydraConfig
from tqdm import tqdm
from munch import DefaultMunch
from omegaconf import OmegaConf
from omegaconf import DictConfig
import xml.etree.ElementTree as ET
from PIL import Image
def classes_inspector(non_existing_classes: list = None,
available_classes: list = None) -> None:
"""
Ensure all defined classes are well present in the dataset
Args:
non_existing_classes (list): list of non found classes from the dataset
available_classes (list): list of detected classes in the dataset
Returns:
None
"""
if len(non_existing_classes) > 0:
print("The following classes were not found: {}".format(non_existing_classes))
print("Please make sure that your selected classes exist in the following list: {}".format(available_classes))
print("Exiting the script...")
sys.exit()
else:
print("Converting the dataset ...")
def verify_voc_classes(xml_folder: str = None,
classes: list = None) -> None:
"""
Check if all expected classes are well present in the provided dataset
Args:
xml_folder (str): path to the xml directory
classes (list): list of the provided classes (from the yaml file)
Returns:
None
"""
print("Analyzing the dataset ...")
available_classes = set()
xml_files = [file for file in os.listdir(xml_folder) if file.endswith('.xml')]
for filename in tqdm(xml_files):
xml_path = os.path.join(xml_folder, filename)
tree = ET.parse(xml_path)
root = tree.getroot()
for obj in root.findall('object'):
name = obj.find('name').text
print(name)
available_classes.add(name)
non_existing_classes = [c for c in classes if c not in available_classes]
classes_inspector(non_existing_classes,
available_classes)
def convert_voc_to_yolo(xml_folder: str = None,
images_folder: str = None,
classes: list = None,
export_folder: str = None) -> None:
"""
Core routine that converts voc data to yolo format and exports them
Args:
xml_folder (str): path to the xml directory
images_folder (str): path to the images directory
classes (list): list of the provided classes (from the yaml file)
export_folder (str): path converted dataset will be stored
Returns:
None
"""
verify_voc_classes(xml_folder,
classes)
if not os.path.exists(export_folder):
os.makedirs(export_folder)
xml_files = [file for file in os.listdir(xml_folder) if file.endswith('.xml')]
for filename in tqdm(xml_files):
copy_image = False
xml_path = os.path.join(xml_folder, filename)
tree = ET.parse(xml_path)
root = tree.getroot()
size = root.find('size')
width = int(size.find('width').text)
height = int(size.find('height').text)
txt_filename = os.path.splitext(filename)[0] + '.txt'
txt_path = os.path.join(export_folder, txt_filename)
for obj in root.findall('object'):
name = obj.find('name').text
if name in classes:
with open(txt_path, 'a') as label_file:
copy_image = True
class_id = classes.index(name)
bbox = obj.find('bndbox')
xmin = float(bbox.find('xmin').text)
ymin = float(bbox.find('ymin').text)
xmax = float(bbox.find('xmax').text)
ymax = float(bbox.find('ymax').text)
x_center = (xmin + xmax) / (2 * width)
y_center = (ymin + ymax) / (2 * height)
w = (xmax - xmin) / width
h = (ymax - ymin) / height
label_file.write(f"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")
if copy_image:
image_file = os.path.splitext(filename)[0] + '.jpg'
image_path = os.path.join(images_folder, image_file)
shutil.copy(image_path, export_folder)
def verify_coco_classes(coco_annotations_file: str = None,
classes: list = None) -> None:
"""
Check if all expected classes are well present in the provided dataset
Args:
coco_annotations_file (str): path to the coco annotation file
classes (list): list of the provided classes (from the yaml file)
Returns:
None
"""
print("Analyzing the dataset ...")
with open(coco_annotations_file, 'r') as f:
coco_data = json.load(f)
class_names = set()
for annotation in tqdm(coco_data['annotations']):
category_id = annotation['category_id']
for category in coco_data['categories']:
if category['id'] == category_id:
class_name = category['name']
class_names.add(class_name)
available_classes = list(class_names)
non_existing_classes = [c for c in classes if c not in available_classes]
classes_inspector(non_existing_classes,
available_classes)
def convert_coco_to_yolo(coco_annotations_file: str = None,
coco_images_dir: str = None,
classes: list = None,
export_folder: str = None) -> None:
"""
Core routine that converts coco data to yolo format and exports them
Args:
coco_annotations_file (str): path to the coco annotations directory
coco_images_dir (str): path to the images directory
classes (list): list of the provided classes (from the yaml file)
export_folder (str): path converted dataset will be stored
Returns:
None
"""
verify_coco_classes(coco_annotations_file,
classes)
if not os.path.exists(export_folder):
os.makedirs(export_folder)
with open(coco_annotations_file, 'r') as f:
coco_data = json.load(f)
for image_info in tqdm(coco_data['images']):
copy_image = False
image_file_name = image_info['file_name']
label_file_name = os.path.splitext(image_file_name)[0] + '.txt'
label_file_path = os.path.join(export_folder, label_file_name)
for annotation in coco_data['annotations']:
if annotation['image_id'] == image_info['id']:
category_id = annotation['category_id']
class_name = None
for category in coco_data['categories']:
if category['id'] == category_id:
class_name = category['name']
break
if class_name in classes:
copy_image = True
class_id = classes.index(class_name)
x, y, w, h = annotation['bbox']
x_center = x + (w / 2)
y_center = y + (h / 2)
x_center /= image_info['width']
y_center /= image_info['height']
w /= image_info['width']
h /= image_info['height']
with open(label_file_path, 'a') as label_file:
label_file.write(f"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")
if copy_image:
image_path = os.path.join(coco_images_dir, image_file_name)
shutil.copy(image_path, export_folder)
def verify_kitti_classes(kitti_annotations_dir: str = None, classes: list = None) -> None:
"""
Check if all expected classes are well present in the provided dataset
Args:
kitti_annotations_dir (str): path to the KITTI annotations directory
classes (list): list of the provided classes (from the yaml file)
Returns:
None
"""
print("Analyzing the dataset ...")
available_classes = set()
annotation_files = [file for file in os.listdir(kitti_annotations_dir) if file.endswith('.txt')]
for filename in tqdm(annotation_files):
annotation_path = os.path.join(kitti_annotations_dir, filename)
with open(annotation_path, 'r') as f:
for line in f:
parts = line.strip().split()
class_name = parts[0]
available_classes.add(class_name)
non_existing_classes = [c for c in classes if c not in available_classes]
classes_inspector(non_existing_classes, available_classes)
def convert_kitti_to_yolo(kitti_annotations_dir: str = None,
kitti_images_dir: str = None,
classes: list = None,
export_folder: str = None) -> None:
"""
Core routine that converts KITTI data to YOLO format and exports them
Args:
kitti_annotations_dir (str): path to the KITTI annotations directory
kitti_images_dir (str): path to the images directory
classes (list): list of the provided classes (from the yaml file)
export_folder (str): path converted dataset will be stored
Returns:
None
"""
verify_kitti_classes(kitti_annotations_dir, classes)
if not os.path.exists(export_folder):
os.makedirs(export_folder)
annotation_files = [file for file in os.listdir(kitti_annotations_dir) if file.endswith('.txt')]
for filename in tqdm(annotation_files):
copy_image = False
annotation_path = os.path.join(kitti_annotations_dir, filename)
txt_filename = os.path.splitext(filename)[0] + '.txt'
txt_path = os.path.join(export_folder, txt_filename)
image_file = os.path.splitext(filename)[0] + '.jpg'
image_path = os.path.join(kitti_images_dir, image_file)
if not os.path.exists(image_path):
continue
image = Image.open(image_path)
width_image, height_image = image.size
with open(annotation_path, 'r') as f:
for line in f:
parts = line.strip().split()
class_name = parts[0]
if class_name in classes:
copy_image = True
class_id = classes.index(class_name)
xmin, ymin, xmax, ymax = map(float, parts[4:8])
width = float(parts[8])
height = float(parts[9])
if width == 0 and height == 0:
w = xmax - xmin
h = ymax - ymin
x_center = (xmin + xmax) / 2
y_center = (ymin + ymax) / 2
else:
x_center = (xmin + xmax) / (2 * width)
y_center = (ymin + ymax) / (2 * height)
w = (xmax - xmin) / width
h = (ymax - ymin) / height
x_center /= width_image
y_center /= height_image
w /= width_image
h /= height_image
with open(txt_path, 'a') as label_file:
label_file.write(f"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")
if copy_image:
shutil.copy(image_path, export_folder)
def convert_val_dataset_to_yolo(cfg: DictConfig) -> None:
"""
Converts only the validatin dataset to YOLO Darknet format according to the config.
This is required for the case when loading the validation dataset only.
Args:
cfg (DictConfig): Configuration dictionary containing:
- dataset.format: Format of the input dataset
- dataset.val_annotations_path: Path to validation annotations (optional)
- dataset.val_images_path: Path to validation images (optional)
- dataset.class_names: List of class names
- dataset.training_path: Output path for converted training data
- dataset.validation_path: Output path for converted validation data (optional)
Returns:
None
"""
if cfg.dataset.format == "coco":
if (hasattr(cfg.dataset, 'val_annotations_file_path') and
hasattr(cfg.dataset, 'val_images_path') and
hasattr(cfg.dataset, 'data_dir') and
cfg.dataset.val_annotations_file_path and
cfg.dataset.val_images_path and
cfg.dataset.data_dir):
print("\nConverting validation set to YOLO format...")
convert_coco_to_yolo(cfg.dataset.val_annotations_file_path,
cfg.dataset.val_images_path,
cfg.dataset.class_names,
cfg.dataset.data_dir)
else:
raise ValueError(f"val_annotations_file_path, val_images_path and data_dir must be specified while evaluating on COCO format dataset.")
elif cfg.dataset.format == "pascal_voc":
# Convert validation set if all required paths are provided
if (hasattr(cfg.dataset, 'val_xml_files_path') and
hasattr(cfg.dataset, 'val_images_path') and
hasattr(cfg.dataset, 'data_dir') and
cfg.dataset.val_xml_dir and
cfg.dataset.val_images_path and
cfg.dataset.data_dir):
print("\nConverting validation set to YOLO format...")
convert_voc_to_yolo(cfg.dataset.val_xml_dir,
cfg.dataset.val_images_path,
cfg.dataset.class_names,
cfg.dataset.data_dir)
else:
raise ValueError(f"val_xml_files_path, val_images_path and data_dir must be specified while evaluating on PASCAL_VOC format dataset.")
elif cfg.dataset.format == "yolo_darknet":
print("Dataset is already in YOLO format. No conversion needed.")
return
else:
print("Please make sure that you selected one of the following formats: {}, {}, {}, {}".format(
"coco", "pascal_voc", "yolo_darknet", "kitti"))
print("Exiting the script...")
sys.exit()
def convert_dataset_to_yolo(cfg: DictConfig) -> None:
"""
Converts the dataset to YOLO Darknet format according to the config.
If validation paths are provided, converts both training and validation sets.
Args:
cfg (DictConfig): Configuration dictionary containing:
- dataset.format: Format of the input dataset
- dataset.train_annotations_path: Path to training annotations
- dataset.train_images_path: Path to training images
- dataset.val_annotations_path: Path to validation annotations (optional)
- dataset.val_images_path: Path to validation images (optional)
- dataset.class_names: List of class names
- dataset.training_path: Output path for converted training data
- dataset.validation_path: Output path for converted validation data (optional)
Returns:
None
"""
if cfg.dataset.format == "coco":
# Convert training set
print("Converting training set to YOLO format...")
convert_coco_to_yolo(cfg.dataset.train_annotations_path,
cfg.dataset.train_images_path,
cfg.dataset.class_names,
cfg.dataset.data_dir)
# Convert validation set if all required paths are provided
if (hasattr(cfg.dataset, 'val_annotations_file_path') and
hasattr(cfg.dataset, 'val_images_path') and
hasattr(cfg.dataset, 'data_dir') and
cfg.dataset.val_annotations_file_path and
cfg.dataset.val_images_path and
cfg.dataset.data_dir):
print("\nConverting validation set to YOLO format...")
convert_coco_to_yolo(cfg.dataset.val_annotations_file_path,
cfg.dataset.val_images_path,
cfg.dataset.class_names,
cfg.dataset.data_dir)
elif cfg.dataset.format == "pascal_voc":
# Convert training set
print("Converting training set to YOLO format...")
convert_voc_to_yolo(cfg.dataset.train_xml_dir,
cfg.dataset.train_images_path,
cfg.dataset.class_names,
cfg.dataset.data_dir)
# Convert validation set if all required paths are provided
if (hasattr(cfg.dataset, 'val_xml_files_path') and
hasattr(cfg.dataset, 'val_images_path') and
hasattr(cfg.dataset, 'data_dir') and
cfg.dataset.val_xml_dir and
cfg.dataset.val_images_path and
cfg.dataset.data_dir):
print("\nConverting validation set to YOLO format...")
convert_voc_to_yolo(cfg.dataset.val_xml_dir,
cfg.dataset.val_images_path,
cfg.dataset.class_names,
cfg.dataset.data_dir)
elif cfg.dataset.format == "yolo_darknet":
print("Dataset is already in YOLO format. No conversion needed.")
return
else:
print("Please make sure that you selected one of the following formats: {}, {}, {}, {}".format(
"coco", "pascal_voc", "yolo_darknet", "kitti"))
print("Exiting the script...")
sys.exit()