|
|
import os |
|
|
import xml.etree.ElementTree as ET |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
# Image folders for the train and test splits.
image_dir_train, image_dir_test = (
    'Dataset/Images/Train/',
    'Dataset/Images/Test/',
)

# Annotation folders for the train and test splits.
annotations_dir_train, annotations_dir_test = (
    'Dataset/Annotations/Train/',
    'Dataset/Annotations/Test/',
)
|
|
|
|
|
|
|
|
def get_correct_extension(image_path):
    """Return the extension to record for an image: '.jpeg' or '.jpg'.

    The suffix is compared case-insensitively and returned lower-cased.
    Any other suffix, or no suffix at all, falls back to '.jpg'.

    Args:
        image_path: Image location, either a ``pathlib`` path object or a
            plain ``str``.

    Returns:
        The string '.jpeg' or '.jpg'.
    """
    # Plain strings have no ``.suffix``; wrap them so both input kinds work.
    if isinstance(image_path, str):
        image_path = Path(image_path)

    suffix = image_path.suffix.lower()
    return suffix if suffix in ('.jpeg', '.jpg') else '.jpg'
|
|
|
|
|
|
|
|
def update_xml_extension(xml_file, image_path):
    """Make the image reference in an annotation XML match the image's extension.

    Looks up ``<filename>`` under the XML root. If its text does not already
    end with the extension reported by ``get_correct_extension(image_path)``,
    everything after the last dot is replaced by that extension (the
    extension is appended when the name contains no dot). When a non-empty
    ``<path>`` element is present, its final component is replaced by the
    new file name. The XML file is overwritten in place only when the
    file name was changed.

    Args:
        xml_file: Path of the annotation XML to read and, if needed, rewrite.
        image_path: Path of the image that the annotation describes.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_file`` is not well-formed.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    filename_tag = root.find('filename')
    # A missing or empty <filename> gives us nothing to rename; an empty
    # element has text None, which would otherwise crash on .endswith().
    if filename_tag is None or not filename_tag.text:
        return

    correct_extension = get_correct_extension(image_path)
    current_filename = filename_tag.text
    if current_filename.endswith(correct_extension):
        return

    new_filename = current_filename.rsplit('.', 1)[0] + correct_extension
    filename_tag.text = new_filename

    # <path> is optional. Keep new_path defined either way so the report
    # below cannot hit an unbound local after the file has been written.
    new_path = None
    path_tag = root.find('path')
    if path_tag is not None and path_tag.text:
        new_path = Path(path_tag.text).with_name(new_filename)
        path_tag.text = str(new_path)

    tree.write(xml_file)

    if new_path is None:
        print(f"Updated XML: {xml_file} to use {new_filename}")
    else:
        print(f"Updated XML: {xml_file} to use {new_filename} and path {new_path}")
|
|
|
|
|
|
|
|
def process_xml_files(image_dir, annotations_dir):
    """Sync every annotation XML under ``annotations_dir`` with its image.

    Each ``*.xml`` file found recursively is paired with an image of the
    same stem located directly in ``image_dir``; a '.jpg' file is preferred
    and '.jpeg' is the fallback. Matched pairs are handed to
    ``update_xml_extension``; unmatched annotations are reported on stdout.

    Args:
        image_dir: Directory that holds the image files.
        annotations_dir: Directory searched recursively for XML files.
    """
    images_root = Path(image_dir)

    for xml_file in Path(annotations_dir).rglob('*.xml'):
        # Lazily probe the candidates in priority order; the first one that
        # exists on disk wins.
        candidates = (
            images_root / (xml_file.stem + extension)
            for extension in ('.jpg', '.jpeg')
        )
        image_path = next(
            (candidate for candidate in candidates if candidate.exists()),
            None,
        )

        if image_path is None:
            print(f"Image not found for XML: {xml_file}")
            continue

        update_xml_extension(xml_file, image_path)
|
|
|
|
|
|
|
|
# Fix up the annotations of both dataset splits, train first, then test.
process_xml_files(
    image_dir=image_dir_train,
    annotations_dir=annotations_dir_train,
)
process_xml_files(
    image_dir=image_dir_test,
    annotations_dir=annotations_dir_test,
)
|
|
|