"""Align image file names recorded in XML annotations with the images on disk.

For every ``*.xml`` annotation the script looks for an image with the same
stem and a ``.jpg`` or ``.jpeg`` extension, then rewrites the ``<filename>``
(and, when present, ``<path>``) element so its extension matches that image.
"""
import os
import xml.etree.ElementTree as ET
from pathlib import Path, PureWindowsPath

# Define paths for image and annotation directories
image_dir_train = 'Dataset/Images/Train/'
image_dir_test = 'Dataset/Images/Test/'
annotations_dir_train = 'Dataset/Annotations/Train/'
annotations_dir_test = 'Dataset/Annotations/Test/'


def get_correct_extension(image_path):
    """Return the lower-cased image extension, either '.jpeg' or '.jpg'.

    Args:
        image_path: Path (or path string) of the image file.

    Returns:
        The lower-cased suffix of ``image_path`` when it is '.jpeg' or
        '.jpg'; otherwise '.jpg' as the default.
    """
    suffix = Path(image_path).suffix.lower()
    if suffix in ('.jpeg', '.jpg'):
        return suffix
    return '.jpg'  # Default to '.jpg' if the extension is incorrect or unsupported.


def _with_basename(path_text, new_name):
    """Return ``path_text`` with its last component replaced by ``new_name``.

    Returns ``None`` when the path has no final component to replace (or
    ``new_name`` is not a valid file name), so the caller can leave the
    original text untouched.
    """
    # A path containing backslashes was recorded with Windows separators.
    # Parsing it with POSIX rules would treat the whole string as a single
    # file name and the directory part would be lost on replacement.
    flavour = PureWindowsPath if '\\' in path_text else Path
    try:
        return str(flavour(path_text).with_name(new_name))
    except ValueError:
        return None


def update_xml_extension(xml_file, image_path):
    """Update the image file name in the XML to match the image extension.

    The ``<filename>`` element is rewritten when it does not already end
    with the extension of ``image_path``. If the file name changes and a
    non-empty ``<path>`` element exists, its last component is replaced by
    the new file name. The XML file is rewritten in place only when the
    file name changed.

    Args:
        xml_file: Path of the annotation XML file to inspect and update.
        image_path: Path of the image that the annotation belongs to.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Find the 'filename' tag which contains the image filename
    filename_tag = root.find('filename')
    if filename_tag is None:
        return

    current_filename = filename_tag.text
    if not current_filename or not current_filename.strip():
        # An empty <filename/> has text None; there is nothing to fix.
        print(f"Skipping XML with empty <filename>: {xml_file}")
        return

    correct_extension = get_correct_extension(image_path)
    if current_filename.endswith(correct_extension):
        return  # Already consistent with the image on disk.

    # Replace the old extension with the correct one
    new_filename = current_filename.rsplit('.', 1)[0] + correct_extension
    filename_tag.text = new_filename

    # Update the 'path' tag as well, but only when it exists and has content.
    new_path = None
    path_tag = root.find('path')
    if path_tag is not None and path_tag.text:
        new_path = _with_basename(path_tag.text, new_filename)
        if new_path is not None:
            path_tag.text = new_path

    # Save the modified XML file
    tree.write(xml_file)
    if new_path is not None:
        print(f"Updated XML: {xml_file} to use {new_filename} and path {new_path}")
    else:
        # No usable <path> element: report only the file name change.
        print(f"Updated XML: {xml_file} to use {new_filename}")


def process_xml_files(image_dir, annotations_dir):
    """Fix the recorded image extension in every XML under ``annotations_dir``.

    Annotation files are discovered recursively. For each one, an image with
    the same stem is looked up directly inside ``image_dir``, trying '.jpg'
    first and then '.jpeg'. Annotations without a matching image are
    reported and left unchanged.

    Args:
        image_dir: Directory containing the image files.
        annotations_dir: Directory searched recursively for ``*.xml`` files.
    """
    images_root = Path(image_dir)
    for xml_file in Path(annotations_dir).rglob('*.xml'):
        # Get corresponding image file (same name, different extension);
        # default to '.jpg' first.
        image_path = images_root / (xml_file.stem + '.jpg')

        # If the .jpg file doesn't exist, try the .jpeg version
        if not image_path.exists():
            image_path = images_root / (xml_file.stem + '.jpeg')

        # If the image exists, update the XML file
        if image_path.exists():
            update_xml_extension(xml_file, image_path)
        else:
            print(f"Image not found for XML: {xml_file}")


# Run for both training and testing datasets
process_xml_files(image_dir_train, annotations_dir_train)
process_xml_files(image_dir_test, annotations_dir_test)