File size: 2,972 Bytes
d3cf882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98e86c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import xml.etree.ElementTree as ET
from pathlib import Path

# Image and annotation directories, grouped per dataset split (Train / Test)
image_dir_train, annotations_dir_train = (
    'Dataset/Images/Train/',
    'Dataset/Annotations/Train/',
)
image_dir_test, annotations_dir_test = (
    'Dataset/Images/Test/',
    'Dataset/Annotations/Test/',
)

# Function to get the correct extension for the image
def get_correct_extension(image_path):
    """Return the extension to record for ``image_path``.

    The path's suffix is lower-cased; if it is '.jpeg' or '.jpg' it is
    returned as-is, and any other (or missing) suffix falls back to '.jpg'.
    """
    suffix = image_path.suffix.lower()
    return suffix if suffix in ('.jpeg', '.jpg') else '.jpg'

# Function to update XML with correct image filename and path extension
def update_xml_extension(xml_file, image_path):
    """Rewrite an annotation XML so its image filename uses the image's extension.

    The text of the XML's ``<filename>`` element is compared against the
    extension returned by ``get_correct_extension(image_path)``. When it does
    not end with that extension, everything after the last '.' is replaced
    with it, the final component of the ``<path>`` element (when present and
    non-empty) is replaced with the new filename, and the XML file is
    overwritten in place.

    Args:
        xml_file: Path of the annotation XML file to read and, if modified,
            overwrite.
        image_path: ``pathlib.Path`` of the image the annotation refers to.

    Returns:
        None. The file is left untouched when it has no ``<filename>``
        element, when that element is empty, or when the filename already
        ends with the expected extension.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Find the 'filename' tag which contains the image filename
    filename_tag = root.find('filename')
    if filename_tag is None:
        return

    current_filename = filename_tag.text
    # An empty <filename/> element has text None; there is no name to fix,
    # and calling str methods on it would raise AttributeError.
    if current_filename is None or not current_filename.strip():
        return

    correct_extension = get_correct_extension(image_path)
    if current_filename.endswith(correct_extension):
        return

    # Replace whatever follows the last '.' with the correct extension
    new_filename = current_filename.rsplit('.', 1)[0] + correct_extension
    filename_tag.text = new_filename

    # The 'path' tag is optional: keep new_path defined even when it is
    # absent or empty so the report below cannot hit an unbound name.
    new_path = None
    path_tag = root.find('path')
    current_path = path_tag.text if path_tag is not None else None
    # with_name() raises ValueError on a path with no final component
    if current_path and Path(current_path).name:
        new_path = Path(current_path).with_name(new_filename)
        path_tag.text = str(new_path)

    # Save the modified XML file
    tree.write(xml_file)

    message = f"Updated XML: {xml_file} to use {new_filename}"
    if new_path is not None:
        message += f" and path {new_path}"
    print(message)

# Process all XML files in both Train and Test directories
def process_xml_files(image_dir, annotations_dir):
    """Update every annotation XML under ``annotations_dir`` to match its image.

    Annotation files are discovered recursively (``*.xml``). For each one, an
    image with the same stem is looked up directly inside ``image_dir``,
    trying '.jpg' first and '.jpeg' second. The first candidate that exists
    is handed to ``update_xml_extension``; if neither exists a notice is
    printed and the XML is skipped.
    """
    for xml_file in Path(annotations_dir).rglob('*.xml'):
        for extension in ('.jpg', '.jpeg'):
            image_path = Path(image_dir) / (xml_file.stem + extension)
            if image_path.exists():
                update_xml_extension(xml_file, image_path)
                break
        else:
            # Neither candidate extension matched a file on disk
            print(f"Image not found for XML: {xml_file}")

# Run for both training and testing datasets (training first, then testing)
for _image_dir, _annotations_dir in (
    (image_dir_train, annotations_dir_train),
    (image_dir_test, annotations_dir_test),
):
    process_xml_files(_image_dir, _annotations_dir)