File size: 6,658 Bytes
da418aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bc67c8
da418aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdcb4d1
 
 
 
 
 
c904ca6
 
fdcb4d1
 
 
 
c904ca6
 
 
f7d411d
7e885be
c904ca6
f7d411d
c904ca6
 
 
 
3bc67c8
c904ca6
 
 
fdcb4d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f75213
fdcb4d1
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import cv2
from PIL import Image
import numpy as np
import os
from pathlib import Path
import pandas as pd
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt

class DataAnalyst():
    """Prepare a circuit-element image dataset for YOLO training.

    The class resizes the source images to a fixed size, reads the bounding
    boxes stored in per-image XML annotation files and writes them out as
    YOLO label files (``class x_center y_center width height``, normalized).
    """

    # Target (width, height) every image is resized to. The same size is used
    # to normalize the bounding boxes when the label files are written.
    IMAGE_SIZE = (384, 384)

    def __init__(self, source_route, output_route, annotations_route):
        """Store the working folders and initialise the bookkeeping lists.

        Args:
            source_route: folder the original images are read from.
            output_route: folder the resized images are written to.
            annotations_route: folder holding the XML annotation files.
        """
        # Routes used to read/write on folders.
        self.route = source_route
        self.output_folder = output_route
        self.annotations_route = annotations_route

        # Lists used to store the training data.
        self.training_features = []
        self.training_labels = []
        self.elements_names = []

        # Lists used to identify classes that will need more data augmentation.
        self.dc_circuits = []
        self.ac_circuits = []

        # Dictionary used to encode the class names as integer labels.
        self.names_dict = {
            'Resistor': 0,
            'Capacitor': 1,
            'Inductor': 2,
            'DC voltage source': 3,
            'AC voltage source': 4
        }

    def image_size_searching(self):
        """Collect the width/height of every ``.jpg`` image and their means.

        Fills ``self.original_width`` / ``self.original_height`` and sets
        ``self.width_mean`` / ``self.height_mean`` (``nan`` when no image
        could be read), then calls :meth:`image_resize_save`.
        """
        self.original_width = []
        self.original_height = []

        for file in os.listdir(self.route):
            if not file.lower().endswith(".jpg"):
                continue

            image_path = os.path.join(self.route, file)
            try:
                image = cv2.imread(image_path)
            except Exception as e:
                print(f"Error while trying to open an image: {e}")
                continue

            # cv2.imread signals an unreadable file by returning None instead
            # of raising, so it has to be checked explicitly.
            if image is None:
                print(f"Error while trying to open an image: {image_path}")
                continue

            self.original_width.append(image.shape[1])
            self.original_height.append(image.shape[0])

        # np.mean of an empty list warns and yields nan; produce nan quietly.
        if self.original_width:
            self.width_mean = np.mean(self.original_width)
            self.height_mean = np.mean(self.original_height)
        else:
            self.width_mean = float("nan")
            self.height_mean = float("nan")

        self.image_resize_save()

    def image_resize_save(self):
        """Resize every image of the source folder and save it to the output folder.

        Each readable image is resized to ``IMAGE_SIZE``, written to
        ``self.output_folder`` under the same file name, and appended to
        ``self.training_features`` as an RGB ``PIL.Image``. Files that cannot
        be processed are reported and skipped.
        """
        self.new_size = self.IMAGE_SIZE

        # cv2.imwrite fails silently when the target folder does not exist.
        if self.output_folder:
            os.makedirs(self.output_folder, exist_ok=True)

        for file in os.listdir(self.route):
            file_route = os.path.join(self.route, file)
            if not os.path.isfile(file_route):
                continue

            try:
                image = cv2.imread(file_route)
                if image is None:
                    print(f"Error while trying to resize and save and image {file_route} could not be decoded")
                    continue

                image = cv2.resize(image, self.new_size, interpolation=cv2.INTER_AREA)
                output_path = os.path.join(self.output_folder, file)
                if not cv2.imwrite(output_path, image):
                    print(f"Error while trying to resize and save and image {output_path} could not be written")

                # OpenCV loads images as BGR; PIL expects RGB.
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                self.training_features.append(Image.fromarray(image))

            except Exception as e:
                print(f"Error while trying to resize and save and image {e}")

    def dataset_info_summary(self):
        """Show a pie chart with the class distribution of the parsed annotations.

        Uses the names collected in ``self.elements_names`` by
        :meth:`xml_data_getter`; does nothing when that list is empty.
        """
        if not self.elements_names:
            print("No annotated elements collected yet, nothing to plot")
            return

        # A Series keeps the plain class names as index, so the pie labels
        # read "Resistor" rather than the tuple "('Resistor',)".
        class_counts = pd.Series(self.elements_names).value_counts()
        fig, axes = plt.subplots(1, 1)
        axes.pie(
            class_counts,
            labels=class_counts.index,
            autopct='%1.2f%%',
            colors=['gold', 'skyblue', 'lightgreen', 'red', 'green'],
            startangle=90,
        )
        plt.show()

    def xml_data_builder(self, mode, anotations_route=None):
        """Write one YOLO label file per ``.jpg`` image of a dataset split.

        Args:
            mode: name of the split; it replaces ``train`` in the image and
                label folder paths (for example ``"train"`` or ``"val"``).
            anotations_route: folder with the XML annotation files. Defaults
                to the ``annotations_route`` given to the constructor.

        Images whose annotation file cannot be parsed are skipped, so no
        empty label file is written for them.
        """
        if anotations_route is None:
            anotations_route = self.annotations_route

        route = "./datasets/data/dataset/images/train".replace("train", mode)
        label_route = "./datasets/data/dataset/labels/train".replace("train", mode)
        os.makedirs(label_route, exist_ok=True)

        for file in os.listdir(route):
            if not file.lower().endswith(".jpg"):
                continue

            stem = os.path.splitext(file)[0]
            xml_file = os.path.join(anotations_route, stem + ".xml")
            xml_results = self.xml_data_getter(xml_file)
            if xml_results is None:
                # The parser already reported the reason.
                print(f"Skipping labels for {file}")
                continue

            final_path = os.path.join(label_route, stem + ".txt")
            with open(final_path, 'w') as f:
                for item in xml_results:
                    # item is [class_id, xmin, xmax, ymin, ymax] with the
                    # coordinates still stored as text.
                    # NOTE(review): normalizing by IMAGE_SIZE is only right if
                    # the XML coordinates refer to the resized images - confirm.
                    bbox = [int(float(value)) for value in item[1:5]]
                    x_center, y_center, width, height = self.get_coordinates(bbox, self.IMAGE_SIZE)
                    f.write(f"{item[0]} {x_center} {y_center} {width} {height}\n")

    def xml_data_getter(self, file_path):
        """Read the bounding boxes described in one XML annotation file.

        Args:
            file_path: path of the XML file.

        Returns:
            A list with one ``[class_id, xmin, xmax, ymin, ymax]`` entry per
            known object (coordinates as the strings found in the file), or
            ``None`` when the file cannot be read or parsed. Objects whose
            class is not in ``self.names_dict`` are reported and skipped.
        """
        result_list = []
        found_names = []
        try:
            with open(file_path, 'r') as f:
                file_data = f.read()

            beauti_data = BeautifulSoup(file_data, 'lxml-xml')
            for element in beauti_data.find_all('object'):
                name = element.find("name").text.strip()
                # Known misspelling in the annotation files.
                if name == "Capactitor":
                    name = "Capacitor"

                if name not in self.names_dict:
                    print(f"Skipping unknown element class '{name}' in {file_path}")
                    continue

                coord_x1 = element.find("xmin").text
                coord_x2 = element.find("xmax").text
                coord_y1 = element.find("ymin").text
                coord_y2 = element.find("ymax").text
                result_list.append([self.names_dict[name], coord_x1, coord_x2, coord_y1, coord_y2])
                found_names.append(name)
        except Exception as e:
            print(f"Error while reading xml files and getting their information {e}")
            return None

        # Only count the names once the whole file was parsed successfully.
        self.elements_names.extend(found_names)
        return result_list

    def get_coordinates(self, bbox_tensor, image_size):
        """Convert a pixel bounding box to normalized YOLO coordinates.

        Args:
            bbox_tensor: sequence ``[xmin, xmax, ymin, ymax]`` in pixels.
            image_size: ``(width, height)`` of the image in pixels.

        Returns:
            Tuple ``(x_center, y_center, width, height)`` where every value
            is divided by the matching image dimension.
        """
        normalized_width = 1.0 / image_size[0]
        normalized_height = 1.0 / image_size[1]
        width = (bbox_tensor[1] - bbox_tensor[0]) * normalized_width
        height = (bbox_tensor[3] - bbox_tensor[2]) * normalized_height

        # Centers along the x and y axes.
        x_center = ((bbox_tensor[1] + bbox_tensor[0]) / 2.0) * normalized_width
        y_center = ((bbox_tensor[3] + bbox_tensor[2]) / 2.0) * normalized_height
        print(f"{x_center} {y_center} {width} {height}")

        return x_center, y_center, width, height