Spaces:

luismidv
/

PlainConcepts

Runtime error

App Files Files Community

PlainConcepts / data_analysis.py

luismidv

Committed plain concepts trial

e5cbecb about 1 year ago

raw

history blame contribute delete

6.66 kB

	import cv2
	from PIL import Image
	import numpy as np
	import os
	from pathlib import Path
	import pandas as pd
	from bs4 import BeautifulSoup
	import pandas as pd
	import matplotlib.pyplot as plt

	class DataAnalyst:
	    """Prepare an image dataset of circuit elements for YOLO-style training.

	    The class resizes the source images to a fixed size, converts bounding
	    boxes found in per-image XML annotation files into normalised
	    ``class x_center y_center width height`` label lines, and can plot the
	    class distribution of the annotated elements.
	    """

	    # Target (width, height) used both when resizing images and when
	    # normalising bounding boxes.
	    # NOTE(review): boxes are normalised by this resized size, which is only
	    # correct if the XML coordinates refer to images of this size -- confirm
	    # against the annotation files.
	    DEFAULT_IMAGE_SIZE = (384, 384)

	    # Dataset layout used by xml_data_builder; the split name ("train",
	    # "val", ...) is appended as the last path component.
	    IMAGES_ROOT = "./datasets/data/dataset/images"
	    LABELS_ROOT = "./datasets/data/dataset/labels"

	    def __init__(self, source_route, output_route, annotations_route):
	        """Store the working folders and initialise the bookkeeping containers.

	        Args:
	            source_route: Folder containing the original images.
	            output_route: Folder where resized images are written.
	            annotations_route: Folder containing the XML annotation files.
	        """
	        # Routes used to read from / write to folders.
	        self.route = source_route
	        self.output_folder = output_route
	        self.annotations_route = annotations_route

	        # Resized images (PIL) and label data collected while processing.
	        self.training_features = []
	        self.training_labels = []
	        # One entry per annotated object; feeds the class-distribution plot.
	        self.elements_names = []

	        # Lists intended to identify classes that need more data augmentation.
	        self.dc_circuits = []
	        self.ac_circuits = []

	        # Mapping from annotation class name to the integer id written to
	        # the label files.
	        self.names_dict = {
	            'Resistor': 0,
	            'Capacitor': 1,
	            'Inductor': 2,
	            'DC voltage source': 3,
	            'AC voltage source': 4,
	        }

	    def image_size_searching(self):
	        """Record the size of every ``.jpg`` in the source folder, then resize.

	        Fills ``self.original_width`` / ``self.original_height`` and stores
	        their means in ``self.width_mean`` / ``self.height_mean`` (``nan``
	        when no image could be read). Finally calls ``image_resize_save``.
	        """
	        self.original_width = []
	        self.original_height = []

	        for file in os.listdir(self.route):
	            if not file.lower().endswith(".jpg"):
	                continue

	            image_path = os.path.join(self.route, file)
	            try:
	                image = cv2.imread(image_path)
	            except Exception as e:
	                print(f"Error while trying to open an image: {e}")
	                continue

	            # cv2.imread signals an unreadable file by returning None
	            # instead of raising, so it has to be checked explicitly.
	            if image is None:
	                print(f"Error while trying to open an image: could not decode {image_path}")
	                continue

	            self.original_width.append(image.shape[1])
	            self.original_height.append(image.shape[0])

	        # np.mean([]) yields nan with a RuntimeWarning; produce the same
	        # value without the warning when nothing was readable.
	        if self.original_width:
	            self.width_mean = np.mean(self.original_width)
	            self.height_mean = np.mean(self.original_height)
	        else:
	            self.width_mean = float("nan")
	            self.height_mean = float("nan")

	        self.image_resize_save()

	    def image_resize_save(self):
	        """Resize every file in the source folder and save it to the output folder.

	        Each successfully resized image is written under its original file
	        name and also appended, as an RGB ``PIL.Image``, to
	        ``self.training_features``. Files that cannot be processed are
	        reported and skipped.
	        """
	        self.new_size = self.DEFAULT_IMAGE_SIZE

	        # cv2.imwrite fails silently when the destination folder is missing.
	        os.makedirs(self.output_folder, exist_ok=True)

	        for file in os.listdir(self.route):
	            file_route = os.path.join(self.route, file)
	            try:
	                image = cv2.imread(file_route)
	                if image is None:
	                    print(f"Error while trying to resize and save and image could not decode {file_route}")
	                    continue

	                image = cv2.resize(image, self.new_size, interpolation=cv2.INTER_AREA)
	                output_path = os.path.join(self.output_folder, file)
	                # imwrite reports failure through its return value.
	                if not cv2.imwrite(output_path, image):
	                    print(f"Error while trying to resize and save and image could not write {output_path}")

	                # OpenCV loads BGR; PIL expects RGB.
	                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
	                self.training_features.append(Image.fromarray(image))
	            except Exception as e:
	                print(f"Error while trying to resize and save and image {e}")

	    def dataset_info_summary(self):
	        """Show a pie chart of the class distribution in ``self.elements_names``."""
	        if not self.elements_names:
	            print("No annotated elements collected yet; nothing to plot")
	            return

	        # A Series (rather than a DataFrame) keeps the index as plain class
	        # names, so the pie labels are not rendered as one-element tuples.
	        class_counts = pd.Series(self.elements_names).value_counts()
	        _, axes = plt.subplots(1, 1)
	        axes.pie(
	            class_counts,
	            labels=class_counts.index,
	            autopct='%1.2f%%',
	            colors=['gold', 'skyblue', 'lightgreen', 'red', 'green'],
	            startangle=90,
	        )
	        plt.show()

	    def xml_data_builder(self, mode, anotations_route):
	        """Write one label ``.txt`` file per ``.jpg`` image of a dataset split.

	        For every image in ``<IMAGES_ROOT>/<mode>`` the XML file with the
	        same stem is looked up in ``anotations_route``; each annotated object
	        becomes a ``class x_center y_center width height`` line in
	        ``<LABELS_ROOT>/<mode>/<stem>.txt``. Images whose annotations cannot
	        be read are reported and skipped, and no label file is created.

	        Args:
	            mode: Name of the dataset split (e.g. ``"train"``).
	            anotations_route: Folder containing the XML annotation files.
	        """
	        images_route = f"{self.IMAGES_ROOT}/{mode}"
	        labels_route = f"{self.LABELS_ROOT}/{mode}"

	        image_files = os.listdir(images_route)
	        os.makedirs(labels_route, exist_ok=True)

	        for file in image_files:
	            # Case-insensitive, matching image_size_searching.
	            if not file.lower().endswith(".jpg"):
	                continue

	            stem = os.path.splitext(file)[0]
	            xml_file = os.path.join(anotations_route, stem + ".xml")
	            xml_results = self.xml_data_getter(xml_file)
	            if xml_results is None:
	                # xml_data_getter already printed the reason.
	                print(f"Skipping {file}: no usable annotations in {xml_file}")
	                continue

	            label_lines = []
	            for item in xml_results:
	                # item is [class_id, xmin, xmax, ymin, ymax].
	                x_center, y_center, width, height = self.get_coordinates(
	                    [int(item[1]), int(item[2]), int(item[3]), int(item[4])],
	                    self.DEFAULT_IMAGE_SIZE,
	                )
	                label_lines.append(f"{item[0]} {x_center} {y_center} {width} {height}\n")

	            final_path = os.path.join(labels_route, stem + ".txt")
	            with open(final_path, 'w') as f:
	                f.writelines(label_lines)

	    def xml_data_getter(self, file_path):
	        """Extract the annotated objects from one XML annotation file.

	        Every object's class name is also appended to ``self.elements_names``.

	        Args:
	            file_path: Path of the XML file to parse.

	        Returns:
	            A list of ``[class_id, xmin, xmax, ymin, ymax]`` entries (the
	            coordinates are the raw strings from the file), or ``None`` when
	            the file cannot be read or parsed or contains an unknown class.
	        """
	        try:
	            with open(file_path, 'r') as f:
	                file_data = f.read()

	            beauti_data = BeautifulSoup(file_data, 'lxml-xml')
	            result_list = []

	            for element in beauti_data.find_all('object'):
	                name = element.find("name").text
	                # Normalise a misspelling that occurs in the annotations.
	                if name == "Capactitor":
	                    name = "Capacitor"

	                self.elements_names.append(name)
	                coord_x1 = element.find("xmin").text
	                coord_x2 = element.find("xmax").text
	                coord_y1 = element.find("ymin").text
	                coord_y2 = element.find("ymax").text
	                class_id = self.names_dict[name]
	                result_list.append([class_id, coord_x1, coord_x2, coord_y1, coord_y2])

	            return result_list
	        except Exception as e:
	            print(f"Error while reading xml files and getting their information {e}")
	            return None

	    def get_coordinates(self, bbox_tensor, image_size):
	        """Convert a pixel bounding box into normalised centre/size values.

	        Args:
	            bbox_tensor: Sequence ``[xmin, xmax, ymin, ymax]`` in pixels.
	            image_size: ``(width, height)`` of the image in pixels.

	        Returns:
	            Tuple ``(x_center, y_center, width, height)``, each divided by
	            the corresponding image dimension.
	        """
	        normalized_width = 1.0 / image_size[0]
	        normalized_height = 1.0 / image_size[1]
	        width = (bbox_tensor[1] - bbox_tensor[0]) * normalized_width
	        height = (bbox_tensor[3] - bbox_tensor[2]) * normalized_height

	        # Centres along the x and y axes.
	        x_center = ((bbox_tensor[1] + bbox_tensor[0]) / 2.0) * normalized_width
	        y_center = ((bbox_tensor[3] + bbox_tensor[2]) / 2.0) * normalized_height
	        print(f"{x_center} {y_center} {width} {height}")

	        return x_center, y_center, width, height