Spaces:

jeyanthangj2004
/

ocr

Runtime error

App Files Files Community

ocr / test_all.py

jeyanthangj2004

Upload 110 files

3f42a6f verified about 2 months ago

raw

history blame contribute delete

10.9 kB

	import json, os, glob, cv2, time, csv, math
	import numpy as np
	from edocr2 import tools
	from pdf2image import convert_from_path

	def process_json_labels(folder_path):
	    """Convert every ``*.json`` annotation file in ``folder_path`` to a CSV file.

	    Each JSON file is expected to hold a ``'shapes'`` list whose entries carry
	    ``'shape_type'``, ``'label'`` and ``'points'`` keys. For every shape of type
	    ``'rectangle'`` or ``'polygon'`` one CSV row is written in the form
	    ``x1, y1, x2, y2, ..., label``; other shape types are ignored. The CSV is
	    written next to the JSON file, with the same stem and a ``.csv`` suffix.

	    Args:
	        folder_path: Directory that is searched (non-recursively) for ``*.json``.

	    Returns:
	        None. The result is the set of CSV files written to disk.
	    """

	    def order_points_clockwise(points):
	        """Return ``points`` starting at the left-most vertex, sorted by angle.

	        The start vertex is the one with the lowest x (lowest y on ties). The
	        remaining vertices are sorted by their ``atan2`` angle as seen from the
	        start vertex. With image coordinates (y growing downwards) this walks a
	        convex outline clockwise; concave outlines are not guaranteed to keep
	        their original edge order.
	        """
	        start_point = min(points, key=lambda p: (p[0], p[1]))

	        def angle_from_start(point):
	            # Angle of the vector start_point -> point.
	            return math.atan2(point[1] - start_point[1], point[0] - start_point[0])

	        # Every vertex equal to the start vertex is left out of the sort, so a
	        # repeated closing vertex is dropped rather than emitted twice.
	        remaining = sorted((p for p in points if p != start_point), key=angle_from_start)
	        return [start_point] + remaining

	    def convert_json_to_coordinates(json_data):
	        """Turn the ``'shapes'`` of one annotation file into flat coordinate rows."""
	        converted_data = []

	        for shape in json_data['shapes']:
	            shape_type = shape['shape_type']
	            if shape_type == 'rectangle':
	                label = shape['label']
	                (xa, ya), (xb, yb) = shape['points']

	                # The two stored corners are any pair of opposite corners, not
	                # necessarily (top-left, bottom-right). Normalise them so the
	                # row is always top-left, top-right, bottom-right, bottom-left.
	                left, right = min(xa, xb), max(xa, xb)
	                top, bottom = min(ya, yb), max(ya, yb)
	                converted_data.append(
	                    [left, top, right, top, right, bottom, left, bottom, label]
	                )
	            elif shape_type == 'polygon':
	                label = shape['label']
	                ordered_points = order_points_clockwise(shape['points'])
	                flattened_points = [coord for point in ordered_points for coord in point]
	                converted_data.append(flattened_points + [label])

	        return converted_data

	    for file in glob.glob(os.path.join(folder_path, '*.json')):
	        with open(file, 'r') as f:
	            json_data = json.load(f)
	        converted_data = convert_json_to_coordinates(json_data)
	        output_csv_file = os.path.splitext(file)[0] + '.csv'

	        with open(output_csv_file, 'w', newline='') as csvfile:
	            csvwriter = csv.writer(csvfile)
	            # Float coordinates are truncated to int; labels pass through as-is.
	            csvwriter.writerows(
	                [int(x) if isinstance(x, float) else x for x in item]
	                for item in converted_data
	            )

	def compute_metrics(filename, predictions, mask_img, iou_thres = 0.2):
	    """Score predicted text boxes against the ground truth stored in a CSV file.

	    Every ground-truth box is paired with the prediction that overlaps it most
	    (IoU from ``tools.train_tools.calculate_iou``). A pair counts as a correct
	    detection when its IoU reaches ``iou_thres`` and neither side is labelled
	    ``'other_info'``. For correct detections the texts are also compared to
	    build the recognition metrics.

	    Args:
	        filename: Path of the ground-truth CSV. Each row is a flat list of x, y
	            coordinates followed by the label in the last column.
	        predictions: Sequence of ``(label, box)`` pairs; ``box`` is passed as-is
	            to ``calculate_iou``.
	        mask_img: Image on which the ground-truth outlines are drawn in green.
	        iou_thres: Minimum IoU for a ground-truth/prediction pair to match.

	    Returns:
	        A 4-tuple ``(dim_metrics, recog_metrics, mask_img, raw)``:
	        ``dim_metrics`` has the keys ``'precision'``, ``'recall'`` and ``'IoU'``;
	        ``recog_metrics`` has ``'char_recall'`` (in percent) and ``'CER'``;
	        ``mask_img`` is the annotated image; ``raw`` is the list
	        ``[correct_detections, n_predictions, n_ground_truth, iou_scores,
	        correct_chars, total_chars, cum_pred, cum_gt]`` for pooling across files.
	    """
	    correct_detections = 0
	    detection_iou_scores = []
	    total_chars, correct_chars = 0, 0
	    cum_gt, cum_pred = '', ''

	    ground_truth = []
	    with open(filename, newline='') as csvfile:
	        for row in csv.reader(csvfile):
	            if not row:
	                # Blank line in the CSV: nothing to parse.
	                continue
	            # The label is the last column and everything before it is x, y
	            # pairs. This matches rows written for 4-corner boxes as well as
	            # polygons with a different number of vertices. Coordinates are
	            # parsed to integers so they are numeric like the predictions.
	            coords = np.array([float(value) for value in row[:-1]]).astype(np.int64)
	            ground_truth.append((row[-1], coords.reshape(-1, 2)))

	    for gt in ground_truth:
	        best_pred = None
	        best_iou = 0
	        for pred in predictions:
	            iou = tools.train_tools.calculate_iou(pred[1], gt[1])
	            if iou > best_iou:
	                best_iou = iou
	                best_pred = pred

	        # best_pred stays None when nothing overlaps at all; without this check
	        # a threshold of 0 would dereference None below.
	        if best_pred is None or best_iou < iou_thres:
	            continue
	        if gt[0] == 'other_info' or best_pred[0] == 'other_info':
	            continue

	        # Detection
	        correct_detections += 1
	        detection_iou_scores.append(best_iou)
	        # Recognition
	        label = gt[0]
	        recog = best_pred[0]
	        cum_gt += label
	        cum_pred += recog
	        # presumably the number of characters of `label` found in `recog`;
	        # verify against tools.train_tools.compare_characters
	        correct_chars += tools.train_tools.compare_characters(label, recog)
	        total_chars += len(label)

	    gt_dim = [gt for gt in ground_truth if gt[0] != 'other_info']
	    predictions_dim = [pr for pr in predictions if pr[0] != 'other_info']
	    precision = correct_detections / len(predictions_dim) if predictions_dim else 0
	    recall = correct_detections / len(gt_dim) if gt_dim else 0
	    # np.mean([]) would give nan plus a RuntimeWarning; fall back to 0 like the
	    # other metrics do when there is nothing to average.
	    average_iou = np.mean(detection_iou_scores) if detection_iou_scores else 0
	    dim_metrics = {'precision':precision, "recall": recall, 'IoU': average_iou}
	    char_recall = (correct_chars / total_chars) * 100 if total_chars > 0 else 0
	    cer = tools.train_tools.get_cer(cum_pred, cum_gt)
	    recog_metrics = {'char_recall': char_recall, 'CER': cer}

	    # Outline every ground-truth box on the image for visual inspection.
	    for gt in ground_truth:
	        pts = gt[1].astype(np.int64)
	        mask_img = cv2.polylines(mask_img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

	    return dim_metrics, recog_metrics, mask_img, [correct_detections, len(predictions_dim), len(gt_dim), detection_iou_scores, correct_chars, total_chars, cum_pred, cum_gt]

	# Sample folder to evaluate. Its JSON annotations are converted to CSV labels
	# first, so the ground truth exists before any drawing is processed.
	folder_path = 'tests/test_samples/'
	process_json_labels(folder_path)
	language = 'eng'

	#region Set Session ##############################
	# The timer covers the TensorFlow import as well as building and loading models.
	start_time = time.time()
	#os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
	import tensorflow as tf
	from edocr2.keras_ocr.recognition import Recognizer
	from edocr2.keras_ocr.detection import Detector

	# Enable memory growth on every visible GPU.
	gpus = tf.config.list_physical_devices('GPU')
	for gpu in gpus:
	    tf.config.experimental.set_memory_growth(gpu, True)

	# Weight files; a detector_model of None leaves the Detector as constructed.
	gdt_model = 'edocr2/models/recognizer_gdts.keras'
	dim_model = 'edocr2/models/recognizer_dimensions_2.keras'
	detector_model = None #'edocr2/models/detector_12_46.keras'

	# Recognizer for G&DT boxes: the alphabet is read from the model path, then the
	# weights are loaded from that same path.
	alphabet_gdt = tools.ocr_pipelines.read_alphabet(gdt_model)
	recognizer_gdt = Recognizer(alphabet=alphabet_gdt)
	recognizer_gdt.model.load_weights(gdt_model)

	# Recognizer for dimensions; its alphabet is reused later by the OCR pipeline.
	alphabet_dim = tools.ocr_pipelines.read_alphabet(dim_model)
	recognizer_dim = Recognizer(alphabet=alphabet_dim)
	recognizer_dim.model.load_weights(dim_model)

	detector = Detector()
	if detector_model:
	    detector.model.load_weights(detector_model)

	end_time = time.time()
	print(f"\033[1;33mLoading session took {end_time - start_time:.6f} seconds to run.\033[0m")
	#endregion

	# Per-drawing results; one entry is appended for every evaluated file.
	precision =[]
	recall=[]
	iou=[]
	char_recall=[]
	cer=[]
	metrics_tools=[]
	times = []

	for file in os.listdir(folder_path):
	    # PDFs are admitted here as well. Previously only .jpg/.png passed this
	    # filter, so the PDF loading branch below could never run. The match is
	    # case-insensitive, so upper-case extensions are accepted too.
	    lower_name = file.lower()
	    if not lower_name.endswith(('.jpg', '.png', '.pdf')):
	        continue

	    filename = os.path.splitext(os.path.basename(file))[0]
	    label_csv = os.path.join(folder_path, filename + '.csv')
	    if not os.path.isfile(label_csv):
	        # Without ground truth there is nothing to score, so skip before
	        # spending time on OCR.
	        print(f'Skipping {file}: no ground-truth file {label_csv}')
	        continue

	    start_time = time.time()
	    #Loading drawing
	    if lower_name.endswith('.pdf'):
	        # Only the first page of the PDF is evaluated.
	        img = convert_from_path(os.path.join(folder_path, file))
	        img = np.array(img[0])
	    else:
	        img = cv2.imread(os.path.join(folder_path, file))
	        if img is None:
	            # cv2.imread returns None instead of raising on unreadable files.
	            print(f'Skipping {file}: image could not be read')
	            continue

	    #Segmentation
	    img_boxes, frame, gdt_boxes, tables, dim_boxes = tools.layer_segm.segment_img(img, autoframe = True, frame_thres=0.7, GDT_thres = 0.02, binary_thres=127)

	    #Tables
	    process_img = img.copy()
	    table_results, updated_tables, process_img= tools.ocr_pipelines.ocr_tables(tables, process_img , language)

	    #G&DTs
	    gdt_results, updated_gdt_boxes, process_img = tools.ocr_pipelines.ocr_gdt(process_img, gdt_boxes, recognizer_gdt)

	    #Dimensions
	    # Crop to the frame only when one was found. The offset logic further down
	    # already allows for a missing frame, so the crop must do the same.
	    if frame:
	        process_img = process_img[frame.y : frame.y + frame.h, frame.x : frame.x + frame.w]
	    dimensions, other_info, process_img, dim_pyt = tools.ocr_pipelines.ocr_dimensions(process_img, detector, recognizer_dim, alphabet_dim, dim_boxes, cluster_thres=20, max_img_size=1240, language=language, backg_save=False)

	    #Masking
	    mask_img = tools.output_tools.mask_img(img, updated_gdt_boxes, tables, dimensions, frame, other_info)
	    end_time = time.time()
	    times.append(end_time-start_time)

	    #Postprocessing for metric computation
	    # Boxes are relative to the cropped frame; shift them back to full-image
	    # coordinates so they can be compared with the ground truth.
	    if frame:
	        offset = (frame.x, frame.y)
	    else:
	        offset = (0, 0)
	    update_dimensions = []
	    for dim in dimensions:
	        box = dim[1]
	        pts=np.array([(box[0] + offset), (box[1] + offset), (box[2] + offset), (box[3] + offset)])
	        update_dimensions.append([dim[0], pts])
	    for dim in other_info:
	        box = dim[1]
	        pts=np.array([(box[0] + offset), (box[1] + offset), (box[2] + offset), (box[3] + offset)])
	        # The recognised text is replaced by the 'other_info' tag so these
	        # boxes are excluded from the dimension metrics.
	        update_dimensions.append(['other_info', pts])

	    #Metrics computation
	    dim_metrics, recog_metrics, mask_img, m_t = compute_metrics(label_csv, update_dimensions, mask_img)
	    #Dimensions
	    for d in dimensions:
	        print(d[0])
	    #Other info
	    print('---------Other info:----------')
	    for o in other_info:
	        print(o[0])
	    print(dim_metrics)
	    print(recog_metrics)
	    precision.append(dim_metrics['precision'])
	    recall.append(dim_metrics['recall'])
	    iou.append(dim_metrics['IoU'])
	    char_recall.append(recog_metrics['char_recall'])
	    cer.append(recog_metrics['CER'])
	    metrics_tools.append(m_t)
	    #Display (disabled)
	    #cv2.imwrite(file + '.png', mask_img)
	    #cv2.imshow('boxes', mask_img)
	    #cv2.waitKey(0)
	    #cv2.destroyAllWindows()

	def _percent(numerator, denominator):
	    """Return ``numerator / denominator`` in percent, or 0 for a zero denominator."""
	    return numerator / denominator * 100 if denominator else 0


	if not metrics_tools:
	    # Nothing was evaluated: averaging empty lists would only print nan and
	    # the pooled ratios would divide by zero.
	    print('------------------')
	    print('No drawings were evaluated in', folder_path)
	else:
	    # Macro average: every drawing contributes its own score with equal weight.
	    print('------------------')
	    print('Macro Average Results')
	    print('Precision:', np.mean(precision)*100, '%')
	    print('Recall:', np.mean(recall)*100, '%')
	    print('IoU', np.mean(iou)*100, '%')
	    # char_recall is already stored in percent by compute_metrics.
	    print('Character Recall', np.mean(char_recall))
	    print('CER', np.mean(cer)*100, '%')


	    # Micro average: raw counts are pooled over all drawings before dividing,
	    # so drawings with more boxes or characters weigh more.
	    print('------------------')
	    print('Micro Average Results')
	    correct_detections = sum(row[0] for row in metrics_tools)
	    predictions = sum(row[1] for row in metrics_tools)
	    print('Precision:', _percent(correct_detections, predictions), '%')
	    gt = sum(row[2] for row in metrics_tools)
	    print('Recall:', _percent(correct_detections, gt), '%')
	    pooled_iou = [item for row in metrics_tools for item in row[3]]
	    print('IoU:', (np.mean(pooled_iou) if pooled_iou else 0)*100, '%')
	    correct_char = sum(row[4] for row in metrics_tools)
	    total_chars = sum(row[5] for row in metrics_tools)
	    print('Character Recall:', _percent(correct_char, total_chars), '%')
	    cum_pred = ''.join(row[6] for row in metrics_tools)
	    cum_gt = ''.join(row[7] for row in metrics_tools)
	    print('CER:',tools.train_tools.get_cer(cum_pred, cum_gt)*100, '%')

	    print('------------------')
	    print('Average time:', np.mean(times), 's')