ocr / test_all.py
jeyanthangj2004's picture
Upload 110 files
3f42a6f verified
import json, os, glob, cv2, time, csv, math
import numpy as np
from edocr2 import tools
from pdf2image import convert_from_path
def _order_points_clockwise(points):
    """Order polygon vertices by their angle around the left-most vertex.

    The vertex with the smallest x (smallest y on ties) becomes the first
    point. The remaining vertices follow in ascending ``atan2`` angle measured
    from that first point; with image coordinates (y growing downward) this is
    a clockwise sweep on screen.

    Args:
        points: sequence of ``[x, y]`` pairs.

    Returns:
        A new list starting with the anchor vertex followed by the others.
        Vertices equal to the anchor (e.g. a repeated closing point) are
        dropped, so the anchor appears exactly once.
    """
    start_point = min(points, key=lambda p: (p[0], p[1]))

    def angle_from_start(point):
        return math.atan2(point[1] - start_point[1], point[0] - start_point[0])

    others = sorted((p for p in points if p != start_point), key=angle_from_start)
    return [start_point] + others


def _shapes_to_rows(json_data):
    """Turn the ``shapes`` of one annotation dict into flat coordinate rows.

    Rectangles become ``[x1, y1, x2, y2, x3, y3, x4, y4, label]`` in the order
    top-left, top-right, bottom-right, bottom-left. Polygons become their
    ordered vertices flattened, followed by the label. Any other shape type is
    ignored.
    """
    rows = []
    for shape in json_data['shapes']:
        shape_type = shape['shape_type']
        if shape_type == 'rectangle':
            (x1, y1), (x2, y2) = shape['points']
            # The two stored points are opposite corners, but not necessarily
            # top-left then bottom-right: normalise so the row always
            # starts at the top-left corner regardless of how the box was drawn.
            left, right = min(x1, x2), max(x1, x2)
            top, bottom = min(y1, y2), max(y1, y2)
            rows.append([left, top, right, top, right, bottom, left, bottom, shape['label']])
        elif shape_type == 'polygon':
            ordered = _order_points_clockwise(shape['points'])
            flattened = [coord for point in ordered for coord in point]
            rows.append(flattened + [shape['label']])
    return rows


def process_json_labels(folder_path):
    """Convert every ``*.json`` annotation file in a folder to a CSV file.

    For each ``<name>.json`` directly inside ``folder_path`` a sibling
    ``<name>.csv`` is written (overwriting any existing file) with one row per
    rectangle or polygon shape: the flattened corner coordinates followed by
    the shape label. Float coordinates are truncated to integers with
    ``int()``; every other value is written unchanged.

    Args:
        folder_path: directory that is searched (non-recursively) for JSON files.

    Raises:
        KeyError: if a JSON file lacks the ``shapes`` key or a shape lacks
            ``shape_type``, ``points`` or ``label``.
    """
    for json_file in glob.glob(os.path.join(folder_path, '*.json')):
        # Read and close the annotation before the output file is opened.
        with open(json_file, 'r') as f:
            json_data = json.load(f)
        rows = _shapes_to_rows(json_data)

        output_csv_file = os.path.splitext(json_file)[0] + '.csv'
        with open(output_csv_file, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            for row in rows:
                csvwriter.writerow([int(x) if isinstance(x, float) else x for x in row])
def compute_metrics(filename, predictions, mask_img, iou_thres = 0.2):
    """Score predicted text boxes against a ground-truth CSV and draw the ground truth.

    Each ground-truth box is paired with the prediction that has the highest
    IoU (via ``tools.train_tools.calculate_iou``). A pair counts as a correct
    detection when that IoU reaches ``iou_thres`` and neither side is labelled
    ``'other_info'``; the texts of those pairs feed the recognition metrics.

    NOTE(review): one prediction may be the best match for several
    ground-truth boxes, so ``correct_detections`` is not capped by the number
    of predictions.

    Args:
        filename: path of a CSV whose rows are ``x1,y1,x2,y2,x3,y3,x4,y4,label``.
            Only the first eight fields are read as corners and the ninth as
            the label. Blank rows are skipped.
        predictions: sequence of ``(label, box)`` pairs; ``box`` is handed to
            ``calculate_iou`` unchanged.
        mask_img: image on which every ground-truth box is outlined with
            colour ``(0, 255, 0)``.
        iou_thres: minimum IoU for a pair to count as a detection.

    Returns:
        ``(dim_metrics, recog_metrics, mask_img, raw)`` where ``dim_metrics``
        has keys ``precision``, ``recall`` and ``IoU``, ``recog_metrics`` has
        ``char_recall`` (in percent) and ``CER``, and ``raw`` is
        ``[correct_detections, n_predictions, n_ground_truth, iou_scores,
        correct_chars, total_chars, cum_pred, cum_gt]`` for pooling across
        files.
    """
    ground_truth = []
    with open(filename, newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                continue
            # csv yields strings: parse them so the IoU helper receives a
            # numeric (4, 2) array rather than an array of text.
            corners = np.array(row[:8], dtype=float).astype(np.int64).reshape(4, 2)
            ground_truth.append((row[8], corners))

    correct_detections = 0
    detection_iou_scores = []
    total_chars, correct_chars = 0, 0
    matched_gt, matched_pred = [], []

    for gt_label, gt_box in ground_truth:
        best_pred, best_iou = None, 0
        for pred in predictions:
            iou = tools.train_tools.calculate_iou(pred[1], gt_box)
            if iou > best_iou:
                best_iou, best_pred = iou, pred

        # best_pred stays None when nothing overlaps; without this guard a
        # non-positive threshold would index into None below.
        if best_pred is None or best_iou < iou_thres:
            continue
        if gt_label == 'other_info' or best_pred[0] == 'other_info':
            continue

        #Detection
        correct_detections += 1
        detection_iou_scores.append(best_iou)
        #Recognition
        recog = best_pred[0]
        matched_gt.append(gt_label)
        matched_pred.append(recog)
        correct_chars += tools.train_tools.compare_characters(gt_label, recog)
        total_chars += len(gt_label)

    cum_gt = ''.join(matched_gt)
    cum_pred = ''.join(matched_pred)

    gt_dim = [g for g in ground_truth if g[0] != 'other_info']
    predictions_dim = [p for p in predictions if p[0] != 'other_info']

    precision = correct_detections / len(predictions_dim) if predictions_dim else 0
    recall = correct_detections / len(gt_dim) if gt_dim else 0
    # np.mean of an empty list is nan (with a RuntimeWarning); report 0.0 like
    # the other metrics do when there is nothing to average.
    average_iou = np.mean(detection_iou_scores) if detection_iou_scores else 0.0
    dim_metrics = {'precision':precision, "recall": recall, 'IoU': average_iou}

    char_recall = (correct_chars / total_chars) * 100 if total_chars > 0 else 0
    cer = tools.train_tools.get_cer(cum_pred, cum_gt)
    recog_metrics = {'char_recall': char_recall, 'CER': cer}

    # Outline every ground-truth box (including 'other_info') on the image.
    for _, box in ground_truth:
        pts = box.astype(np.int64)
        mask_img = cv2.polylines(mask_img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)

    return dim_metrics, recog_metrics, mask_img, [correct_detections, len(predictions_dim), len(gt_dim), detection_iou_scores, correct_chars, total_chars, cum_pred, cum_gt]
# Folder holding the test drawings together with their JSON annotations.
folder_path = 'tests/test_samples/'
# Regenerate the ground-truth CSV files from the JSON annotations before evaluating.
process_json_labels(folder_path)
# Language code handed to the table and dimension OCR calls further down.
language = 'eng'

#region Set Session ##############################
# Time the whole session setup; the TensorFlow/edocr2 imports are placed
# inside the timed region so their import cost is part of the reported figure.
start_time = time.time()
#os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
import tensorflow as tf
from edocr2.keras_ocr.recognition import Recognizer
from edocr2.keras_ocr.detection import Detector

# Configure GPU memory growth
# (done before any model is constructed below)
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Load models
gdt_model = 'edocr2/models/recognizer_gdts.keras'
dim_model = 'edocr2/models/recognizer_dimensions_2.keras'
# No custom detector weights are loaded while this is None.
detector_model = None #'edocr2/models/detector_12_46.keras'

# Each recognizer is built with the alphabet read for its model path, then
# the weights are loaded from that same path.
recognizer_gdt = Recognizer(alphabet=tools.ocr_pipelines.read_alphabet(gdt_model))
recognizer_gdt.model.load_weights(gdt_model)
# alphabet_dim is kept in its own variable because the dimension OCR call
# later receives it as a separate argument.
alphabet_dim = tools.ocr_pipelines.read_alphabet(dim_model)
recognizer_dim = Recognizer(alphabet=alphabet_dim)
recognizer_dim.model.load_weights(dim_model)

# NOTE(review): with detector_model unset the detector is used exactly as
# Detector() constructs it — confirm which weights that implies.
detector = Detector()
if detector_model:
    detector.model.load_weights(detector_model)

end_time = time.time()
print(f"\033[1;33mLoading session took {end_time - start_time:.6f} seconds to run.\033[0m")
#endregion
# Per-image results, appended in the order the images are processed.
precision = []
recall = []
iou = []
char_recall = []
cer = []
metrics_tools = []  # raw per-image counts from compute_metrics, pooled in the report
times = []  # wall-clock seconds spent on each image's OCR pipeline

for file in os.listdir(folder_path):
    # Only .jpg/.png drawings are evaluated. PDFs never pass this filter, so
    # no PDF-loading path is needed inside the loop.
    if not file.endswith(('.jpg', '.png')):
        continue

    start_time = time.time()

    #Loading drawing
    img_path = os.path.join(folder_path, file)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None; fail here
        # with a clear message instead of deep inside the pipeline.
        raise ValueError(f'Could not read image: {img_path}')
    filename = os.path.splitext(os.path.basename(file))[0]

    #Segmentation
    img_boxes, frame, gdt_boxes, tables, dim_boxes = tools.layer_segm.segment_img(img, autoframe = True, frame_thres=0.7, GDT_thres = 0.02, binary_thres=127)

    #Tables
    process_img = img.copy()
    table_results, updated_tables, process_img= tools.ocr_pipelines.ocr_tables(tables, process_img , language)

    #G&DTs
    gdt_results, updated_gdt_boxes, process_img = tools.ocr_pipelines.ocr_gdt(process_img, gdt_boxes, recognizer_gdt)

    #Dimensions
    # Crop to the detected frame only when there is one; the offset logic
    # below already treats a missing frame as "no crop".
    if frame:
        process_img = process_img[frame.y : frame.y + frame.h, frame.x : frame.x + frame.w]
    dimensions, other_info, process_img, dim_pyt = tools.ocr_pipelines.ocr_dimensions(process_img, detector, recognizer_dim, alphabet_dim, dim_boxes, cluster_thres=20, max_img_size=1240, language=language, backg_save=False)

    #Masking
    mask_img = tools.output_tools.mask_img(img, updated_gdt_boxes, tables, dimensions, frame, other_info)

    end_time = time.time()
    times.append(end_time-start_time)

    #Postprocessing for metric computation
    # Boxes come back relative to the cropped image; shift them by the frame
    # origin so they can be compared with the full-image ground truth.
    if frame:
        offset = (frame.x, frame.y)
    else:
        offset = (0, 0)

    update_dimensions = []
    for dim in dimensions:
        box = dim[1]
        pts = np.array([box[i] + offset for i in range(4)])
        update_dimensions.append([dim[0], pts])
    for info in other_info:
        box = info[1]
        pts = np.array([box[i] + offset for i in range(4)])
        # Non-dimension text keeps its box but is relabelled so that
        # compute_metrics can exclude it from the dimension scores.
        update_dimensions.append(['other_info', pts])

    #Metrics computation
    dim_metrics, recog_metrics, mask_img, m_t = compute_metrics(os.path.join(folder_path, filename + '.csv'), update_dimensions, mask_img)

    #Dimensions
    for d in dimensions:
        print(d[0])
    #Other info
    print('---------Other info:----------')
    for o in other_info:
        print(o[0])
    print(dim_metrics)
    print(recog_metrics)

    precision.append(dim_metrics['precision'])
    recall.append(dim_metrics['recall'])
    iou.append(dim_metrics['IoU'])
    char_recall.append(recog_metrics['char_recall'])
    cer.append(recog_metrics['CER'])
    metrics_tools.append(m_t)

    #Display (disabled; enable to inspect the annotated image)
    #cv2.imwrite(file + '.png', mask_img)
    #cv2.imshow('boxes', mask_img)
    #cv2.waitKey(0)
    #cv2.destroyAllWindows()
def _percent(numerator, denominator):
    """Return ``numerator / denominator`` as a percentage, or 0 when the denominator is 0."""
    return numerator / denominator * 100 if denominator else 0


# Macro average: every image contributes equally, i.e. the plain mean of the
# per-image metrics collected in the loop above.
print('------------------')
print('Macro Average Results')
print('Precision:', np.mean(precision)*100, '%')
print('Recall:', np.mean(recall)*100, '%')
print('IoU', np.mean(iou)*100, '%')
# char_recall is already expressed in percent by compute_metrics.
print('Character Recall', np.mean(char_recall))
print('CER', np.mean(cer)*100, '%')

# Micro average: pool the raw counts of all images first, then compute each
# metric once, so images with more boxes/characters weigh more.
print('------------------')
print('Micro Average Results')
correct_detections = sum(row[0] for row in metrics_tools)
predictions = sum(row[1] for row in metrics_tools)
print('Precision:', _percent(correct_detections, predictions), '%')
gt = sum(row[2] for row in metrics_tools)
print('Recall:', _percent(correct_detections, gt), '%')
# From here on ``iou`` holds the IoU of every matched box across all images,
# no longer the per-image means.
iou = [item for row in metrics_tools for item in row[3]]
print('IoU:', np.mean(iou)*100 if iou else 0, '%')
correct_char = sum(row[4] for row in metrics_tools)
total_chars = sum(row[5] for row in metrics_tools)
print('Character Recall:', _percent(correct_char, total_chars), '%')
cum_pred = ''.join(row[6] for row in metrics_tools)
cum_gt = ''.join(row[7] for row in metrics_tools)
print('CER:',tools.train_tools.get_cer(cum_pred, cum_gt)*100, '%')

print('------------------')
print('Average time:', np.mean(times), 's')