import csv
import glob
import json
import math
import os
import time

import cv2
import numpy as np
from pdf2image import convert_from_path

from edocr2 import tools


def process_json_labels(folder_path):
    """Convert every labelme-style ``*.json`` annotation in *folder_path* into a
    sibling ``*.csv`` file with one row per shape: 8 corner coordinates
    (clockwise, starting at the lowest-x point) followed by the label string.
    """

    def order_points_clockwise(points):
        """Return *points* ordered clockwise, starting from the point with the
        lowest x value (lowest y breaks ties)."""
        start_point = min(points, key=lambda p: (p[0], p[1]))

        def angle_from_start(point):
            # Angle of the vector start_point -> point; sorting by it walks the
            # remaining vertices around the polygon.
            return math.atan2(point[1] - start_point[1], point[0] - start_point[0])

        # Exclude the start point itself, then sort the rest by angle.
        sorted_points = sorted((p for p in points if p != start_point),
                               key=angle_from_start)
        return [start_point] + sorted_points

    def convert_json_to_coordinates(json_data):
        """Flatten each rectangle/polygon shape into
        ``[x1, y1, x2, y2, x3, y3, x4, y4, label]``."""
        converted_data = []
        for shape in json_data['shapes']:
            if shape['shape_type'] == 'rectangle':
                label = shape['label']
                # Rectangles are stored as two opposite corners.
                (x1, y1), (x2, y2) = shape['points']
                x3, y3 = x2, y1  # top-right
                x4, y4 = x1, y2  # bottom-left
                converted_data.append([x1, y1, x3, y3, x2, y2, x4, y4, label])
            elif shape['shape_type'] == 'polygon':
                label = shape['label']
                ordered_points = order_points_clockwise(shape['points'])
                flattened_points = [coord for point in ordered_points for coord in point]
                converted_data.append(flattened_points + [label])
        return converted_data

    json_files = glob.glob(os.path.join(folder_path, '*.json'))
    for file in json_files:
        with open(file, 'r') as f:
            json_data = json.load(f)
        converted_data = convert_json_to_coordinates(json_data)
        output_csv_file = os.path.splitext(file)[0] + '.csv'
        # Write the converted rows to the sibling CSV file.
        with open(output_csv_file, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            for item in converted_data:
                # Coordinates may be floats in the JSON; store them as ints.
                item_int = [int(x) if isinstance(x, float) else x for x in item]
                csvwriter.writerow(item_int)


def compute_metrics(filename, predictions, mask_img, iou_thres=0.2):
    """Compare *predictions* against the ground-truth CSV *filename*.

    Args:
        filename: path to a CSV produced by :func:`process_json_labels`
            (8 corner coordinates + label per row).
        predictions: iterable of ``(label, 4x2 box array)`` pairs.
        mask_img: image onto which the ground-truth boxes are drawn.
        iou_thres: minimum IoU for a prediction to count as a detection.

    Returns:
        ``(dim_metrics, recog_metrics, mask_img, raw_counts)`` where
        *raw_counts* holds the accumulators needed for macro averaging:
        ``[correct_detections, n_pred_dim, n_gt_dim, iou_scores,
        correct_chars, total_chars, cum_pred, cum_gt]``.
    """
    correct_detections = 0
    detection_iou_scores = []
    total_chars, correct_chars = 0, 0
    cum_gt, cum_pred = '', ''

    ground_truth = []
    with open(filename, newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            # BUG FIX: csv.reader yields strings; the coordinates must be
            # converted to numbers before being used in IoU computations.
            c = [float(v) for v in row[:8]]
            ground_truth.append((row[8], np.array([[c[0], c[1]], [c[2], c[3]],
                                                   [c[4], c[5]], [c[6], c[7]]])))

    for gt in ground_truth:
        # Find the prediction with the highest IoU against this ground truth.
        best_pred = None
        best_iou = 0
        for pred in predictions:
            iou = tools.train_tools.calculate_iou(pred[1], gt[1])
            if iou > best_iou:
                best_iou = iou
                best_pred = pred
        if best_iou >= iou_thres:
            # Only dimension boxes count; 'other_info' is excluded on both sides.
            if gt[0] != 'other_info' and best_pred[0] != 'other_info':
                # Detection
                correct_detections += 1
                detection_iou_scores.append(best_iou)
                # Recognition
                label = gt[0]
                recog = best_pred[0]
                cum_gt += label
                cum_pred += recog
                correct_char = tools.train_tools.compare_characters(label, recog)
                total_chars += len(label)
                correct_chars += correct_char

    gt_dim = [g for g in ground_truth if g[0] != 'other_info']
    predictions_dim = [pr for pr in predictions if pr[0] != 'other_info']
    precision = correct_detections / len(predictions_dim) if predictions_dim else 0
    recall = correct_detections / len(gt_dim) if gt_dim else 0
    # BUG FIX: np.mean of an empty list is NaN (plus a RuntimeWarning); report 0
    # when nothing was detected instead.
    average_iou = np.mean(detection_iou_scores) if detection_iou_scores else 0
    dim_metrics = {'precision': precision, 'recall': recall, 'IoU': average_iou}

    char_recall = (correct_chars / total_chars) * 100 if total_chars > 0 else 0
    cer = tools.train_tools.get_cer(cum_pred, cum_gt)
    recog_metrics = {'char_recall': char_recall, 'CER': cer}

    # Draw every ground-truth box on the mask image for visual inspection.
    for gt in ground_truth:
        box = gt[1]
        pts = np.array([(box[0]), (box[1]), (box[2]), (box[3])]).astype(np.int64)
        mask_img = cv2.polylines(mask_img, [pts], isClosed=True,
                                 color=(0, 255, 0), thickness=2)

    return dim_metrics, recog_metrics, mask_img, [
        correct_detections, len(predictions_dim), len(gt_dim),
        detection_iou_scores, correct_chars, total_chars, cum_pred, cum_gt]


folder_path = 'tests/test_samples/'
process_json_labels(folder_path)
language = 'eng'

# region Set Session ##############################
start_time = time.time()
# os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
import tensorflow as tf
from edocr2.keras_ocr.recognition import Recognizer
from edocr2.keras_ocr.detection import Detector

# Configure GPU memory growth so TF does not grab all VRAM up front.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Load models
gdt_model = 'edocr2/models/recognizer_gdts.keras'
dim_model = 'edocr2/models/recognizer_dimensions_2.keras'
detector_model = None  # 'edocr2/models/detector_12_46.keras'

recognizer_gdt = Recognizer(alphabet=tools.ocr_pipelines.read_alphabet(gdt_model))
recognizer_gdt.model.load_weights(gdt_model)
alphabet_dim = tools.ocr_pipelines.read_alphabet(dim_model)
recognizer_dim = Recognizer(alphabet=alphabet_dim)
recognizer_dim.model.load_weights(dim_model)
detector = Detector()
if detector_model:
    detector.model.load_weights(detector_model)

end_time = time.time()
print(f"\033[1;33mLoading session took {end_time - start_time:.6f} seconds to run.\033[0m")
# endregion

precision = []
recall = []
iou = []
char_recall = []
cer = []
metrics_tools = []
times = []

for file in os.listdir(folder_path):
    # BUG FIX: the loop body has a PDF branch that was unreachable because the
    # filter only accepted .jpg/.png — accept PDFs as well.
    if file.endswith((".jpg", ".png", ".pdf", ".PDF")):
        start_time = time.time()
        # Loading drawing
        if file.endswith('.pdf') or file.endswith(".PDF"):
            img = convert_from_path(os.path.join(folder_path, file))
            img = np.array(img[0])
        else:
            img = cv2.imread(os.path.join(folder_path, file))
        filename = os.path.splitext(os.path.basename(file))[0]

        # Segmentation
        img_boxes, frame, gdt_boxes, tables, dim_boxes = tools.layer_segm.segment_img(
            img, autoframe=True, frame_thres=0.7, GDT_thres=0.02, binary_thres=127)

        # Tables
        process_img = img.copy()
        table_results, updated_tables, process_img = tools.ocr_pipelines.ocr_tables(
            tables, process_img, language)

        # G&DTs
        gdt_results, updated_gdt_boxes, process_img = tools.ocr_pipelines.ocr_gdt(
            process_img, gdt_boxes, recognizer_gdt)

        # Dimensions (cropped to the detected frame)
        process_img = process_img[frame.y: frame.y + frame.h, frame.x: frame.x + frame.w]
        dimensions, other_info, process_img, dim_pyt = tools.ocr_pipelines.ocr_dimensions(
            process_img, detector, recognizer_dim, alphabet_dim, dim_boxes,
            cluster_thres=20, max_img_size=1240, language=language, backg_save=False)

        # Masking
        mask_img = tools.output_tools.mask_img(
            img, updated_gdt_boxes, tables, dimensions, frame, other_info)

        end_time = time.time()
        times.append(end_time - start_time)

        # Postprocessing for metric computation: shift dimension boxes back into
        # full-image coordinates (they were detected inside the frame crop).
        if frame:
            offset = (frame.x, frame.y)
        else:
            offset = (0, 0)
        update_dimensions = []
        for dim in dimensions:
            box = dim[1]
            pts = np.array([(box[0] + offset), (box[1] + offset),
                            (box[2] + offset), (box[3] + offset)])
            update_dimensions.append([dim[0], pts])
        for dim in other_info:
            box = dim[1]
            pts = np.array([(box[0] + offset), (box[1] + offset),
                            (box[2] + offset), (box[3] + offset)])
            update_dimensions.append(['other_info', pts])

        # Metrics computation
        dim_metrics, recog_metrics, mask_img, m_t = compute_metrics(
            os.path.join(folder_path, filename + '.csv'), update_dimensions, mask_img)

        # Dimensions
        for d in dimensions:
            print(d[0])
        # Other info
        print('---------Other info:----------')
        for o in other_info:
            print(o[0])
        print(dim_metrics)
        print(recog_metrics)

        precision.append(dim_metrics['precision'])
        recall.append(dim_metrics['recall'])
        iou.append(dim_metrics['IoU'])
        char_recall.append(recog_metrics['char_recall'])
        cer.append(recog_metrics['CER'])
        metrics_tools.append(m_t)

        # Display
        # cv2.imwrite(file + '.png', mask_img)
        '''cv2.imshow('boxes', mask_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()'''

print('------------------')
print('Micro Average Results')
print('Precision:', np.mean(precision) * 100, '%')
print('Recall:', np.mean(recall) * 100, '%')
print('IoU', np.mean(iou) * 100, '%')
print('Character Recall', np.mean(char_recall))
print('CER', np.mean(cer) * 100, '%')
print('------------------')
print('Macro Average Results')
correct_detections = sum(row[0] for row in metrics_tools)
predictions = sum(row[1] for row in metrics_tools)
# BUG FIX: guard the macro-average divisions against empty result sets.
print('Precision:', correct_detections / predictions * 100 if predictions else 0, '%')
gt = sum(row[2] for row in metrics_tools)
print('Recall:', correct_detections / gt * 100 if gt else 0, '%')
iou = [item for row in metrics_tools for item in row[3]]
print('IoU:', np.mean(iou) * 100 if iou else 0, '%')
correct_char = sum(row[4] for row in metrics_tools)
total_chars = sum(row[5] for row in metrics_tools)
print('Character Recall:', correct_char / total_chars * 100 if total_chars else 0, '%')
cum_pred = ''.join(row[6] for row in metrics_tools)
cum_gt = ''.join(row[7] for row in metrics_tools)
print('CER:', tools.train_tools.get_cer(cum_pred, cum_gt) * 100, '%')
print('------------------')
print('Average time:', np.mean(times), 's')