# ocr / ocr_it.py
# jeyanthangj2004's picture
# Upload 110 files
# 3f42a6f verified
import cv2, string, os, time
import numpy as np
from edocr2 import tools
from pdf2image import convert_from_path
def ocr_drawing(file_path, recognizer_gdt, dimension_tuple, #Must have
                autoframe = True, language = 'eng', binary_thres= 127, frame_thres = 0.85, #general
                GDT_thres = 0.02, #GD&Ts
                cluster_thres= 15, max_char = 15, max_img_size=2048, #Dimensions
                output_path='.', save_mask=False, save_raw_output=False, backg_save=False #Output
                ):
    """Run the OCR pipeline (segmentation, tables, GD&T, dimensions) on one drawing.

    Args:
        file_path: Path to an image, or to a PDF (only its first page is used).
        recognizer_gdt: Recognizer object forwarded to ``ocr_gdt``.
        dimension_tuple: Three-item sequence whose items are forwarded, in
            order, as the 2nd-4th positional arguments of ``ocr_dimensions``
            (callers in this file pass ``(detector, recognizer, alphabet)``).
        autoframe, frame_thres, GDT_thres, binary_thres: Forwarded to
            ``layer_segm.segment_img``.
        language: Forwarded to ``ocr_tables``.
        cluster_thres, max_img_size, backg_save: Forwarded to ``ocr_dimensions``.
        max_char: Accepted for interface compatibility; this function does not
            currently forward it to any pipeline step.
        output_path: Base directory; when something is saved, a sub-directory
            named after the input file (without extension) is created in it.
        save_mask: When true, write ``<name>_mask.png`` to the output directory.
        save_raw_output: Forwarded as ``save=`` to ``process_raw_output``.

    Returns:
        A 5-tuple ``(results, times, updated_tables, img, process_img)`` where
        ``results`` is a dict with keys ``'tab'``, ``'gdts'``, ``'dim'`` and
        ``'other'``, and ``times`` lists the duration in seconds of the five
        stages: segmentation, tables, GD&T, dimensions, saving/processing.

    Raises:
        ValueError: If the (non-PDF) file cannot be read by ``cv2.imread``.
    """
    #Read file
    if file_path.lower().endswith('.pdf'):
        pages = convert_from_path(file_path)
        img = np.array(pages[0])
        # pdf2image yields PIL images (RGB) while cv2.imread yields BGR; convert so
        # that both branches hand the same channel order to the OpenCV-based steps
        # (and so cv2.imwrite does not swap colours in the saved mask).
        if img.ndim == 3 and img.shape[2] == 3:
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    else:
        img = cv2.imread(file_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError('Could not read image file: {}'.format(file_path))

    # Each entry of `times` is the wall-clock duration of one stage.
    times = []
    lap_start = time.time()

    #Layer Segmentation
    _, frame, gdt_boxes, tables, dim_boxes = tools.layer_segm.segment_img(
        img, autoframe = autoframe, frame_thres=frame_thres, GDT_thres = GDT_thres, binary_thres= binary_thres)
    now = time.time()
    times.append(now - lap_start)
    lap_start = now
    print('Segmentation Done')

    #OCR Tables
    # Work on a copy: the pipeline steps return an updated image, and the
    # untouched original is still needed for the mask and the return value.
    process_img = img.copy()
    table_results, updated_tables, process_img = tools.ocr_pipelines.ocr_tables(tables, process_img, language=language)
    now = time.time()
    times.append(now - lap_start)
    lap_start = now
    print('Prediction on Tables Done')

    #GD&T OCR
    gdt_results, updated_gdt_boxes, process_img = tools.ocr_pipelines.ocr_gdt(process_img, gdt_boxes, recognizer=recognizer_gdt)
    now = time.time()
    times.append(now - lap_start)
    lap_start = now
    print('Prediction on GD&T Done')

    #Dimension OCR
    if frame:
        # Restrict the dimension search to the area inside the detected frame.
        process_img = process_img[frame.y : frame.y + frame.h, frame.x : frame.x + frame.w]
    dimensions, other_info, process_img, _ = tools.ocr_pipelines.ocr_dimensions(
        process_img, dimension_tuple[0], dimension_tuple[1], dimension_tuple[2],
        frame, dim_boxes, max_img_size=max_img_size, cluster_thres=cluster_thres, backg_save=backg_save)
    now = time.time()
    times.append(now - lap_start)
    lap_start = now
    print('Prediction on dimensions and extra information Done')

    #Saving
    if save_mask or save_raw_output:
        filename = os.path.splitext(os.path.basename(file_path))[0]
        output_path = os.path.join(output_path, filename)
        os.makedirs(output_path, exist_ok=True)
    if save_mask:
        mask_img = tools.output_tools.mask_img(img, updated_gdt_boxes, updated_tables, dimensions, frame, other_info)
        cv2.imwrite(os.path.join(output_path, filename + '_mask.png'), mask_img)
        print('Mask saved')

    # Always called, even when nothing is saved: its return values are the
    # results handed back to the caller.
    table_results, gdt_results, dimensions, other_info = tools.output_tools.process_raw_output(
        output_path, table_results, gdt_results, dimensions, other_info, save=save_raw_output)
    now = time.time()
    times.append(now - lap_start)
    print('Raw output saved')

    return {'tab': table_results, 'gdts': gdt_results, 'dim': dimensions, 'other': other_info}, times, updated_tables, img, process_img
def ocr_one_drawing(file_path = '/home/javvi51/edocr2/tests/test_samples/4132864.jpg'):
    """Load the OCR models, process a single drawing and print a timing report.

    Args:
        file_path: Image or PDF path handed to ``ocr_drawing``.

    Returns:
        The results dict produced by ``ocr_drawing`` (keys ``'tab'``,
        ``'gdts'``, ``'dim'``, ``'other'``).
    """
    #Session Loading
    start_time = time.time()
    # Imported here so the model-loading cost is part of the measured session time.
    from edocr2.keras_ocr.recognition import Recognizer
    from edocr2.keras_ocr.detection import Detector

    gdt_model = 'edocr2/models/recognizer_gdts.keras'
    recognizer_gdt = Recognizer(alphabet=tools.ocr_pipelines.read_alphabet(gdt_model))
    recognizer_gdt.model.load_weights(gdt_model)

    dim_model = 'edocr2/models/recognizer_dimensions_2.keras'
    alphabet_dim = tools.ocr_pipelines.read_alphabet(dim_model)
    recognizer_dim = Recognizer(alphabet=alphabet_dim)
    recognizer_dim.model.load_weights(dim_model)

    detector = Detector()
    #detector.model.load_weights('edocr2/models/detector_8_31.keras')

    #Warming up models: run each once on a blank image before the timed OCR
    dummy_image = np.zeros((1, 1, 3), dtype=np.float32)
    _ = recognizer_gdt.recognize(dummy_image)
    _ = recognizer_dim.recognize(dummy_image)
    dummy_image = np.zeros((32, 32, 3), dtype=np.float32)
    _ = detector.detect([dummy_image])

    load_end = time.time()

    kwargs = {
        #General
        'file_path': file_path, #MUST: Image or pdf path to OCR
        'binary_thres': 127, #Pixel value (0-255) to detect contours, i.e., identify rectangles in the image
        'language': 'eng', #Language of the drawing; requires installing the specific tesseract language if not English
        'autoframe' : False, #Do we want to spot a frame as the maximum rectangle?
        'frame_thres': 0.95, #Frame boundary in % of img, if autoframe, this setting is overruled
        #GD&T
        'recognizer_gdt': recognizer_gdt, #MUST: The GD&T Recognizer instance loaded above
        'GDT_thres': 0.02, #Maximum percentage of the image area to consider a cluster of rectangles a GD&T box
        #Dimensions
        'dimension_tuple': (detector, recognizer_dim, alphabet_dim), #MUST: A Tuple with (detector, dimension recognizer, dimension alphabet)
        'cluster_thres': 20, #Minimum distance in pixels between two text predictions to consider the same text box
        'max_char': 15, #Max number of characters to consider a text prediction a dimension, otherwise -> other info
        'max_img_size': 2048, #Max size after applying scale for the img patch, bigger, better prediction and higher times
        #Output
        'backg_save': False, #Option to save the background once all text and boxes have been removed, for synth training purposes
        'output_path': '.', #Output path
        'save_mask': True, #Option to save the mask output
        'save_raw_output': True, #Option to save raw output, i.e., OCR text and box position,
    }

    # ocr_drawing returns five values; only the results dict and the stage
    # timings are needed here, so the remaining ones are discarded.
    results, times, *_ = ocr_drawing(**kwargs)
    final_time = time.time()

    print(
        "Session Timing Report:\n"
        "Loading session: {:.3f} s\n"
        "----------------------\n"
        "Drawing segmentation: {:.3f} s\n"
        "Table prediction: {:.3f} s\n"
        "GD&T prediction: {:.3f} s\n"
        "Dimension & other info: {:.3f} s\n"
        "Saving & processing: {:.3f} s\n"
        "----------------------\n"
        "OCR in {}: {:.3f} s\n"
        "----------------------\n"
        "Total time: {:.3f} s\n"
        .format(load_end - start_time, times[0], times[1], times[2], times[3], times[4], os.path.basename(file_path), sum(times), final_time - start_time)
    )
    return results
def ocr_folder(folder_path = '/home/javvi51/edocr2/tests/test_samples/Washers', output_path = 'shit/'):
    """Load the OCR models once and process every regular file in a folder.

    Args:
        folder_path: Directory whose direct child files are each handed to
            ``ocr_drawing`` (sub-directories are skipped).
        output_path: Base output directory forwarded to ``ocr_drawing``. The
            default is the value that used to be hard-coded, kept for
            backward compatibility.

    Returns:
        Dict mapping each file's base name to the results dict returned by
        ``ocr_drawing`` for that file.
    """
    #Session Loading
    start_time = time.time()
    # Imported here so the model-loading cost is part of the measured session time.
    from edocr2.keras_ocr.recognition import Recognizer
    from edocr2.keras_ocr.detection import Detector

    gdt_model = 'edocr2/models/recognizer_gdts.keras'
    recognizer_gdt = Recognizer(alphabet=tools.ocr_pipelines.read_alphabet(gdt_model))
    recognizer_gdt.model.load_weights(gdt_model)

    dim_model = 'edocr2/models/recognizer_dimensions_2.keras'
    alphabet_dim = tools.ocr_pipelines.read_alphabet(dim_model)
    recognizer_dim = Recognizer(alphabet=alphabet_dim)
    recognizer_dim.model.load_weights(dim_model)

    detector = Detector()
    #detector.model.load_weights('path/to/custom/detector')

    #Warming up models: run each once on a blank image before the timed OCR
    dummy_image = np.zeros((1, 1, 3), dtype=np.float32)
    _ = recognizer_gdt.recognize(dummy_image)
    _ = recognizer_dim.recognize(dummy_image)
    dummy_image = np.zeros((32, 32, 3), dtype=np.float32)
    _ = detector.detect([dummy_image])

    load_end = time.time()

    file_paths = [os.path.join(folder_path, filename) for filename in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, filename))]
    times = []  # total OCR time per file, parallel to file_paths
    results = {}
    for file_path in file_paths:
        kwargs = {
            #General
            'file_path': file_path, #MUST: Image or pdf path to OCR
            'binary_thres': 127, #Pixel value (0-255) to detect contours, i.e., identify rectangles in the image
            'language': 'eng', #Language of the drawing; requires installing the specific tesseract language if not English
            'autoframe' : False, #Do we want to spot a frame as the maximum rectangle?
            'frame_thres': 0.9, #Frame boundary in % of img, if autoframe, this setting is overruled
            #GD&T
            'recognizer_gdt': recognizer_gdt, #MUST: The GD&T Recognizer instance loaded above
            'GDT_thres': 0.02, #Maximum percentage of the image area to consider a cluster of rectangles a GD&T box
            #Dimensions
            'dimension_tuple': (detector, recognizer_dim, alphabet_dim), #MUST: A Tuple with (detector, dimension recognizer, dimension alphabet)
            'cluster_thres': 20, #Minimum distance in pixels between two text predictions to consider the same text box
            'max_char': 15, #Max number of characters to consider a text prediction a dimension, otherwise -> other info
            'max_img_size': 1024, #Max size after applying scale for the img patch, bigger, better prediction, but more computationally expensive
            #Output
            'backg_save': False, #Option to save the background once all text and boxes have been removed, for synth training purposes
            'output_path': output_path, #Output path
            'save_mask': True, #Option to save the mask output
            'save_raw_output': True, #Option to save raw output, i.e., OCR text and box position,
        }
        # ocr_drawing returns five values; only the results dict and the stage
        # timings are needed here, so the remaining ones are discarded.
        results_, times_, *_ = ocr_drawing(**kwargs)
        results[os.path.basename(file_path)]= results_
        times.append(sum(times_))
        print(
            "OCR in {}:\n"
            "Drawing segmentation: {:.3f} s\n"
            "Table prediction: {:.3f} s\n"
            "GD&T prediction: {:.3f} s\n"
            "Dimension & other info: {:.3f} s\n"
            "Saving & processing: {:.3f} s\n"
            "----------------------\n"
            "Total time: {:.3f} s\n"
            .format(os.path.basename(file_path),times_[0], times_[1], times_[2], times_[3], times_[4], sum(times_)))

    final_time = time.time()

    print(
        "Session Timing Report:\n"
        "Loading session: {:.3f} s\n"
        "----------------------\n"
        .format(load_end - start_time))
    for processed_path, elapsed in zip(file_paths, times):
        print("OCR in {}: {:.3f} s".format(os.path.basename(processed_path), elapsed))
    print(
        "----------------------\n"
        "Total time: {:.3f} s\n"
        .format(final_time - start_time))
    return results