import cv2, math, os
import numpy as np

def read_alphabet(keras_path):
    """Read the recognizer alphabet from the .txt file next to the .keras model."""
    txt_path = os.path.splitext(keras_path)[0] + '.txt'
    with open(txt_path, 'r') as file:
        content = file.readline().strip()
    return content
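# Example (hypothetical paths; assumes 'models/recognizer.keras' ships with a
# sibling 'models/recognizer.txt' whose first line holds the alphabet):
#   alphabet = read_alphabet('models/recognizer.keras')
#   print(alphabet)  # e.g. '0123456789.,+-'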
###################### Tables and Others Pipeline #################################

def ocr_img_cv2(image_cv2, language = None, psm = 11):
    """Recognize text in an OpenCV image using pytesseract and return both text and positions.
    Args:
        image_cv2: OpenCV image object (BGR).
        language: Optional Tesseract language code (e.g. 'eng').
        psm: Tesseract page segmentation mode (default 11, sparse text).
    Returns:
        A tuple (result, all_text): a list of dictionaries with the recognized
        text and positions (left, top, width, height), and the concatenated text.
    """
    import pytesseract
    # Convert the OpenCV image to RGB format (pytesseract expects this)
    img_rgb = cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)
    # Custom configuration to recognize a more complete set of characters
    if language:
        custom_config = f'--psm {psm} -l {language}'
    else:
        custom_config = f'--psm {psm}'
    # Perform OCR and get bounding box details
    ocr_data = pytesseract.image_to_data(img_rgb, config=custom_config, output_type=pytesseract.Output.DICT)
    # Prepare result: text with their positions
    result = []
    all_text = ''
    for i in range(len(ocr_data['text'])):
        if ocr_data['text'][i].strip():  # If text is not empty
            text_info = {
                'text': ocr_data['text'][i],
                'left': ocr_data['left'][i],
                'top': ocr_data['top'][i],
                'width': ocr_data['width'][i],
                'height': ocr_data['height'][i]
            }
            all_text += ocr_data['text'][i]
            result.append(text_info)
    return result, all_text
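# Example (a minimal sketch; assumes pytesseract and the Tesseract binary are
# installed, and 'drawing.png' is a hypothetical test image):
#   img = cv2.imread('drawing.png')
#   words, joined = ocr_img_cv2(img, language='eng', psm=11)
#   for w in words:
#       print(w['text'], (w['left'], w['top'], w['width'], w['height']))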
def ocr_tables(tables, process_img, language = None):
    results = []
    updated_tables = []
    # Process tables bottom-up (descending y, then x) using each table's first box
    tables = sorted(tables, key=lambda cluster_dict: next(iter(cluster_dict)).y * 10000 + next(iter(cluster_dict)).x, reverse=True)
    for table in tables:
        for b in table:
            img = process_img[b.y : b.y + b.h, b.x : b.x + b.w]
            result, all_text = ocr_img_cv2(img, language)
            if result == [] or len(all_text) < 5:
                continue
            else:
                # Shift word positions from table-local to full-image coordinates
                for r in result:
                    r['left'] += b.x
                    r['top'] += b.y
                results.append(result)
                updated_tables.append(table)
    # White out the recognized tables so later pipelines skip them
    for table in updated_tables:
        for b in table:
            process_img[b.y : b.y + b.h, b.x : b.x + b.w] = 255
    return results, updated_tables, process_img
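# Example (a sketch under assumptions: each table is a dict whose keys are
# box-like objects exposing x, y, w, h, matching how ocr_tables iterates them;
# Rect below is a hypothetical stand-in for that interface):
#   from collections import namedtuple
#   Rect = namedtuple('Rect', 'x y w h')
#   tables = [{Rect(x=10, y=10, w=200, h=40): None}]
#   results, kept, cleaned = ocr_tables(tables, img.copy(), language='eng')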
##################### GDT Pipeline #####################################

def img_not_empty(roi, color_thres = 100):
    # Convert the ROI to grayscale
    gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    # Check if all pixels are near black or near white
    min_val, max_val, _, _ = cv2.minMaxLoc(gray_roi)
    # If the spread between min and max pixel values exceeds the threshold, the box contains content
    if (max_val - min_val) < color_thres:
        return False
    return True

def is_not_empty(img, boxes, color_thres):
    for box in boxes:
        # Extract the region of interest (ROI) from the image
        roi = img[box.y + 2:box.y + box.h - 4, box.x + 2:box.x + box.w - 4]
        if img_not_empty(roi, color_thres) == False:
            return False
    return True
def sort_gdt_boxes(boxes, y_thres = 3):
    """Sorts boxes in reading order: left-to-right, then top-to-bottom.
    Args:
        boxes: List of Rect objects or any object with x, y, w, h attributes.
        y_thres: A threshold to group boxes that are on the same line (default is 3 pixels).
    Returns:
        A list of boxes sorted in reading order.
    """
    # Sort by the y-coordinate first (top-to-bottom)
    boxes.sort(key=lambda b: b.y)
    sorted_boxes = []
    current_line = []
    current_y = boxes[0].y
    for box in boxes:
        # If the box's y-coordinate is close to the current line's y-coordinate, add it to the same line
        if abs(box.y - current_y) <= y_thres:
            current_line.append(box)
        else:
            # Sort the current line by x-coordinate (left-to-right)
            current_line.sort(key=lambda b: b.x)
            sorted_boxes.extend(current_line)
            # Start a new line with the current box
            current_line = [box]
            current_y = box.y
    # Sort the last line and add it
    current_line.sort(key=lambda b: b.x)
    sorted_boxes.extend(current_line)
    return sorted_boxes
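# Example (self-contained, with a hypothetical Rect type):
#   from collections import namedtuple
#   Rect = namedtuple('Rect', 'x y w h')
#   boxes = [Rect(50, 12, 20, 10), Rect(10, 10, 20, 10), Rect(10, 40, 20, 10)]
#   [(b.x, b.y) for b in sort_gdt_boxes(boxes, y_thres=3)]
#   # -> [(10, 10), (50, 12), (10, 40)]: same-line boxes come left-to-right first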
def recognize_gdt(img, block, recognizer):
    # Recognize the first box, then append the rest, inserting a newline
    # whenever the next box sits clearly below the previous one
    roi = img[block[0].y + 2:block[0].y + block[0].h - 4, block[0].x + 2:block[0].x + block[0].w - 4]
    pred = recognizer.recognize(image = roi)
    #cv2.imwrite(f"{0}.png", roi)
    for i in range(1, len(block)):
        new_line = block[i].y - block[i - 1].y > 5
        roi = img[block[i].y:block[i].y + block[i].h, block[i].x:block[i].x + block[i].w]
        p = recognizer.recognize(image = roi)
        #cv2.imwrite(f"{i}.png", roi)
        if new_line:
            pred += '\n' + p
        else:
            pred += '|' + p
    # A GD&T frame should contain at least one digit; otherwise discard it
    if any(char.isdigit() for char in pred):
        return pred
    else:
        return None
def ocr_gdt(img, gdt_boxes, recognizer):
    updated_gdts = []
    results = []
    if gdt_boxes:
        for block in gdt_boxes:
            for _, bl_list in block.items():
                if is_not_empty(img, bl_list, 50):
                    sorted_block = sort_gdt_boxes(bl_list, 3)
                    pred = recognize_gdt(img, sorted_block, recognizer)
                    if pred:
                        updated_gdts.append(block)
                        results.append([pred, (sorted_block[0].x, sorted_block[0].y)])
    # White out the recognized GD&T frames
    for gdt in updated_gdts:
        for g in gdt.values():
            for b in g:
                img[b.y - 5 : b.y + b.h + 10, b.x - 5 : b.x + b.w + 10] = 255
    return results, updated_gdts, img
##################### Dimension Pipeline ###############################

class Pipeline:
    """A wrapper for a combination of detector and recognizer.
    Args:
        detector: The detector to use
        recognizer: The recognizer to use
        alphabet_dimensions: The character set expected in dimensions
        cluster_t: Distance threshold (pixels) for grouping detected boxes
        scale: The scale factor to apply to input images
        matching_t: Minimum score for symbol template matching
        max_size: The maximum single-side dimension of images for
            inference.
        language: Tesseract language code used for fallback OCR
    """
    def __init__(self, detector, recognizer, alphabet_dimensions, cluster_t = 20, scale = 2, matching_t = 0.6, max_size = 1024, language = 'eng'):
        self.scale = scale
        self.detector = detector
        self.recognizer = recognizer
        self.max_size = max_size
        self.language = language
        self.alphabet_dimensions = alphabet_dimensions
        self.cluster_t = cluster_t
        self.matching_t = matching_t
    def symbol_search(self, img, dimensions, folder_code = 'u2300', char = '⌀'):
        def template_matching(img_, cnts, folder_path, thres, angle, xy2, rotate):
            angle = math.radians(angle)
            box_points = None
            for cnt in cnts:
                x, y, w, h = cv2.boundingRect(cnt)
                if h > img_.shape[0] * 0.3:
                    img_2 = img_[y:y + h, x:x + w]
                    y_pad, x_pad = int(img_2.shape[0] * 0.3), 40
                    pad_img = cv2.copyMakeBorder(img_2, y_pad, y_pad, x_pad, x_pad, cv2.BORDER_CONSTANT, value=[255,255,255])
                    #cv2.imshow('pads', pad_img)
                    for file in os.listdir(folder_path):
                        symb = cv2.imread(os.path.join(folder_path, file))
                        if rotate:
                            symb = cv2.rotate(symb, cv2.ROTATE_90_COUNTERCLOCKWISE)
                        gray = cv2.cvtColor(symb, cv2.COLOR_BGR2GRAY)
                        _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
                        contours_smb, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                        x_, y_, w_, h_ = cv2.boundingRect(contours_smb[0])
                        symb_img = symb[y_:y_ + h_, x_:x_ + w_]
                        # Calculate scale factor to resize the symbol to the target height
                        scale_factor = h / h_
                        if scale_factor < 2:
                            scaled_symb = cv2.resize(symb_img, (0, 0), fx=scale_factor, fy=scale_factor)
                            # Perform template matching
                            result = cv2.matchTemplate(pad_img, scaled_symb, cv2.TM_CCOEFF_NORMED)
                            _, max_val, _, _ = cv2.minMaxLoc(result)
                            if max_val >= thres:
                                local = [
                                    (x, y),          # top-left
                                    (x + w, y),      # top-right
                                    (x + w, y + h),  # bottom-right
                                    (x, y + h)       # bottom-left
                                ]
                                # Rotate the local corners by `angle` and translate by xy2
                                # to map them back into full-image coordinates
                                box_points = [
                                    (xy2[0] + math.cos(angle)*local[0][0] - math.sin(angle)*local[0][1], xy2[1] + math.cos(angle)*local[0][1] + math.sin(angle)*local[0][0]),  # top-left
                                    (xy2[0] + math.cos(angle)*local[1][0] - math.sin(angle)*local[1][1], xy2[1] + math.cos(angle)*local[1][1] + math.sin(angle)*local[1][0]),  # top-right
                                    (xy2[0] + math.cos(angle)*local[2][0] - math.sin(angle)*local[2][1], xy2[1] + math.cos(angle)*local[2][1] + math.sin(angle)*local[2][0]),  # bottom-right
                                    (xy2[0] + math.cos(angle)*local[3][0] - math.sin(angle)*local[3][1], xy2[1] + math.cos(angle)*local[3][1] + math.sin(angle)*local[3][0])   # bottom-left
                                ]
                                #cv2.imshow('symb', scaled_symb)
                                #cv2.circle(mask_img, (int(xy2[0]), int(xy2[1])), radius=1, color=(255, 0, 0), thickness=-1)
                                thres = max_val  # Raise the bar: keep only the best match
            #cv2.waitKey(0)
            #cv2.destroyAllWindows()
            return box_points

        from shapely.geometry import Polygon
        from shapely.ops import unary_union
        mask_img = img.copy()
        old_dim, new_dimensions, boxes = [], [], []
        folder_path = os.path.join('edocr2/tools/symbol_match', folder_code)
        for dim in dimensions:
            # Filter out dimensions that already carry the diameter symbol
            if char in dim[0]:
                continue
            else:
                rect = cv2.minAreaRect(np.array(dim[1], dtype=np.float32))
                if len(dim[0]) == 1:
                    # Expansion on the short side
                    w_multiplier, h_multiplier = 1.3, max([2*min(rect[1]), 300])/min(rect[1])
                    img_, cnts, angle = postprocess_detection(img, dim[1], w_multiplier, h_multiplier, 5)
                    scaled_rect = (rect[0], (img_.shape[0], img_.shape[1]), angle-90)
                    rotate = True
                else:
                    # Expansion on the long side
                    w_multiplier, h_multiplier = max([2*max(rect[1]), 300])/max(rect[1]), 1.3
                    img_, cnts, angle = postprocess_detection(img, dim[1], w_multiplier, h_multiplier, 5)
                    scaled_rect = (rect[0], (img_.shape[0], img_.shape[1]), angle-90)
                    rotate = False
                '''cv2.imshow('matches', img_)
                cv2.waitKey(0)
                cv2.destroyAllWindows()'''
                polygon_ = cv2.boxPoints(scaled_rect)
                xy2 = (rect[0][0] - scaled_rect[1][1]/2*math.cos(math.radians(angle)) + scaled_rect[1][0]/2*math.sin(math.radians(angle)),
                       rect[0][1] - scaled_rect[1][1]/2*math.sin(math.radians(angle)) - scaled_rect[1][0]/2*math.cos(math.radians(angle)))
                box = list(polygon_)
                '''pts=np.array([(box[0]),(box[1]),(box[2]),(box[3])]).astype(np.int64)
                mask_img = cv2.polylines(mask_img, [pts], isClosed=True, color=(0, 0, 255), thickness=2)'''
                box = template_matching(img_, cnts, folder_path, self.matching_t, angle, xy2, rotate)
                if box:
                    pts = np.array([(box[0]), (box[1]), (box[2]), (box[3])]).astype(np.int64)
                    mask_img = cv2.polylines(mask_img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
                    # Merge the matched symbol box with the original dimension box
                    poly2 = Polygon(box)
                    poly1 = Polygon(cv2.boxPoints(rect))
                    merged_poly = unary_union([poly1, poly2])
                    final_box = merged_poly.minimum_rotated_rectangle.exterior.coords[0:4]
                    #new_dim, _, _ = self.recognize_dimensions(np.int32([final_box]), np.array(img))
                    boxes.append(final_box)
                    old_dim.append(dim)
        '''cv2.imshow('matches', mask_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows() '''
        for o in old_dim:
            dimensions.remove(o)
        # Re-cluster the merged boxes and re-recognize them with the symbol prepended
        boxes = group_polygons_by_proximity(boxes, eps = self.cluster_t)
        new_group = [box for box in boxes]
        new_dimensions, _, _ = self.recognize_dimensions(np.int32(new_group), np.array(img))
        for nd in new_dimensions:
            if char in nd[0]:
                dimensions.append(nd)
            elif nd[0][0] in set('0,).D:Z°Bx'):
                # These leading characters are likely misreads of the diameter glyph
                dimensions.append((char + nd[0][1:], nd[1]))
            else:
                dimensions.append((char + nd[0], nd[1]))
        '''if new_dimensions:
            cv2.imshow('matches', mask_img)
            cv2.waitKey(0)
            cv2.destroyAllWindows()'''
        return dimensions
    def detect(self, img, detection_kwargs = None):
        """Run the pipeline's detector on one image.
        Args:
            img: The image to parse (numpy array)
            detection_kwargs: Arguments to pass to the detector call
        Returns:
            A list of groups of boxes, one group per image, in original-image
            coordinates.
        """
        from edocr2.keras_ocr.tools import adjust_boxes
        # Upscale small images by self.scale; downscale large ones to fit max_size
        if np.max((img.shape[0], img.shape[1])) < self.max_size / self.scale:
            scale = self.scale
        else:
            scale = self.max_size / np.max((img.shape[0], img.shape[1]))
        if detection_kwargs is None:
            detection_kwargs = {}
        new_size = (int(img.shape[1] * scale), int(img.shape[0] * scale))
        img = cv2.resize(img, new_size, interpolation=cv2.INTER_LINEAR)
        box_groups = self.detector.detect(images=[img], **detection_kwargs)
        # Map detected boxes back to the original (unscaled) image coordinates
        box_groups = [
            adjust_boxes(boxes=boxes, boxes_format="boxes", scale=1 / scale)
            if scale != 1
            else boxes
            for boxes, scale in zip(box_groups, [scale])
        ]
        return box_groups
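    # Example (hedged usage sketch; constructing the detector and recognizer is
    # project-specific and assumed here):
    #   pipeline = Pipeline(detector, recognizer, alphabet_dimensions='0123456789.')
    #   box_groups = pipeline.detect(cv2.imread('drawing.png'))
    #   # box_groups[0] holds 4x2 corner arrays in original-image coordinates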
    def ocr_the_rest(self, img, lang):
        def sort_boxes_by_centers(boxes, y_threshold=20):
            # Sort primarily by top (top-to-bottom), secondarily by left (left-to-right)
            sorted_boxes = sorted(boxes, key=lambda box: (box['top'], box['left']))
            final_sorted_text = ""
            current_line = []
            current_y = sorted_boxes[0]['top']  # Top of the first box
            for box in sorted_boxes:
                if abs(box['top'] - current_y) <= y_threshold:  # Within threshold: same line
                    current_line.append(box)
                else:
                    # Sort the current line left-to-right and flush it
                    current_line = sorted(current_line, key=lambda b: b['left'])
                    line_text = ' '.join([b['text'] for b in current_line])
                    final_sorted_text += line_text + '\n'
                    current_line = [box]  # Start a new line
                    current_y = box['top']
            # Sort the last line and add it to the final result
            current_line = sorted(current_line, key=lambda b: b['left'])
            line_text = ' '.join([b['text'] for b in current_line])
            final_sorted_text += line_text  # No newline for the last line
            return final_sorted_text

        results, _ = ocr_img_cv2(img, lang)
        if results:
            text = sort_boxes_by_centers(results)
            return text
        return ''
    def dimension_criteria(self, img):
        pred_nor = self.ocr_the_rest(img, 'nor')  # Norwegian includes the o-slash (Ø, ø), convenient for recognizing the diameter symbol ⌀
        pred_eng = self.ocr_the_rest(img, 'eng')  # However, it performs worse than English, so it cannot be trusted on its own
        allowed_exceptions_nor = set('''-.»Ø,/!«Æ()Å:'"[];|“?Ö=*Ä”&É<>+$£%—€øåæöéIZNOoPXiLlk \n''')
        allowed_exceptions_eng = set('''?—!@#~;¢«#_%\&€$»[é]®§¥©‘™="~'£<*“”I|ZNOXiLlk \n''')
        ok_nor = all(char in set(self.alphabet_dimensions) or char in allowed_exceptions_nor for char in pred_nor)
        ok_eng = all(char in set(self.alphabet_dimensions) or char in allowed_exceptions_eng for char in pred_eng)
        if ok_nor or ok_eng or len(pred_eng) < 2 or len(pred_nor) < 2:
            return True  # In any case, no prediction can yet be fully trusted; the edocr recognizer should perform better if the chars are present
        return False
    def recognize_dimensions(self, box_groups, img):
        predictions = []
        predictions_pyt = []
        other_info = []

        def adjust_padding(img):
            # Crop to the ink bounding box, then add a uniform 5 px white border
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV)
            cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]  # Get contours
            if cnts:
                x, y, w, h = cv2.boundingRect(np.concatenate(cnts))
                # Crop the image using the bounding box
                img = img[y:y+h, x:x+w]
                img = cv2.copyMakeBorder(img, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=[255,255,255])
            return img

        def adjust_stroke(img):
            # Thicken abnormally thin strokes so the recognizer sees a uniform line weight
            img_ = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(img_, 200, 255, cv2.THRESH_BINARY_INV)
            contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]  # Get contours
            final_img = np.full_like(img_, 255)
            stroke_averages = []
            subimages = []
            for contour in contours:
                # Get the bounding rectangle for the current contour
                x, y, w, h = cv2.boundingRect(contour)
                # Create a subimage containing only this contour
                subimage = np.full_like(img_, 255)
                subimage[y:y+h, x:x+w] = img_[y:y+h, x:x+w]
                subimages.append(subimage)
                counts = []
                # Accumulate the lengths of all horizontal dark runs (stroke widths)
                for i in range(y, y + h):
                    row = subimage[i, :]
                    classified = row < 180
                    current_length = 0
                    for val in classified:
                        if val:  # If True, increase current segment length
                            current_length += 1
                        else:
                            if current_length > 0:  # When False, store the segment length if it exists
                                counts.append(current_length)
                                current_length = 0
                    # Append the last segment if the row ends with a True value
                    if current_length > 0:
                        counts.append(current_length)
                outliers = find_outliers(counts, 1.5)
                filtered_counts = [c for c in counts if c not in outliers]
                avg_stroke = np.mean(filtered_counts)
                stroke_averages.append(avg_stroke)
            outliers = find_outliers(stroke_averages, 3)
            if len(outliers) > 0 or any(st < 2.5 for st in stroke_averages):
                for i in range(len(contours)):
                    processed_subimage = subimages[i]
                    # Thicken thin strokes: eroding the white background dilates the dark ink
                    if len(outliers) > 0 and len(stroke_averages) < 2:
                        if stroke_averages[i] < np.min(outliers) or stroke_averages[i] < 2.5:
                            kernel = np.ones((3, 3), np.uint8)
                            processed_subimage = cv2.erode(processed_subimage, kernel, iterations=1)
                    elif len(stroke_averages) == 2:
                        if np.max(stroke_averages) - stroke_averages[i] > 1.5 or stroke_averages[i] < 2.5:
                            kernel = np.ones((3, 3), np.uint8)
                            processed_subimage = cv2.erode(processed_subimage, kernel, iterations=1)
                    else:
                        if stroke_averages[i] < 2.5:
                            kernel = np.ones((3, 3), np.uint8)
                            processed_subimage = cv2.erode(processed_subimage, kernel, iterations=1)
                    _, thresh = cv2.threshold(processed_subimage, 200, 255, cv2.THRESH_BINARY_INV)
                    cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]  # Get contours
                    x, y, w, h = cv2.boundingRect(cnts[0])
                    final_img[y:y+h, x:x+w] = processed_subimage[y:y+h, x:x+w]
                return cv2.cvtColor(final_img, cv2.COLOR_GRAY2BGR)
            return img

        def pad_image(img, pad_percent):
            y_pad, x_pad = int(img.shape[0]*pad_percent), int(img.shape[1]*pad_percent)
            pad_img = cv2.copyMakeBorder(img, y_pad, y_pad, x_pad, x_pad, cv2.BORDER_CONSTANT, value=[255,255,255])
            return pad_img

        for box in box_groups:
            img_croped, cnts, _ = postprocess_detection(img, box)
            if len(cnts) == 1:
                # A single contour is assumed to be a lone vertical character: rotate and read
                #pred=self.recognizer.recognize(image=cv2.rotate(img_croped,cv2.ROTATE_90_COUNTERCLOCKWISE))
                img_croped = cv2.rotate(img_croped, cv2.ROTATE_90_COUNTERCLOCKWISE)
                pred = self.recognizer.recognize(image=img_croped)
                if pred.isdigit():
                    predictions.append((pred, box))
            else:
                pytess_img = pad_image(img_croped, 0.3)
                if self.dimension_criteria(pytess_img):
                    # Split possible tolerance stacks and recognize each part
                    arr = check_tolerances(img_croped)
                    pred = ''
                    for img_ in arr:
                        img_ = adjust_padding(img_)
                        if img_.shape[0] * img_.shape[1] > 1200:
                            img_ = adjust_stroke(img_)
                        '''cv2.imshow('pred', img_)
                        cv2.waitKey(0)
                        cv2.destroyAllWindows()'''
                        pred_ = self.recognizer.recognize(image=img_) + ' '
                        if pred_ == ' ':
                            pred = self.recognizer.recognize(image=img_croped) + ' '
                            break
                        else:
                            pred += pred_
                    if any(char.isdigit() for char in pred):
                        predictions.append((pred[:-1], box))
                    else:
                        pred_pyt = self.ocr_the_rest(pytess_img, self.language)
                        other_info.append((pred_pyt, box))
                else:
                    pred_pyt = self.ocr_the_rest(pytess_img, self.language)
                    other_info.append((pred_pyt, box))
        return predictions, other_info, predictions_pyt
    def ocr_img_patches(self, img, ol = 0.05):
        '''
        Splits the original image into overlapping patches, sends each patch to
        the text detector, groups the detections, and recognizes the text.
        Args:
            img: the image to process
            ol: overlap between patches (fraction of the image size)
        '''
        patches = (int(img.shape[1] / self.max_size + 2), int(img.shape[0] / self.max_size + 2))
        a_x = int((1 - ol) / (patches[0]) * img.shape[1])  # Horizontal stride (px covered per patch)
        b_x = a_x + int(ol * img.shape[1])                 # Horizontal patch size
        a_y = int((1 - ol) / (patches[1]) * img.shape[0])  # Vertical stride (px covered per patch)
        b_y = a_y + int(ol * img.shape[0])                 # Vertical patch size
        box_groups = []
        for i in range(0, patches[0]):
            for j in range(0, patches[1]):
                offset = (a_x * i, a_y * j)
                patch_boundary = (i * a_x + b_x, j * a_y + b_y)
                img_patch = img[offset[1] : patch_boundary[1],
                                offset[0] : patch_boundary[0]]
                if img_not_empty(img_patch, 100):
                    box_group = self.detect(img_patch)
                    # Shift boxes from patch-local to full-image coordinates
                    for b in box_group:
                        for xy in b:
                            xy = xy + offset
                            box_groups.append(xy)
        '''mask_img = img.copy()
        for box in box_groups:
            pts=np.array([(box[0]),(box[1]),(box[2]),(box[3])]).astype(np.dtype('int32'))
            mask_img = cv2.polylines(mask_img, [pts], isClosed=True, color=(0, 127, 255), thickness=2)'''
        box_groups = group_polygons_by_proximity(box_groups, eps = self.cluster_t)
        box_groups = group_polygons_by_proximity(box_groups, eps = self.cluster_t - 5)  # Double check in case groups still overlap
        print('Detection finished. Starting Recognition...')
        new_group = [box for box in box_groups]
        '''for box in box_groups:
            pts=np.array([(box[0]),(box[1]),(box[2]),(box[3])]).astype(np.dtype('int32'))
            mask_img = cv2.polylines(mask_img, [pts], isClosed=True, color=(255, 127, 0), thickness=2)
        cv2.imwrite('detect.png', mask_img)'''
        dimensions, other_info, dimensions_pyt = self.recognize_dimensions(np.int32(new_group), np.array(img))
        print('Recognition finished. Performing template matching...')
        dimensions = self.symbol_search(img, dimensions)
        return dimensions, other_info, dimensions_pyt
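    # Worked example of the patch arithmetic above (illustrative numbers only):
    # for img.shape = (2000, 5000) and max_size = 2048, patches = (4, 2),
    # a_x = int(0.95 / 4 * 5000) = 1187 (horizontal stride) and
    # b_x = 1187 + int(0.05 * 5000) = 1437 (patch width), so consecutive
    # patches overlap by about 5% of the image width.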
def group_polygons_by_proximity(polygons, eps=20):
    from shapely.geometry import Polygon, MultiPolygon
    from shapely.ops import unary_union

    def polygon_intersects_or_close(p1, p2, eps):
        """
        Check if two polygons either intersect or are within the distance threshold `eps`.
        """
        # Create Polygon objects from the arrays
        poly1 = Polygon(p1)
        poly2 = Polygon(p2)
        # Check if the polygons intersect
        if poly1.intersects(poly2):
            return True
        # If not, check the minimum distance between their boundaries
        return poly1.distance(poly2) <= eps

    n = len(polygons)
    parent = list(range(n))  # Union-find structure to track connected components

    def find(x):
        if parent[x] != x:
            parent[x] = find(parent[x])
        return parent[x]

    def union(x, y):
        rootX = find(x)
        rootY = find(y)
        if rootX != rootY:
            parent[rootX] = rootY

    # Compare all polygon pairs
    for i in range(n):
        for j in range(i + 1, n):
            if polygon_intersects_or_close(polygons[i], polygons[j], eps):
                union(i, j)
    # Group polygons by connected components
    grouped_polygons = {}
    for i in range(n):
        root = find(i)
        if root not in grouped_polygons:
            grouped_polygons[root] = []
        grouped_polygons[root].append(polygons[i])
    # Now merge the polygons in each group
    merged_polygons = []
    for group in grouped_polygons.values():
        # Use Shapely to merge all polygons in this group into one geometry
        merged_polygon = unary_union([Polygon(p) for p in group])
        if isinstance(merged_polygon, MultiPolygon):
            merged_polygon = unary_union(merged_polygon)
        if merged_polygon.is_empty:
            continue
        # Find the minimum rotated bounding box for the merged geometry
        min_rotated_box = merged_polygon.minimum_rotated_rectangle.exterior.coords[0:4]
        # Add the resulting rotated box to the list
        merged_polygons.append(min_rotated_box)
    return merged_polygons
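# Example (minimal sketch; requires shapely): two 10x10 squares lying 10 px
# apart are merged into a single min-area rectangle when eps covers the gap:
#   squares = [
#       [(0, 0), (10, 0), (10, 10), (0, 10)],
#       [(20, 0), (30, 0), (30, 10), (20, 10)],
#   ]
#   merged = group_polygons_by_proximity(squares, eps=20)
#   len(merged)  # -> 1: a single box spanning roughly (0, 0) to (30, 10)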
def check_tolerances(img):
    img_arr = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert img to grayscale
    flag = False
    tole = False
    ## Find the top and bottom line of the ink
    for i in range(0, img_arr.shape[0] - 1):  # find top line
        for j in range(0, img_arr.shape[1] - 1):
            if img_arr[i, j] < 200:
                top_line = i
                flag = True
                break
        if flag == True:
            flag = False
            break
    for i in range(img_arr.shape[0] - 1, top_line, -1):  # find bottom line
        for j in range(0, img_arr.shape[1] - 1):
            if img_arr[i, j] < 200:
                bot_line = i
                flag = True
                break
        if flag == True:
            break
    ## For each row between top and bottom line, measure the distance from the
    ## right edge back to the first black pixel
    stop_at = []
    for i in range(top_line, bot_line):
        for j in range(img_arr.shape[1] - 1, 0, -1):
            if img_arr[i, j] < 200:
                stop_at.append(img_arr.shape[1] - j)
                break
        else:
            stop_at.append(img_arr.shape[1])
    ## Is there a distance in the middle band that is relatively big with respect to the others?
    for d in stop_at[int(0.3 * len(stop_at)): int(0.7 * len(stop_at))]:
        if d > img_arr.shape[0] * 0.8:
            tole = True
            tole_h_cut = stop_at.index(d) + top_line + 1
            break
        else:
            tole = False
    # If yes -> find the last character of the measurement (tolerances sit to its right)
    if tole == True:
        if d < img_arr.shape[1]:  # handle error
            tole_v_cut = None
            for j in range(img_arr.shape[1] - d, img_arr.shape[1]):
                if np.all(img_arr[int(0.3 * img_arr.shape[0]): int(0.7 * img_arr.shape[0]), j] > 200):
                    tole_v_cut = j + 2
                    break
            # -> crop images
            if tole_v_cut:  # handle error
                try:
                    measu_box = img_arr[:, :tole_v_cut]
                    up_tole_box = img_arr[:tole_h_cut, tole_v_cut:]
                    bot_tole_box = img_arr[tole_h_cut:, tole_v_cut:]
                    return [cv2.cvtColor(measu_box, cv2.COLOR_GRAY2BGR), cv2.cvtColor(up_tole_box, cv2.COLOR_GRAY2BGR), cv2.cvtColor(bot_tole_box, cv2.COLOR_GRAY2BGR)]
                except Exception:
                    return [img]
            else:
                up_text = img_arr[:tole_h_cut, :]
                bot_text = img_arr[tole_h_cut:, :]
                return [cv2.cvtColor(up_text, cv2.COLOR_GRAY2BGR), cv2.cvtColor(bot_text, cv2.COLOR_GRAY2BGR)]
    return [img]
def find_outliers(counts, t):
    # Flag values whose z-score magnitude exceeds the threshold t
    # (assumes non-constant input, i.e. std > 0)
    counts = np.array(counts)
    mean = np.mean(counts)
    std = np.std(counts)
    # Calculate Z-scores
    z_scores = (counts - mean) / std
    # Identify outliers
    return counts[np.abs(z_scores) > t]
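# Example (self-contained): stroke-width runs of mostly ~3 px with one 12 px run
#   find_outliers([3, 3, 4, 3, 12], 1.5)  # -> array([12])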
def postprocess_detection(img, box, w_multiplier = 1.0, h_multiplier = 1.0, angle_t = 5):
    def get_box_angle(box):
        # Estimate the box orientation (degrees) from the longer edge at its bottom-most corner
        exp_box = np.vstack((box[3], box, box[0]))
        i = np.argmax(box[:, 1])
        B = box[i]
        A = exp_box[i]
        C = exp_box[i + 2]
        AB_ = math.sqrt((A[0] - B[0]) ** 2 + (A[1] - B[1]) ** 2)
        BC_ = math.sqrt((C[0] - B[0]) ** 2 + (C[1] - B[1]) ** 2)
        m = np.array([(A, AB_), (C, BC_)], dtype = object)
        j = np.argmax(m[:, 1])
        O = m[j, 0]
        if B[0] == O[0]:
            alfa = math.pi / 2
        else:
            alfa = math.atan((O[1] - B[1]) / (O[0] - B[0]))
        if alfa == 0:
            return alfa / math.pi * 180
        elif B[0] < O[0]:
            return -alfa / math.pi * 180
        else:
            return (math.pi - alfa) / math.pi * 180

    def adjust_angle(alfa, i = 5):
        # Snap the angle to the nearest multiple of i degrees
        if -i < alfa < 90 - i:
            return -round(alfa / i) * i
        elif 90 - i < alfa < 90 + i:
            return round(alfa / i) * i - 180
        elif 90 + i < alfa < 180 + i:
            return 180 - round(alfa / i) * i
        else:
            return alfa

    def subimage(image, center, theta, width, height):
        '''
        Rotates an OpenCV image around center by angle theta (in degrees),
        then crops it to the given width and height.
        '''
        padded_image = cv2.copyMakeBorder(image, 300, 300, 300, 300, cv2.BORDER_CONSTANT, value=(255, 255, 255))
        shape = (padded_image.shape[1], padded_image.shape[0])  # cv2.warpAffine expects shape as (width, height)
        padded_center = (center[0] + 300, center[1] + 300)
        matrix = cv2.getRotationMatrix2D(center=padded_center, angle=theta, scale=1)
        image = cv2.warpAffine(src=padded_image, M=matrix, dsize=shape)
        x, y = (int(padded_center[0] - width / 2), int(padded_center[1] - height / 2))
        x2, y2 = x + width, y + height
        if x < 0: x = 0
        if x2 > shape[0]: x2 = shape[0]
        if y < 0: y = 0
        if y2 > shape[1]: y2 = shape[1]
        image = image[y:y2, x:x2]
        return image

    def clean_h_lines(img_croped):
        # Remove long horizontal and vertical rules (e.g. leader or frame lines) from the crop
        gray = cv2.cvtColor(img_croped, cv2.COLOR_BGR2GRAY)  # Convert img to grayscale
        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)  # Threshold to binary image
        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(img_croped.shape[1] * 0.8), 1))
        detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
        cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        for c in cnts:
            img_croped = cv2.drawContours(img_croped, [c], -1, (255, 255, 255), 3)
        vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, int(img_croped.shape[1] * 0.9)))
        detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
        cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        for c in cnts:
            img_croped = cv2.drawContours(img_croped, [c], -1, (255, 255, 255), 3)
        return img_croped, thresh

    def intel_pad(image, box, increment=3):
        def has_black_pixels(image, points):
            mask = np.zeros(image.shape[:2], dtype=np.uint8)
            cv2.drawContours(mask, [points.astype(int)], 0, 255, 1)  # Draw boundary of the rect
            # Check if there are any black pixels along the boundary
            return np.any(image[mask == 255] < 70)
        # Get the center of the box by averaging its four points
        center = np.mean(box, axis=0)
        scaled_box = np.copy(box)
        # Start by moving inwards to remove potential noise
        for i in range(4):
            direction = scaled_box[i] - center  # Vector from center to point
            scaled_box[i] -= (9 * direction / np.linalg.norm(direction)).astype(int)  # Move inward
        scale_factor = 0.91
        # Continue scaling the box until the boundary has no black pixels
        while has_black_pixels(image, scaled_box) and scale_factor < 1.3:
            scale_factor += increment / 100.0
            # Scale each point by moving it further from the center
            for i in range(4):
                direction = scaled_box[i] - center  # Vector from center to point
                scaled_box[i] += (increment * direction / np.linalg.norm(direction)).astype(int)  # Move outward
        return scaled_box

    #box = intel_pad(img, box)
    rect = cv2.minAreaRect(box)
    angle = get_box_angle(box)
    angle = adjust_angle(angle, angle_t)
    w = int(w_multiplier * max(rect[1])) + 1
    h = int(h_multiplier * min(rect[1])) + 1
    img_croped = subimage(img, rect[0], angle, w, h)
    if w > 50 and h > 30:
        img_croped, thresh = clean_h_lines(img_croped)
    gray = cv2.cvtColor(img_croped, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
    cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]  # Get contours
    '''cv2.imshow('boxes', img_croped)
    cv2.waitKey(0)
    cv2.destroyAllWindows()'''
    return img_croped, cnts, angle
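# Example (hedged sketch on a synthetic image): deskew and crop a detection box
# given as four corner points, expanding the short side by 30%:
#   canvas = np.full((200, 400, 3), 255, np.uint8)
#   cv2.putText(canvas, '42.5', (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
#   box = np.array([[90, 70], [190, 70], [190, 110], [90, 110]], np.float32)
#   crop, contours, angle = postprocess_detection(canvas, box, h_multiplier=1.3)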
def ocr_dimensions(img, detector, recognizer, alphabet_dim, frame, dim_boxes = [], cluster_thres = 20, language = 'eng', max_img_size = 2048, backg_save = False):
    # OCR the pre-detected dim_boxes first
    dimensions_ = []
    for d in dim_boxes:
        x, y = d.x - frame.x, d.y - frame.y
        if x + d.w < frame.x + frame.w and y + d.h < frame.y + frame.h:
            roi = img[y+2:y + d.h-4, x+2:x + d.w-4]
            if d.h > d.w:
                roi = cv2.rotate(roi, cv2.ROTATE_90_CLOCKWISE)
            p = recognizer.recognize(image = roi)
            if any(char.isdigit() for char in p) and len(p) > 1:
                box = np.array([[x, y], [x + d.w, y], [x + d.w, y + d.h], [x, y + d.h]])
                dimensions_.append((p, box))
                img[y:y + d.h, x:x + d.w] = 255
    # OCR the rest of the dimensions
    pipeline = Pipeline(recognizer=recognizer, detector=detector, alphabet_dimensions=alphabet_dim, cluster_t=cluster_thres, max_size=max_img_size, language=language)
    dimensions, other_info, dim_pyt = pipeline.ocr_img_patches(img, 0.05)
    dimensions.extend(dimensions_)
    # White out recognized regions; the cleaned image can serve as background
    # for synthetic training data generation
    for dim in dimensions:
        box = dim[1]
        pts = np.array([(box[0]), (box[1]), (box[2]), (box[3])])
        cv2.fillPoly(img, [pts], (255, 255, 255))
    for dim in other_info:
        box = dim[1]
        pts = np.array([(box[0]), (box[1]), (box[2]), (box[3])])
        cv2.fillPoly(img, [pts], (255, 255, 255))
    # Save the image
    if backg_save:
        backg_path = os.path.join(os.getcwd(), 'edocr2/tools/backgrounds')
        os.makedirs(backg_path, exist_ok=True)
        i = 0
        for root_dir, cur_dir, files in os.walk(backg_path):
            i += len(files)
        image_filename = os.path.join(backg_path, f'backg_{i + 1}.png')
        cv2.imwrite(image_filename, img)
    return dimensions, other_info, img, dim_pyt
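# Example (end-to-end sketch; the detector, recognizer and frame objects come
# from the surrounding edocr2 project and are assumed to exist):
#   img = cv2.imread('drawing.png')
#   dims, other, cleaned, dims_pyt = ocr_dimensions(
#       img, detector, recognizer, alphabet_dim='0123456789.,+-',
#       frame=frame, dim_boxes=[], cluster_thres=20)
#   for text, box in dims:
#       print(text)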