Spaces:

jeyanthangj2004
/

ocr

Runtime error

File size: 40,269 Bytes

3f42a6f

import cv2, math, os
import numpy as np

def read_alphabet(keras_path):
    """Return the alphabet stored next to a model file.

    The alphabet is read from a text file that has the same path as
    ``keras_path`` but with a ``.txt`` extension. Only the first line of
    that file is used.

    Args:
        keras_path: Path to the model file (any extension).

    Returns:
        The first line of the companion ``.txt`` file, with leading and
        trailing whitespace removed.

    Raises:
        OSError: If the companion file cannot be opened.
    """
    txt_path = os.path.splitext(keras_path)[0] + '.txt'
    # Decode explicitly as UTF-8 instead of the platform default: this module
    # works with symbols such as '⌀' and '°', which a locale-dependent
    # decoding can corrupt or reject.
    # NOTE(review): assumes the alphabet files are saved as UTF-8 - confirm.
    with open(txt_path, 'r', encoding='utf-8') as file:
        return file.readline().strip()

###################### Tables and Others Pipeline #################################
def ocr_img_cv2(image_cv2, language = None, psm = 11):
    """Run pytesseract on an OpenCV image and collect the non-blank words.

    Args:
        image_cv2: OpenCV image; it is converted from BGR to RGB before OCR.
        language: Optional value passed to tesseract with ``-l``. When falsy
            the flag is omitted.
        psm: Value passed to tesseract with ``--psm``.

    Returns:
        A tuple ``(result, all_text)``. ``result`` is a list of dictionaries
        with the keys ``text``, ``left``, ``top``, ``width`` and ``height``,
        one per entry whose text is not blank. ``all_text`` is the
        concatenation of those texts without any separator.
    """
    import pytesseract

    rgb = cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)

    # Build the tesseract command-line options.
    config = f'--psm {psm}'
    if language:
        config += f' -l {language}'

    data = pytesseract.image_to_data(rgb, config=config, output_type=pytesseract.Output.DICT)

    # Keep only entries that carry visible text, together with their box.
    words = []
    for idx, text in enumerate(data['text']):
        if not text.strip():
            continue
        words.append({
            'text': text,
            'left': data['left'][idx],
            'top': data['top'][idx],
            'width': data['width'][idx],
            'height': data['height'][idx]
        })

    return words, ''.join(word['text'] for word in words)

def ocr_tables(tables, process_img, language = None):
    """OCR every table box and blank out the tables that produced text.

    Args:
        tables: Iterable of table clusters. Each cluster is iterated to get
            its boxes (objects with ``x``, ``y``, ``w``, ``h``).
        process_img: Image the boxes refer to; it is modified in place.
        language: Optional language forwarded to ``ocr_img_cv2``.

    Returns:
        A tuple ``(results, updated_tables, process_img)``. ``results`` holds
        one word list per accepted box, with ``left``/``top`` shifted to
        image coordinates. ``updated_tables`` holds the cluster of each
        accepted box. ``process_img`` is the input image with the boxes of
        those clusters set to 255.
    """
    def position_key(cluster_dict):
        # Rank a cluster by the position of its first box (y dominates x).
        first = next(iter(cluster_dict))
        return first.y * 10000 + first.x

    results, updated_tables = [], []

    for table in sorted(tables, key=position_key, reverse=True):
        for b in table:
            crop = process_img[b.y : b.y + b.h, b.x : b.x + b.w][:]
            words, all_text = ocr_img_cv2(crop, language)
            # Ignore boxes with no words or with fewer than 5 characters.
            if not words or len(all_text) < 5:
                continue
            # Word positions are relative to the crop; move them to the image.
            for word in words:
                word['left'] += b.x
                word['top'] += b.y
            results.append(words)
            updated_tables.append(table)

    # Paint the accepted tables white so later stages do not read them again.
    for table in updated_tables:
        for b in table:
            process_img[b.y : b.y + b.h, b.x : b.x + b.w][:] = 255

    return results, updated_tables, process_img



##################### GDT Pipeline #####################################

def img_not_empty(roi, color_thres = 100):
    """Tell whether a region shows enough contrast to contain something.

    The region is converted to grayscale and the spread between its darkest
    and brightest pixel is compared with ``color_thres``.

    Args:
        roi: BGR image region (numpy array).
        color_thres: Minimum difference between the maximum and minimum gray
            value for the region to count as non-empty.

    Returns:
        ``True`` when ``max - min >= color_thres``; ``False`` otherwise,
        including when ``roi`` is ``None`` or has no pixels.
    """
    # A zero-sized slice (e.g. a box narrower than its inset) has no content
    # and would make cv2.cvtColor raise, so treat it as empty.
    if roi is None or roi.size == 0:
        return False

    gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    min_val, max_val, _, _ = cv2.minMaxLoc(gray_roi)

    return (max_val - min_val) >= color_thres

def is_not_empty(img, boxes, color_thres):
    """Check that every box of ``boxes`` has content in ``img``.

    Each box is shrunk (2 pixels off the top/left, 4 off the bottom/right)
    before being tested with ``img_not_empty``. Evaluation stops at the
    first box that fails.

    Args:
        img: Image to crop the boxes from.
        boxes: Iterable of objects with ``x``, ``y``, ``w``, ``h``.
        color_thres: Threshold forwarded to ``img_not_empty``.

    Returns:
        ``True`` if all boxes pass (or ``boxes`` is empty), else ``False``.
    """
    return all(
        img_not_empty(img[b.y + 2:b.y + b.h - 4, b.x + 2:b.x + b.w - 4], color_thres)
        for b in boxes
    )

def sort_gdt_boxes(boxes, y_thres = 3):
    """Sort boxes in reading order: rows top-to-bottom, boxes left-to-right.

    Boxes are first ordered by ``y``. A row starts at a box and absorbs the
    following boxes whose ``y`` is within ``y_thres`` of that first box;
    each row is then ordered by ``x``.

    Args:
        boxes: List of objects with ``x`` and ``y`` attributes. The list is
            sorted by ``y`` in place as a side effect.
        y_thres: Maximum ``y`` distance to the first box of a row for a box
            to belong to that row (default 3).

    Returns:
        A new list with the boxes in reading order; ``[]`` for empty input.
    """
    # Guard: without it ``boxes[0]`` below raises IndexError on empty input.
    if not boxes:
        return []

    boxes.sort(key=lambda b: b.y)

    sorted_boxes = []
    current_line = []
    current_y = boxes[0].y

    for box in boxes:
        if abs(box.y - current_y) <= y_thres:
            current_line.append(box)
        else:
            # The row is complete: emit it left-to-right and start a new one.
            current_line.sort(key=lambda b: b.x)
            sorted_boxes.extend(current_line)
            current_line = [box]
            current_y = box.y

    # Emit the last row.
    current_line.sort(key=lambda b: b.x)
    sorted_boxes.extend(current_line)

    return sorted_boxes

def recognize_gdt(img, block, recognizer):
    """Read every box of a block and join the readings into one string.

    The first box is cropped with an inset (2 pixels off the top/left, 4 off
    the bottom/right); the remaining boxes are cropped as they are. Each
    reading after the first is prefixed with a newline when its box lies
    more than 5 pixels below the previous box, and with ``'|'`` otherwise.

    Args:
        img: Image to crop from.
        block: Non-empty sequence of objects with ``x``, ``y``, ``w``, ``h``.
        recognizer: Object exposing ``recognize(image=...)`` returning text.

    Returns:
        The joined text if it contains at least one digit, else ``None``.
    """
    head = block[0]
    head_crop = img[head.y + 2:head.y + head.h - 4, head.x + 2:head.x + head.w - 4]
    pieces = [recognizer.recognize(image = head_crop)]

    for previous, current in zip(block, block[1:]):
        crop = img[current.y:current.y + current.h, current.x:current.x + current.w]
        text = recognizer.recognize(image = crop)
        separator = '\n' if current.y - previous.y > 5 else '|'
        pieces.append(separator + text)

    pred = ''.join(pieces)
    return pred if any(ch.isdigit() for ch in pred) else None

def ocr_gdt(img, gdt_boxes, recognizer):
    """Recognise the GD&T blocks and blank out the ones that were read.

    Args:
        img: Image containing the blocks; it is modified in place.
        gdt_boxes: Iterable of dictionaries whose values are lists of boxes
            (objects with ``x``, ``y``, ``w``, ``h``). May be empty or None.
        recognizer: Object exposing ``recognize(image=...)``.

    Returns:
        A tuple ``(results, updated_gdts, img)``. ``results`` holds
        ``[text, (x, y)]`` entries, where ``(x, y)`` is the position of the
        first box in reading order. ``updated_gdts`` holds the dictionary of
        each block that produced a reading. ``img`` is the input image with
        those blocks painted white.
    """
    updated_gdts = []
    results = []
    if gdt_boxes:
        for block in gdt_boxes:
            for bl_list in block.values():
                # Skip blocks that contain a box without visible content.
                if not is_not_empty(img, bl_list, 50):
                    continue
                sorted_block = sort_gdt_boxes(bl_list, 3)
                pred = recognize_gdt(img, sorted_block, recognizer)
                if pred:
                    updated_gdts.append(block)
                    results.append([pred, (sorted_block[0].x, sorted_block[0].y)])

    for gdt in updated_gdts:
        for g in gdt.values():
            for b in g:
                # Clamp the start of the enlarged window to 0: a negative
                # start would be read by numpy as "from the end" and the
                # box would be left unpainted.
                top = max(b.y - 5, 0)
                left = max(b.x - 5, 0)
                img[top : b.y + b.h + 10, left : b.x + b.w + 10] = 255
    return results, updated_gdts, img

##################### Dimension Pipeline ###############################

class Pipeline:
    """A wrapper for a combination of detector and recognizer.
    Args:
        detector: The detector to use
        recognizer: The recognizer to use
        scale: The scale factor to apply to input images
        max_size: The maximum single-side dimension of images for
            inference.
    """
    def __init__(self, detector, recognizer, alphabet_dimensions, cluster_t = 20, scale = 2, matching_t = 0.6, max_size = 1024, language = 'eng'):
        self.scale = scale
        self.detector = detector
        self.recognizer = recognizer
        self.max_size = max_size
        self.language = language
        self.alphabet_dimensions = alphabet_dimensions
        self.cluster_t = cluster_t
        self.matching_t = matching_t

    def symbol_search(self, img, dimensions, folder_code = 'u2300', char = '⌀'):
        """Search for a template symbol next to dimensions and re-read them.

        For every entry of ``dimensions`` whose text does not contain
        ``char``, an enlarged crop around its box is compared against the
        template images in ``edocr2/tools/symbol_match/<folder_code>``. When
        a template matches, the box is merged with the matched region. The
        merged boxes are grouped, recognised again with
        ``recognize_dimensions``, and replace the original entries; ``char``
        is prefixed to each new reading that does not already contain it.

        Args:
            img: Image the dimensions were detected on.
            dimensions: List of ``(text, box)`` entries; modified in place.
            folder_code: Sub-folder that holds the template images.
            char: Character represented by the templates.

        Returns:
            The same ``dimensions`` list after the update.
        """
        from shapely.geometry import Polygon
        from shapely.ops import unary_union

        def template_matching(img_, cnts, folder_path, thres, angle, xy2, rotate):
            """Return the image-space corners of the best matching contour.

            Only contours taller than 30% of ``img_`` are tried. A contour
            replaces the current result when its best template score reaches
            ``thres``; ``thres`` is then raised to that score, so later
            candidates must score at least as high. Returns ``None`` when
            nothing matches.
            """
            angle = math.radians(angle)
            cos_a, sin_a = math.cos(angle), math.sin(angle)

            def to_image(px, py):
                # Rotate a crop-local point by ``angle`` and shift by ``xy2``.
                return (xy2[0] + cos_a*px - sin_a*py, xy2[1] + cos_a*py + sin_a*px)

            box_points = None
            for cnt in cnts:
                x, y, w, h = cv2.boundingRect(cnt)
                if h <= img_.shape[0]*0.3:
                    continue
                img_2 = img_[y:y + h, x:x + w]
                y_pad, x_pad = int(img_2.shape[0]*0.3), 40
                pad_img = cv2.copyMakeBorder(img_2, y_pad, y_pad, x_pad, x_pad, cv2.BORDER_CONSTANT, value=[255,255,255])
                for file in os.listdir(folder_path):
                    symb = cv2.imread(os.path.join(folder_path, file))
                    if symb is None:
                        # Not a readable image (cv2.imread returns None).
                        continue
                    if rotate:
                        # cv2.rotate returns the rotated image; the result
                        # must be kept or the template stays unrotated.
                        symb = cv2.rotate(symb, cv2.ROTATE_90_COUNTERCLOCKWISE)
                    gray = cv2.cvtColor(symb, cv2.COLOR_BGR2GRAY)
                    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
                    contours_smb, _ = cv2.findContours(thresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    if len(contours_smb) == 0:
                        # Blank template: nothing to match against.
                        continue
                    x_, y_, w_, h_ = cv2.boundingRect(contours_smb[0])
                    symb_img = symb[y_:y_ + h_, x_:x_ + w_]

                    # Resize the symbol to the height of the candidate.
                    scale_factor = h / h_
                    if scale_factor >= 2:
                        continue
                    scaled_symb = cv2.resize(symb_img, (0, 0), fx=scale_factor, fy=scale_factor)

                    # matchTemplate requires the template to fit in the image.
                    if scaled_symb.shape[0] > pad_img.shape[0] or scaled_symb.shape[1] > pad_img.shape[1]:
                        continue

                    result = cv2.matchTemplate(pad_img, scaled_symb, cv2.TM_CCOEFF_NORMED)
                    _, max_val, _, _ = cv2.minMaxLoc(result)
                    if max_val >= thres:
                        box_points = [
                            to_image(x, y),          # top-left
                            to_image(x + w, y),      # top-right
                            to_image(x + w, y + h),  # bottom-right
                            to_image(x, y + h)       # bottom-left
                        ]
                        thres = max_val
            return box_points

        old_dim, boxes = [], []
        folder_path = os.path.join('edocr2/tools/symbol_match', folder_code)
        for dim in dimensions:
            # Dimensions that already carry the symbol need no search.
            if char in dim[0]:
                continue

            rect = cv2.minAreaRect(np.array(dim[1], dtype=np.float32))
            if len(dim[0]) == 1:
                # Single character: enlarge along the short side.
                w_multiplier, h_multiplier = 1.3, max([2*min(rect[1]), 300])/min(rect[1])
                rotate = True
            else:
                # Longer text: enlarge along the long side.
                w_multiplier, h_multiplier = max([2*max(rect[1]), 300])/ max(rect[1]), 1.3
                rotate = False
            img_, cnts, angle = postprocess_detection(img, dim[1], w_multiplier, h_multiplier, 5)

            # Origin used to map crop coordinates back to the image.
            # NOTE(review): presumably the crop's top-left corner - confirm
            # against postprocess_detection.
            crop_h, crop_w = img_.shape[0], img_.shape[1]
            rad = math.radians(angle)
            xy2 = (rect[0][0] - crop_w/2*math.cos(rad) + crop_h/2*math.sin(rad),
                   rect[0][1] - crop_w/2*math.sin(rad) - crop_h/2*math.cos(rad))

            box = template_matching(img_, cnts, folder_path, self.matching_t, angle, xy2, rotate)
            if box:
                # Merge the text box with the matched symbol region.
                merged_poly = unary_union([Polygon(cv2.boxPoints(rect)), Polygon(box)])
                final_box = merged_poly.minimum_rotated_rectangle.exterior.coords[0:4]
                boxes.append(final_box)
                old_dim.append(dim)

        if old_dim:
            # Remove by identity: list.remove() compares the tuples with ==,
            # which raises ValueError once it has to compare two box arrays.
            dimensions[:] = [d for d in dimensions if not any(d is o for o in old_dim)]

        boxes = group_polygons_by_proximity(boxes, eps = self.cluster_t)
        new_dimensions, _, _ = self.recognize_dimensions(np.int32(list(boxes)), np.array(img))

        for nd in new_dimensions:
            if char in nd[0]:
                dimensions.append(nd)
            elif nd[0][0] in set('0,).D:Z°Bx'):
                # The leading character is taken as a misread symbol and
                # replaced by ``char``.
                dimensions.append((char + nd[0][1:], nd[1]))
            else:
                dimensions.append((char + nd[0], nd[1]))
        return dimensions
            
    def detect(self, img, detection_kwargs = None):
        """Run the detector on a resized copy of one image.

        The image is scaled by ``self.scale`` when its longest side is below
        ``self.max_size / self.scale``; otherwise it is scaled so that the
        longest side equals ``self.max_size``. Detected boxes are scaled back
        with ``adjust_boxes`` unless the factor is exactly 1.

        Args:
            img: Image to process (numpy array).
            detection_kwargs: Optional keyword arguments for the detector
                call.

        Returns:
            A list with the box group returned by the detector for the image.
        """
        from edocr2.keras_ocr.tools import adjust_boxes

        longest_side = np.max((img.shape[0], img.shape[1]))
        if longest_side < self.max_size / self.scale:
            scale = self.scale
        else:
            scale = self.max_size / longest_side

        kwargs = {} if detection_kwargs is None else detection_kwargs

        target_size = (int(img.shape[1]* scale), int(img.shape[0]* scale))
        resized = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)

        detected = self.detector.detect(images=[resized], **kwargs)

        # Only one image was sent, so only one scale factor is paired.
        rescaled = []
        for boxes, factor in zip(detected, [scale]):
            if factor != 1:
                boxes = adjust_boxes(boxes=boxes, boxes_format="boxes", scale=1 / factor)
            rescaled.append(boxes)
        return rescaled
    
    def ocr_the_rest(self, img, lang):
        """OCR an image with ``ocr_img_cv2`` and return its text line by line.

        Args:
            img: OpenCV image to read.
            lang: Language forwarded to ``ocr_img_cv2``.

        Returns:
            The recognised words, joined with spaces inside a line and with
            newlines between lines; ``''`` when nothing was recognised.
        """

        def sort_boxes_by_centers(boxes, y_threshold=20):
            # Order the words by their ``top`` then ``left`` value and split
            # them into rows: a new row starts when a word's ``top`` is more
            # than ``y_threshold`` away from the first word of the row.
            ordered = sorted(boxes, key=lambda box: (box['top'], box['left']))

            rows = [[]]
            row_top = ordered[0]['top']
            for box in ordered:
                if abs(box['top'] - row_top) > y_threshold:
                    rows.append([])
                    row_top = box['top']
                rows[-1].append(box)

            # Inside a row, words read left-to-right.
            return '\n'.join(
                ' '.join(b['text'] for b in sorted(row, key=lambda b: b['left']))
                for row in rows
            )

        results, _ = ocr_img_cv2(img, lang)
        return sort_boxes_by_centers(results) if results else ''

    def dimension_criteria(self, img):
        pred_nor = self.ocr_the_rest(img, 'nor') #Norwegian include a char for the o-slash (Ø and ø) Convinient for the diameter recognition ⌀
        pred_eng = self.ocr_the_rest(img, 'eng') #However, its performance is worse than english, can't trust it
        allowed_exceptions_nor = set('''-.»Ø,/!«Æ()Å:'"[];|“?Ö=*Ä”&É<>+$£%—€øåæöéIZNOoPXiLlk \n''')
        allowed_exceptions_eng = set('''?—!@#~;¢«#_%\&€$»[é]®§¥©‘™="~'£<*“”I|ZNOXiLlk \n''')
        ok_nor = all(char in set(self.alphabet_dimensions) or char in allowed_exceptions_nor for char in pred_nor)
        ok_eng = all(char in set(self.alphabet_dimensions) or char in allowed_exceptions_eng for char in pred_eng)
        if ok_nor or ok_eng or len(pred_eng) < 2 or len(pred_nor) < 2:
            return True #In any case, any prediction can yet be fully trusted, the edocr recognizer should perform better, if the chars are present
        return False
                    
    def recognize_dimensions(self, box_groups, img):
        """Recognise the text inside each detected box.

        Every box is cropped with ``postprocess_detection``. A crop with a
        single contour is rotated 90 degrees counter-clockwise and kept only
        when the recognizer output is all digits. Any other crop is padded
        and screened with ``dimension_criteria``: accepted crops are split by
        ``check_tolerances`` and read piece by piece with the recognizer,
        while rejected crops, and accepted crops whose reading contains no
        digit, are read with ``ocr_the_rest`` instead.

        Args:
            box_groups: Iterable of boxes; each one is passed to
                ``postprocess_detection`` together with ``img``.
            img: Image the boxes refer to.

        Returns:
            A tuple ``(predictions, other_info, predictions_pyt)``. The first
            two are lists of ``(text, box)`` tuples: dimension readings and
            other text respectively. ``predictions_pyt`` is never filled in
            this method and is returned as an empty list.
        """
        predictions=[]
        predictions_pyt=[]
        other_info=[]

        def adjust_padding(img):
            """Crop ``img`` to its dark content and add a 5-pixel white border.

            When no contour is found the image is returned unchanged.
            """
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Pixels darker than 220 become foreground.
            _, thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV)
            cnts = cv2.findContours(thresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] # Get the contours
            if cnts:
                # Bounding box of all contours together
                x, y, w, h = cv2.boundingRect(np.concatenate(cnts))
                # Crop the image using the bounding box
                img = img[y:y+h, x:x+w]
                img = cv2.copyMakeBorder(img, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=[255,255,255])
            return img
        
        def adjust_stroke(img):
            """Thicken thin strokes so all glyphs have a comparable stroke width.

            For every external contour the average horizontal run length of
            dark pixels (gray value below 180) is measured. If any contour is
            an outlier or has an average below 2.5, the thin contours are
            eroded with a 3x3 kernel and the result is returned as a BGR
            image; otherwise ``img`` is returned unchanged.
            """
            

            # Grayscale copy; ``final_img`` starts as an all-white canvas
            img_ = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(img_, 200, 255, cv2.THRESH_BINARY_INV)
            contours = cv2.findContours(thresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] # Get the contours
            final_img = np.full_like(img_, 255)
            
            stroke_averages = []
            subimages =[]

            for contour in contours:

                # Get the bounding rectangle for the current contour
                x, y, w, h = cv2.boundingRect(contour)

                # White image of full size holding only this contour's rectangle
                subimage = np.full_like(img_, 255)
                subimage[y:y+h, x:x+w] = img_[y:y+h, x:x+w]
                subimages.append(subimage)
                counts =[]

                # Accumulate the lengths of all horizontal dark runs
                for i in range(y, y + h):
                    row = subimage[i, :]
                    classified = row < 180
                    current_length = 0

                    for val in classified:
                        if val:  # Dark pixel: extend the current run
                            current_length += 1
                        else:
                            if current_length > 0:  # Run ended: store its length
                                counts.extend([current_length])
                                current_length = 0

                    # Store a run that reaches the end of the row
                    if current_length > 0:
                        counts.extend([current_length])             

                # Drop unusually long or short runs before averaging
                outliers = find_outliers(counts, 1.5)
                filtered_counts = [c for c in counts if c not in outliers]
                
                avg_stroke = np.mean(filtered_counts)
                stroke_averages.append(avg_stroke)

            outliers = find_outliers(stroke_averages, 3)
            if len(outliers) > 0 or any(st < 2.5 for st in stroke_averages):
                for i in range(len(contours)):
                    processed_subimage = subimages[i]
                    # Thicken the stroke where it is thin. cv2.erode takes the
                    # local minimum, so on this white background it grows the
                    # dark strokes.
                    if len(outliers) > 0 and len(stroke_averages) < 2:
                        if stroke_averages[i] < np.min(outliers) or stroke_averages[i] < 2.5:
                            # Thicken dark strokes
                            kernel = np.ones((3, 3), np.uint8)
                            processed_subimage = cv2.erode(processed_subimage, kernel, iterations=1)                        

                    elif len(stroke_averages) == 2:
                        # Two contours: thicken the one clearly thinner than the other
                        if np.max(stroke_averages) - stroke_averages[i] > 1.5 or stroke_averages[i] < 2.5:
                            # Thicken dark strokes
                            kernel = np.ones((3, 3), np.uint8)
                            processed_subimage = cv2.erode(processed_subimage, kernel, iterations=1)
                            
                    else:
                        if stroke_averages[i] < 2.5:
                            # Thicken dark strokes
                            kernel = np.ones((3, 3), np.uint8)
                            processed_subimage = cv2.erode(processed_subimage, kernel, iterations=1)

                    # Copy the (possibly thickened) glyph onto the final canvas
                    _, thresh = cv2.threshold(processed_subimage, 200, 255, cv2.THRESH_BINARY_INV)
                    cnts = cv2.findContours(thresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] # Get the contours
                    x, y, w, h = cv2.boundingRect(cnts[0])
                    final_img[y:y+h, x:x+w] = processed_subimage[y:y+h, x:x+w]
                return cv2.cvtColor(final_img, cv2.COLOR_GRAY2BGR)
            
            return img

        def pad_image(img, pad_percent):
            """Add a white border of ``pad_percent`` of each side's length."""
            y_pad, x_pad = int(img.shape[0]*pad_percent),  int(img.shape[1]*pad_percent)
            pad_img = cv2.copyMakeBorder(img, y_pad, y_pad, x_pad, x_pad, cv2.BORDER_CONSTANT, value=[255,255,255])
            return pad_img
        
        for box in box_groups:
            # NOTE(review): postprocess_detection is assumed to return the crop
            # and its contours as the first two values - confirm with its
            # definition.
            img_croped, cnts, _ = postprocess_detection(img, box)
            
            if len(cnts)==1:
                # Single contour: rotate the crop and keep all-digit readings only
                #pred=self.recognizer.recognize(image=cv2.rotate(img_croped,cv2.ROTATE_90_COUNTERCLOCKWISE))
                img_croped=cv2.rotate(img_croped,cv2.ROTATE_90_COUNTERCLOCKWISE)
                pred = self.recognizer.recognize(image=img_croped)
                if pred.isdigit():
                    predictions.append((pred, box))
            else:
                # A padded copy is used for the pytesseract-based checks
                pytess_img = pad_image(img_croped, 0.3) 
                if self.dimension_criteria(pytess_img):
                    # Split the crop into pieces and read each one
                    arr=check_tolerances(img_croped)
                    pred=''
                    for img_ in arr:
                        img_ = adjust_padding(img_)
                        # Stroke adjustment is applied to pieces above 1200 pixels only
                        if img_.shape[0] *img_.shape[1] > 1200:
                            img_ = adjust_stroke(img_)
                        '''cv2.imshow('pred', img_)
                        cv2.waitKey(0)
                        cv2.destroyAllWindows()'''
                        pred_ = self.recognizer.recognize(image=img_) + ' '
                        if pred_==' ':
                            # A piece gave nothing: read the whole crop instead
                            pred=self.recognizer.recognize(image=img_croped)+' '
                            break
                        else:
                            pred += pred_
                        
                    if any(char.isdigit() for char in pred):
                        # Drop the trailing space added after the last piece
                        predictions.append((pred[:-1], box))
                    else:
                        pred_pyt = self.ocr_the_rest(pytess_img, self.language)
                        other_info.append((pred_pyt, box))
                else:
                    pred_pyt = self.ocr_the_rest(pytess_img, self.language)
                    other_info.append((pred_pyt, box))
        return predictions, other_info, predictions_pyt

    def ocr_img_patches(self, img, ol = 0.05):
        """Detect text patch by patch, then group and recognise it.

        The image is divided into a grid of overlapping patches. Each patch
        that is not empty is sent to ``detect``; the resulting boxes are
        shifted to image coordinates, grouped twice with
        ``group_polygons_by_proximity`` (with ``self.cluster_t`` and then
        ``self.cluster_t - 5``), recognised with ``recognize_dimensions`` and
        finally passed through ``symbol_search``.

        Args:
            img: Image to process (numpy array).
            ol: Overlap between patches, as a fraction of the image size.

        Returns:
            A tuple ``(dimensions, other_info, dimensions_pyt)`` as produced
            by ``recognize_dimensions``, with ``dimensions`` updated by
            ``symbol_search``.
        """
        # Number of patches per axis.
        cols = int(img.shape[1] / self.max_size + 2)
        rows = int(img.shape[0] / self.max_size + 2)

        # Stride between patches and patch size (stride plus overlap).
        stride_x = int((1 - ol) / cols * img.shape[1])
        patch_w = stride_x + int(ol * img.shape[1])
        stride_y = int((1 - ol) / rows * img.shape[0])
        patch_h = stride_y + int(ol * img.shape[0])

        detected = []
        for i in range(cols):
            for j in range(rows):
                offset = (stride_x * i, stride_y * j)
                img_patch = img[offset[1] : offset[1] + patch_h,
                                offset[0] : offset[0] + patch_w]
                if not img_not_empty(img_patch, 100):
                    continue
                for group in self.detect(img_patch):
                    # Boxes are relative to the patch; move them to the image.
                    detected.extend(xy + offset for xy in group)

        merged = group_polygons_by_proximity(detected, eps = self.cluster_t)
        # Second pass to catch boxes that still overlap after merging.
        merged = group_polygons_by_proximity(merged, eps = self.cluster_t-5)
        print('Detection finished. Starting Recognition...')

        dimensions, other_info, dimensions_pyt = self.recognize_dimensions(np.int32(list(merged)), np.array(img))
        print('Recognition finished. Performing template matching...')
        dimensions = self.symbol_search(img, dimensions)
        return dimensions, other_info, dimensions_pyt

def group_polygons_by_proximity(polygons, eps=20):
    """Merge polygons that intersect or lie within ``eps`` of each other.

    Polygons are linked pairwise when they intersect or their distance is at
    most ``eps``; linked polygons form a group (links are transitive). Each
    group is replaced by the minimum rotated rectangle of its union.

    Args:
        polygons: Sequence of polygons, each a sequence of points accepted
            by ``shapely.geometry.Polygon``.
        eps: Maximum distance between two polygons to link them.

    Returns:
        A list with one entry per group, ordered by the first polygon of
        each group. Each entry is the list of the first four exterior
        coordinates of the group's minimum rotated rectangle. Empty input
        returns ``[]``.
    """
    n = len(polygons)
    if n == 0:
        return []

    from shapely.geometry import Polygon, MultiPolygon
    from shapely.ops import unary_union

    # Build each shapely polygon once instead of once per compared pair.
    shapes = [Polygon(p) for p in polygons]

    # Union-find structure tracking the connected components.
    parent = list(range(n))

    def find(x):
        # Iterative lookup with path compression (no recursion limit).
        root = x
        while parent[root] != root:
            root = parent[root]
        while parent[x] != root:
            parent[x], x = root, parent[x]
        return root

    def union(x, y):
        root_x, root_y = find(x), find(y)
        if root_x != root_y:
            parent[root_x] = root_y

    # Link every pair that touches or is close enough.
    for i in range(n):
        for j in range(i + 1, n):
            if shapes[i].intersects(shapes[j]) or shapes[i].distance(shapes[j]) <= eps:
                union(i, j)

    # Collect the members of each component, keeping first-seen order.
    groups = {}
    for i in range(n):
        groups.setdefault(find(i), []).append(shapes[i])

    merged_polygons = []
    for members in groups.values():
        merged_polygon = unary_union(members)
        if isinstance(merged_polygon, MultiPolygon):
            merged_polygon = unary_union(merged_polygon)
        if merged_polygon.is_empty:
            continue

        # Minimum rotated bounding box of the merged shape.
        min_rotated_box = merged_polygon.minimum_rotated_rectangle.exterior.coords[0:4]
        merged_polygons.append(min_rotated_box)

    return merged_polygons

def check_tolerances(img):
    """Split a dimension crop into its measurement and tolerance parts.

    The crop is scanned for a row, in the middle 30%-70% of the inked rows,
    whose distance from the right edge to the first dark pixel exceeds 80%
    of the crop height. Such a row is taken as the gap between an upper and
    a lower text. Pixels with a gray value below 200 count as dark.

    Args:
        img: BGR crop to analyse.

    Returns:
        A list of BGR images:
        * ``[measurement, upper, lower]`` when a gap row is found and a
          blank column separating the measurement is found;
        * ``[upper, lower]`` when the gap row is blank over its whole width;
        * ``[img]`` in every other case (no ink, no gap row, no separating
          column, or a crop that cannot be converted).
    """
    img_arr = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = img_arr.shape[0], img_arr.shape[1]
    dark = img_arr < 200

    # Rows holding ink; the last column is ignored and the last row is not
    # considered as a top line.
    inked_rows = dark[:, :-1].any(axis=1)
    top_candidates = np.flatnonzero(inked_rows[:-1])
    if top_candidates.size == 0:
        # Blank crop: nothing to split.
        return [img]
    top_line = int(top_candidates[0])

    below = np.flatnonzero(inked_rows[top_line + 1:])
    if below.size == 0:
        # Only one inked row: there is no span to analyse.
        return [img]
    bot_line = top_line + 1 + int(below[-1])

    # For each row, distance from the right edge to the right-most dark
    # pixel (column 0 is not inspected); the full width when none is found.
    stop_at = []
    for row in dark[top_line:bot_line, 1:]:
        cols = np.flatnonzero(row)
        stop_at.append(width - (int(cols[-1]) + 1) if cols.size else width)

    # Look for the gap row in the middle band. The row index found here is
    # used directly, instead of searching the value from the start of the
    # list, which could pick an earlier row with the same distance.
    gap_row = None
    for k in range(int(0.3 * len(stop_at)), int(0.7 * len(stop_at))):
        if stop_at[k] > height * 0.8:
            gap_row = k
            break
    if gap_row is None:
        return [img]

    d = stop_at[gap_row]
    tole_h_cut = gap_row + top_line + 1

    if d >= width:
        # The gap row is blank across the crop: split into two lines only.
        up_text = img_arr[:tole_h_cut, :]
        bot_text = img_arr[tole_h_cut:, :]
        return [cv2.cvtColor(up_text, cv2.COLOR_GRAY2BGR), cv2.cvtColor(bot_text, cv2.COLOR_GRAY2BGR)]

    # Find the first column, right of the measurement, that is blank over
    # the middle 30%-70% of the crop height.
    tole_v_cut = None
    for j in range(width - d, width):
        if np.all(img_arr[int(0.3 * height): int(0.7 * height), j] > 200):
            tole_v_cut = j + 2
            break

    if tole_v_cut:
        measu_box = img_arr[:, :tole_v_cut]
        up_tole_box = img_arr[:tole_h_cut, tole_v_cut:]
        bot_tole_box = img_arr[tole_h_cut:, tole_v_cut:]
        try:
            return [cv2.cvtColor(measu_box, cv2.COLOR_GRAY2BGR), cv2.cvtColor(up_tole_box, cv2.COLOR_GRAY2BGR), cv2.cvtColor(bot_tole_box, cv2.COLOR_GRAY2BGR)]
        except cv2.error:
            # One of the pieces is empty (cut beyond the crop): keep the
            # crop whole.
            return [img]
    return [img]

def find_outliers(counts, t):
    """Return the entries of ``counts`` whose absolute z-score exceeds ``t``.

    Args:
        counts: Sequence of numeric values (converted with ``np.array``).
        t: Z-score threshold; a value is kept when ``|z| > t``.

    Returns:
        A NumPy array holding the outlying values in their original order.
        The array is empty when ``counts`` is empty or when the standard
        deviation is zero (all values identical), because no value can
        deviate from the mean in those cases.
    """
    counts = np.array(counts)

    # Degenerate inputs: avoid mean-of-empty and division by a zero standard
    # deviation, which would only produce NaN z-scores and RuntimeWarnings.
    if counts.size == 0:
        return counts[np.zeros(counts.shape, dtype=bool)]
    std = np.std(counts)
    if std == 0:
        return counts[np.zeros(counts.shape, dtype=bool)]

    # Standard z-score: distance from the mean in units of standard deviation.
    z_scores = (counts - np.mean(counts)) / std

    return counts[np.abs(z_scores) > t]

def postprocess_detection(img, box, w_multiplier = 1.0, h_multiplier = 1.0, angle_t = 5):
    """Rotate and crop the region of ``img`` described by a 4-point ``box``.

    The box orientation is estimated from its longer side, snapped to a
    multiple of ``angle_t`` degrees, and the image is rotated around the box
    centre before cropping. Crops larger than 50x30 px additionally get long
    horizontal/vertical lines painted white.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY`` below).
        box: Array of four ``(x, y)`` vertices; indexed as ``box[0..3]`` and
            ``box[:, 1]`` and passed to ``cv2.minAreaRect``.
        w_multiplier: Scale applied to the longer side of the min-area rect.
        h_multiplier: Scale applied to the shorter side of the min-area rect.
        angle_t: Angular step (degrees) used when snapping the box angle.

    Returns:
        Tuple ``(img_croped, cnts, angle)``: the rotated crop, the external
        contours of its dark (< 200 after inverse threshold) content, and the
        rotation angle that was applied.
    """
    def get_box_angle(box):
        """Return the angle (degrees) of the longer side at the lowest vertex."""
        # Pad with the last and first vertex so that, for vertex i, the
        # previous neighbour is exp_box[i] and the next one is exp_box[i + 2].
        exp_box = np.vstack((box[3], box, box[0]))
        i = np.argmax(box[:, 1])  # vertex with the largest y coordinate
        B = box[i]
        A = exp_box[i]
        C = exp_box[i + 2]
        AB_ = math.sqrt((A[0] - B[0]) ** 2 + (A[1] - B[1]) ** 2)
        BC_ = math.sqrt((C[0] - B[0]) ** 2+(C[1] - B[1])** 2)
        # Keep the neighbour that is farther from B, i.e. the longer side.
        m = np.array([(A, AB_), (C, BC_)], dtype = object)
        j = np.argmax(m[:, 1])
        O = m[j, 0]
        if B[0] == O[0]:
            alfa = math.pi / 2  # vertical side: avoid dividing by zero
        else:
            alfa = math.atan((O[1] - B[1]) / (O[0] - B[0]))
        if alfa == 0:
            return alfa / math.pi * 180
        elif B[0] < O[0]:
            return - alfa / math.pi * 180
        else:
            return (math.pi - alfa) / math.pi * 180

    def adjust_angle(alfa, i = 5):
        """Snap ``alfa`` to a multiple of ``i`` and map it to a rotation angle.

        NOTE(review): every comparison is strict, so values exactly equal to
        ``-i``, ``90 - i``, ``90 + i`` or ``180 + i`` fall through to the
        ``else`` branch and are returned unrounded. Confirm this is intended.
        """
        if -i < alfa < 90 - i:
            return - round(alfa / i)*i
        elif 90 - i < alfa < 90 + i:
            return round(alfa / i) * i - 180
        elif 90 + i < alfa < 180 + i:
            return 180 - round(alfa / i) * i
        else:
            return alfa

    def subimage(image, center, theta, width, height):
        """Rotate ``image`` by ``theta`` degrees around ``center`` and crop.

        The image is first padded with a 300 px white border on every side so
        that the rotated content and the crop window are less likely to fall
        outside the canvas. The crop is ``width`` x ``height``, centred on
        ``center`` and clamped to the padded canvas.
        """
        padded_image =cv2.copyMakeBorder(image, 300, 300, 300, 300, cv2.BORDER_CONSTANT, value=(255, 255, 255))
        shape = (padded_image.shape[1], padded_image.shape[0])  # cv2.warpAffine expects dsize as (width, height)
        padded_center = (center[0] + 300, center[1] + 300)  # shift the centre by the padding
        matrix = cv2.getRotationMatrix2D(center=padded_center, angle=theta, scale=1)
        image = cv2.warpAffine(src=padded_image, M=matrix, dsize=shape)
        x, y = (int( padded_center[0] - width/2 ),int( padded_center[1] - height/2 ))
        x2, y2 = x + width, y + height

        # Clamp the crop window to the padded canvas.
        if x < 0: x = 0
        if x2 > shape[0]: x2 = shape[0]
        if y < 0: y= 0
        if y2 > shape[1]: y2 = shape[1]

        image = image[ y:y2, x:x2 ]

        return image

    def clean_h_lines(img_croped):
        """Paint long horizontal and vertical lines of the crop white.

        Returns the cleaned image and the inverse binary threshold that was
        computed before cleaning.
        """
        gray = cv2.cvtColor(img_croped, cv2.COLOR_BGR2GRAY) #Convert img to grayscale
        _,thresh = cv2.threshold(gray,200,255,cv2.THRESH_BINARY_INV) #Threshold to binary image
        # Opening with a 1-px-high kernel spanning 80% of the crop width keeps
        # only horizontal runs at least that long.
        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(img_croped.shape[1]*0.8),1))
        detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
        cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]  # 2-tuple vs 3-tuple return across OpenCV versions
        for c in cnts:
            img_croped = cv2.drawContours(img_croped, [c], -1, (255,255,255), 3)
        # NOTE(review): the vertical kernel height is derived from shape[1]
        # (the crop width), not shape[0] - confirm this is intended.
        vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,int(img_croped.shape[1]*0.9)))
        detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
        cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        for c in cnts:
            img_croped = cv2.drawContours(img_croped, [c], -1, (255,255,255), 3)
        return img_croped, thresh

    def intel_pad(image, box, increment=3):
        """Grow ``box`` from its centre until its outline crosses no dark pixel.

        Currently unused: the call below is commented out.
        """

        def has_black_pixels(image, points):
            mask = np.zeros(image.shape[:2], dtype=np.uint8)
            cv2.drawContours(mask, [points.astype(int)], 0, 255, 1)  # Draw boundary of the rect
            # Check if there are any black pixels along the boundary
            return np.any(image[mask == 255] < 70)

        # Get the center of the box by averaging its four points
        center = np.mean(box, axis=0)

        scaled_box = np.copy(box)
        #start by moving inwards to remove potential noise
        for i in range(4):
            direction = scaled_box[i] - center  # Vector from center to point
            scaled_box[i] -= (9 * direction / np.linalg.norm(direction)).astype(int)  # Move inward
        scale_factor = 0.91
        # Continue scaling the box until the boundary has no black pixels
        while has_black_pixels(image, scaled_box) and scale_factor < 1.3:
            scale_factor += increment / 100.0
            # Scale each point by moving it further from the center
            for i in range(4):
                direction = scaled_box[i] - center  # Vector from center to point
                scaled_box[i] += (increment * direction / np.linalg.norm(direction)).astype(int)  # Move outward

        return scaled_box

    #box = intel_pad(img, box)
    rect = cv2.minAreaRect(box)
    angle = get_box_angle(box)
    angle = adjust_angle(angle, angle_t)
    # Crop size: longer rect side is the width, shorter one the height.
    w=int(w_multiplier*max(rect[1]))+1
    h=int(h_multiplier*min(rect[1]))+1
    img_croped = subimage(img, rect[0], angle, w, h)
    if w > 50 and h > 30:
        # Only sufficiently large crops get line removal; the threshold is
        # recomputed below on the cleaned image, so the returned one is unused.
        img_croped, _ = clean_h_lines(img_croped)
    gray = cv2.cvtColor(img_croped, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
    # Use the same version-tolerant unpacking as clean_h_lines: findContours
    # returns (contours, hierarchy) or (image, contours, hierarchy).
    found = cv2.findContours(thresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = found[0] if len(found) == 2 else found[1]
    return img_croped, cnts, angle

def ocr_dimensions(img, detector, recognizer, alphabet_dim, frame, dim_boxes = [], cluster_thres = 20, language = 'eng', max_img_size = 2048, backg_save = False):
    """Recognize dimension text in ``img`` and blank out everything recognized.

    Processing happens in two passes:

    1. Each entry of ``dim_boxes`` (objects exposing ``x``, ``y``, ``w``,
       ``h``) is shifted by the ``frame`` origin, cropped and sent straight to
       ``recognizer``. A prediction is kept only if it contains a digit and
       is longer than one character; its region is then painted white.
    2. The remaining image is processed with ``Pipeline.ocr_img_patches``.

    Finally every returned box (dimensions and other info) is filled with
    white in ``img``; when ``backg_save`` is true the resulting image is
    written to ``edocr2/tools/backgrounds`` under the current working
    directory as ``backg_<count + 1>.png``.

    Note that ``img`` is modified in place.

    Returns:
        Tuple ``(dimensions, other_info, img, dim_pyt)``.
    """
    # Pass 1: OCR the pre-detected dimension boxes.
    boxed_dims = []
    for d in dim_boxes:
        left, top = d.x - frame.x, d.y - frame.y
        right, bottom = left + d.w, top + d.h
        if not (right < frame.x + frame.w and bottom < frame.y + frame.h):
            continue

        # Trim the crop slightly (2 px at the start, 4 px at the end).
        roi = img[top + 2:bottom - 4, left + 2:right - 4]
        if d.h > d.w:
            # Taller than wide: rotate before recognition.
            roi = cv2.rotate(roi, cv2.ROTATE_90_CLOCKWISE)
        text = recognizer.recognize(image = roi)
        if any(ch.isdigit() for ch in text) and len(text) > 1:
            corners = np.array([[left, top], [right, top], [right, bottom], [left, bottom]])
            boxed_dims.append((text, corners))
            img[top:bottom, left:right] = 255

    # Pass 2: OCR whatever is left in the image.
    pipeline = Pipeline(
        recognizer=recognizer,
        detector=detector,
        alphabet_dimensions=alphabet_dim,
        cluster_t=cluster_thres,
        max_size=max_img_size,
        language=language,
    )
    dimensions, other_info, dim_pyt = pipeline.ocr_img_patches(img, 0.05)
    dimensions.extend(boxed_dims)

    # Patches background generation for synthetic data training: white out
    # every recognized region, dimensions first and then the other info.
    for entry in [*dimensions, *other_info]:
        quad = entry[1]
        polygon = np.array([quad[0], quad[1], quad[2], quad[3]])
        cv2.fillPoly(img, [polygon], (255, 255, 255))

    # Save the image, numbered after the files already in the folder tree.
    if backg_save:
        backg_path = os.path.join(os.getcwd(), 'edocr2/tools/backgrounds')
        os.makedirs(backg_path, exist_ok=True)
        existing = sum(len(files) for _, _, files in os.walk(backg_path))
        cv2.imwrite(os.path.join(backg_path, f'backg_{existing + 1}.png'), img)

    return dimensions, other_info, img, dim_pyt