Spaces:

jeyanthangj2004
/

ocr

Runtime error

ocr

File size: 28,760 Bytes

3f42a6f

import random, time, os, math, cv2
import numpy as np
from collections import Counter
import typing

############ Synthetic Generation ###############################################
def get_and_process_fonts(dir_target):

    def move_file_to_directory(file_path, target_directory):
        """
        Move a file to a new directory.
        
        :param file_path: The path to the file that will be moved.
        :param target_directory: The directory where the file will be moved.
        """
        try:
            # Ensure the target directory exists
            if not os.path.exists(target_directory):
                os.makedirs(target_directory)

            # Move the file
            shutil.move(file_path, target_directory)
            print(f"Moved: {file_path} -> {target_directory}")

        except Exception as e:
            print(f"Error moving {file_path} to {target_directory}: {e}")
    
    #Download files from keras_ocr:
    from edocr2.keras_ocr.tools import download_and_verify
    import glob, zipfile, shutil
    fonts_zip_path = download_and_verify(
        url="https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/fonts.zip",
        sha256="d4d90c27a9bc4bf8fff1d2c0a00cfb174c7d5d10f60ed29d5f149ef04d45b700",
        filename="fonts.zip",
        cache_dir='.',
    )
    fonts_dir = os.path.join('.', "fonts")
    if len(glob.glob(os.path.join(fonts_dir, "**/*.ttf"))) != 2746:
        print("Unzipping fonts ZIP file.")
        with zipfile.ZipFile(fonts_zip_path) as zfile:
            zfile.extractall(fonts_dir)

    for root, dirs, _ in os.walk('fonts'):
        for dir in dirs:
            for _, _, files2 in os.walk(os.path.join(root, dir)):
                for file in files2:
                    if file.endswith("Regular.ttf"):
                        font_path = os.path.join(root, dir, file)
                        move_file_to_directory(font_path, dir_target)
    shutil.rmtree('fonts')

def check_fonts(folder_path = 'edocr2/tools/dimension_fonts/', characters = '(),.+-±:/°"⌀'):
    from PIL import Image, ImageDraw, ImageFont
    def draw_character_cv2(char, font_path, font_size, img_width, img_height):
        # Create a blank image using PIL (RGBA mode to handle transparency)
        pil_image = Image.new('RGBA', (img_width, img_height), (255, 255, 255, 0))
        draw = ImageDraw.Draw(pil_image)

        # Load the TTF font
        font = ImageFont.truetype(font_path, font_size)

        # Get the size of the text to center it in the image
        bbox = font.getbbox(char)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        
        # Calculate the position to center the character
        position = ((img_width - text_width) // 2, (img_height - text_height) // 2)

        # Draw the character onto the PIL image
        draw.text(position, char, font=font, fill=(0, 0, 0, 255))

        # Convert the PIL image to a format OpenCV can work with (BGR mode)
        cv_image = np.array(pil_image)
        cv_image = cv2.cvtColor(cv_image, cv2.COLOR_RGBA2BGRA)  # Preserve transparency

        return cv_image

    files = os.listdir(folder_path)
    for i in files:
        font_path = os.path.join(folder_path, i)
        img = draw_character_cv2(characters, font_path, 50, 400, 400)

        # Display the result with OpenCV
        cv2.imshow('Character', img)
        key = cv2.waitKey(0)

        if key == ord('1'):
            os.remove(font_path)
            print(f"File {i} has been removed.")
        elif key == ord('0'):
            print(f"File {i} was not removed.")

        cv2.destroyAllWindows()

def get_balanced_text_generator(alphabet, string_length=(5, 10), lowercase=False, bias_chars = '', bias_factor = 0.3):
    '''
    Generates batches of sentences ensuring perfectly balanced symbol distribution.
    Args:
        alphabet: string of characters
        batch_size: number of sentences per batch
        string_length: tuple defining range of sentence length
        lowercase: convert alphabet to lowercase
    Return:
        list of sentence strings
    '''
    # Initialize a counter to track the number of times each character is used
    symbol_counter = Counter({char: 0 for char in alphabet})
    
    while True:
        # Calculate the total number of generated symbols
        total_generated = sum(symbol_counter.values())

        # Adjust probabilities to balance the frequency of each symbol
        weights = {}
        for char in alphabet:
            # Apply the bias factor for specified characters
            weight = total_generated - symbol_counter[char] + 1
            if char in bias_chars:
                weight += bias_factor
            weights[char] = weight
        total_weight = sum(weights.values())
        probabilities = [weights[char] / total_weight for char in alphabet]

        # Sample a sentence based on the adjusted probabilities
        sentence = random.choices(alphabet, weights=probabilities, k=random.randint(string_length[0], string_length[1]))
        sentence = "".join(sentence)

        # Update the symbol counter
        symbol_counter.update(sentence)

        if lowercase:
            sentence = sentence.lower()
        
        yield sentence

def get_backgrounds(height, width, samples):
    backgrounds = []
    backg_path = os.path.join(os.getcwd(), 'edocr2/tools/backgrounds')
    backg_files = os.listdir(backg_path)
    for _ in range(samples):
        backg_file = random.choice(backg_files)
        img = cv2.imread(os.path.join(backg_path, backg_file))
        y, x = random.randint(0, img.shape[0] - height), random.randint(0, img.shape[1] - width)
        backg = img[y : y + height, x : x + width][:]
        backgrounds.append(backg)

    return backgrounds

def filter_wrong_samples(generator, white_pixel_threshold=0.05):
    """A generator wrapper that filters out samples with too many white pixels.
    
    Args:
    generator: The original generator that produces image samples.
    white_pixel_threshold: The maximum allowed ratio of white pixels.
    
    Yields:
    Valid samples that meet the white pixel threshold criteria.
    """
    for image, text in generator:
        # Convert image to grayscale to count white pixels
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        
        # Threshold to create a binary image (white pixels = 255, other = 0)
        _, binary_image = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY)

        # Calculate total pixels and the number of white pixels
        total_pixels = binary_image.size
        white_pixels = np.sum(binary_image == 255)
        
        # Calculate the percentage of white pixels
        white_pixel_ratio = white_pixels / total_pixels
        
        # Yield the sample only if the white pixel ratio is within the acceptable threshold
        if white_pixel_ratio >= white_pixel_threshold:
            yield cv2.bitwise_not(image), text
        '''else:
            print(f"Skipping sample due to low white pixel ratio ({white_pixel_ratio:.2%})")'''

def generate_drawing_imgs(image_gen_params, backgrounds):
    def check_overlap(text_img, background_img):
            """Check if there is an overlap between black pixels of the background and white pixels of the text.
            Args:
            text_img: A binary image where the text is white (255) on black (0).
            background_img: A grayscale or RGB background image.
            Returns:
            bool: True if there is an overlap, False otherwise.
            """
            # Ensure both images are of the same size
            if text_img.shape != background_img.shape[:2]:
                raise ValueError("Text image and background image must have the same dimensions")
            
            # Convert background to grayscale if it's RGB
            if len(background_img.shape) == 3:
                background_gray = cv2.cvtColor(background_img, cv2.COLOR_BGR2GRAY)
            else:
                background_gray = background_img

            # Identify where the text image has white pixels (text pixels)
            text_mask = text_img < 127
            # Identify where the background has black pixels (0 value)
            background_black_mask = background_gray < 127
            # Check if any black background pixels overlap with the white text pixels
            overlap = np.any(np.logical_and(text_mask, background_black_mask))

            return overlap

    def apply_text_on_background(text_img, text_binary, background_img):
        """Apply the text image over the background, assuming no overlap."""
        # Create a mask where text_binary is white (255), ndicating text
        text_mask = text_binary == 0
        
        # Create a copy of background_img to avoid modifying the original image
        result = background_img.copy()

        inverted_text_img = cv2.bitwise_not(text_img)
        result[text_mask] = inverted_text_img[text_mask]

        return result
    
    def compact_bounding_box(box_group):
        from edocr2.tools.ocr_pipelines import group_polygons_by_proximity
        box_groups = []
        for b in box_group:
            for xy, _ in b:
                box_groups.append(xy)
                    
        box_groups = group_polygons_by_proximity(box_groups, eps = 10)
        
        dummy_char = '1'
        dummy_box_groups = []

        for box in box_groups:
            dummy_box_groups.append([(np.array(box).astype(np.int32), dummy_char)])

        return dummy_box_groups
    
    def reposition(text_img, lines):
        new_lines = []
        for line in lines:
            x_coords = []
            y_coords = []
            for li in line:
                x_coords.extend([li[0][0][0], li[0][1][0], li[0][2][0], li[0][3][0]])  # [x1, x2, x3, x4]
                y_coords.extend([li[0][0][1], li[0][1][1], li[0][2][1], li[0][3][1]])  # [y1, y2, y3, y4]
            
            x_min = int(min(x_coords))
            y_min = int(min(y_coords))
            x_max = int(max(x_coords))
            y_max = int(max(y_coords))
            
            # Crop the text region using the bounding box coordinates
            cropped_text = text_img[y_min:y_max, x_min:x_max]
            x_offset = random.randint(10, text_img.shape[1] - x_max + x_min - 10)
            y_offset = random.randint(10, text_img.shape[0] - y_max + y_min - 10)
            
            text_img[y_offset:y_offset+ cropped_text.shape[0], x_offset:x_offset+ cropped_text.shape[1]] = cropped_text
            text_img[y_min:y_max, x_min:x_max] = 0
            new_line = []
            for li in line:
                new_li = []
                for coord in li[0]:  # Iterate through each (x, y) pair in the bounding box
                    new_x = coord[0] - x_min + x_offset
                    new_y = coord[1] - y_min + y_offset
                    new_li.append([new_x, new_y])
                new_line.append([new_li, li[1]])
            new_lines.append(new_line)

        return text_img, new_lines
    
    from edocr2.keras_ocr import data_generation

    while True:
        backg = random.choice(backgrounds)
        # Initialize the final image as the background
        image = backg.copy()
        lines = []  # Store the bounding boxes for all text images

        # Randomly choose a number of text images to place (between 1 and 5, for example)
        num_images = random.randint(1, 5)

        for _ in range(num_images):
            for _ in range(100):  # Retry mechanism if overlap occurs
                image_gen = data_generation.get_image_generator(**image_gen_params)
                text_img, new_lines = next(image_gen)
                text_img, new_lines = reposition(text_img, new_lines)
                _, binary_text_img = cv2.threshold(cv2.cvtColor(text_img, cv2.COLOR_BGR2GRAY), 1, 255, cv2.THRESH_BINARY_INV)
                # Check if the new text image overlaps with the current image
                
                if not check_overlap(binary_text_img, image):
                    # If no overlap, apply the text image onto the background
                    image = apply_text_on_background(text_img, binary_text_img, image)

                    # Compact the bounding boxes and add them to the list
                    new_lines = compact_bounding_box(new_lines)
                    lines.extend(new_lines)
                    break  # Exit the loop once the image has been successfully placed
            else:
                continue  # Retry if overlap occurred

        # Yield the final image with the applied text and the compacted bounding boxes
        yield image, lines
    
def save_recog_samples(alphabet, fonts, samples, recognizer, save_path = './recog_samples'):
    """Generate and save a few samples along with their labels.
    
    Args:
    recognizer: The recognizer model (trained or not).
    image_generator: The generator to produce the images.
    sample_count: Number of samples to generate.
    save_path: Path where the samples will be saved.
    """
    from edocr2.keras_ocr import data_generation

    # Create directory if it doesn't exist
    os.makedirs(save_path, exist_ok=True)
    # Generate and save the samples
    for i in range(samples):

        text_generator = get_balanced_text_generator(alphabet, (5, 10))

        image_gen_params = {
        'height': 256,
        'width': 256,
        'text_generator': text_generator,
        'font_groups': {alphabet: fonts},  # Use all fonts
        'font_size': (20, 40),
        'margin': 10,
        }

        # Create image generators for training and validation
        image_generators_train = data_generation.get_image_generator(**image_gen_params)

        # Helper function to convert image generators to recognizer input
        def convert_generators(image_generators):
            return data_generation.convert_image_generator_to_recognizer_input(
                    image_generator=image_generators,
                    max_string_length=min(recognizer.training_model.input_shape[1][1], 10),
                    target_width=recognizer.model.input_shape[2],
                    target_height=recognizer.model.input_shape[1],
                    margin=1) 

        # Convert training and validation image generators
        recog_img_gen_train = convert_generators(image_generators_train)
        filter_gen = filter_wrong_samples(recog_img_gen_train, white_pixel_threshold=0.05)
        image, text = next(filter_gen)
        
        # Save the image
        image_filename = os.path.join(save_path, f'{i + 1}.png')
        cv2.imwrite(image_filename, image)
        
        # Save the label in a text file
        label_filename = os.path.join(save_path, f'{i + 1}.txt')
        with open(label_filename, 'w') as label_file:
            label_file.write(text)

def save_detect_samples(alphabet, fonts, samples, save_path = './detect_samples'):
    
    os.makedirs(save_path, exist_ok=True)

    text_generator = get_balanced_text_generator(alphabet, (1, 10))
    height, width = 640, 640
    backgrounds = get_backgrounds(height, width, samples)

    image_gen_params = {
    'height': height,
    'width': width,
    'text_generator': text_generator,
    'font_groups': {alphabet: fonts},  # Use all fonts
    'font_size': (25, 50),
    'margin': 20,
    'rotationZ': (-90, 90)
    }

    image_gen = generate_drawing_imgs(image_gen_params, backgrounds)
    for i in range(samples):
        image, lines = next(image_gen)

        # Save the image
        image_filename = os.path.join(save_path, f'img_{i + 1}.png')
        cv2.imwrite(image_filename, image)

        label_filename = os.path.join(save_path, f'gt_img_{i + 1}.txt')
        label = ''

        for box in lines:
            for xy, _ in box:
                for vertex in xy:
                    label += str(int(vertex[0])) + ', ' + str(int(vertex[1])) + ', '
                #pts=np.array([(xy[0]),(xy[1]),(xy[2]),(xy[3])], dtype=np.int32).reshape((-1, 1, 2))
                #cv2.polylines(image, [pts], isClosed=True, color=(255, 0, 0), thickness=2)
                label += '### \n'

        with open(label_filename, 'w') as txt_file:
            txt_file.write(label)

        #cv2.imshow('Image with Oriented Bounding Box', image)
        #cv2.waitKey(0)  # Wait for a key press to close the image
        #cv2.destroyAllWindows()

############ Synthetic Training ################################################

def train_synth_recognizer(alphabet, fonts, pretrained = None, bias_char = '', samples = 1000, batch_size = 256, epochs = 10, string_length = (5, 10), basepath = os.getcwd(), val_split = 0.2):
    '''Starts the training of the recognizer on generated data.
    Args:
    alphabet: string of characters
    backgrounds: list of backgrounds images
    fonts: list of fonts with format *.ttf
    batch_size: batch size for training
    recognizer_basepath: desired path to recognizer
    pretrained_model: path to pretrained weights

    '''
    import tensorflow as tf
    from edocr2 import keras_ocr
    current_time = time.localtime(time.time())
    basepath = os.path.join(basepath,
    f'recognizer_{current_time.tm_hour}_{current_time.tm_min}')

    text_generator = get_balanced_text_generator(alphabet, string_length, bias_chars=bias_char)

    image_gen_params = {
    'height': 256,
    'width': 256,
    'text_generator': text_generator,
    'font_groups': {alphabet: fonts},  # Use all fonts
    'font_size': (20, 40),
    'margin': 10
    }

    # Create image generators for training and validation
    image_generators_train = keras_ocr.data_generation.get_image_generator(**image_gen_params)
    image_generators_val = keras_ocr.data_generation.get_image_generator(**image_gen_params)
    
    recognizer = keras_ocr.recognition.Recognizer(alphabet=alphabet)
    if pretrained:
        recognizer.model.load_weights(pretrained)
    recognizer.compile()
    #for layer in recognizer.backbone.layers:
     #   layer.trainable = False

    # Helper function to convert image generators to recognizer input
    def convert_generators(image_generators):
        return keras_ocr.data_generation.convert_image_generator_to_recognizer_input(
                image_generator=image_generators,
                max_string_length=min(recognizer.training_model.input_shape[1][1], string_length[1]),
                target_width=recognizer.model.input_shape[2],
                target_height=recognizer.model.input_shape[1],
                margin=1) 

    # Convert training and validation image generators
    recog_img_gen_train = filter_wrong_samples(convert_generators(image_generators_train))
    recog_img_gen_val = filter_wrong_samples(convert_generators(image_generators_val))

    recognition_train_generator = recognizer.get_batch_generator(recog_img_gen_train, batch_size)
    recognition_val_generator = recognizer.get_batch_generator(recog_img_gen_val, batch_size)
    with open(f'{basepath}.txt', 'w') as file:
        file.write(alphabet)
    recognizer.training_model.fit(
        recognition_train_generator,
        epochs=epochs,
        steps_per_epoch=math.ceil((1 - val_split) * samples / batch_size),
        callbacks=[
            tf.keras.callbacks.EarlyStopping(restore_best_weights=True, patience=5),
            tf.keras.callbacks.CSVLogger(f'{basepath}.csv', append=True),
            tf.keras.callbacks.ModelCheckpoint(filepath=f'{basepath}.keras',save_best_only=True),
        ],
        validation_data=recognition_val_generator,
        validation_steps=math.ceil(val_split * samples / batch_size),
    )
    return basepath

def train_synth_detector(alphabet, fonts, pretrained = None, samples = 100, batch_size = 8, epochs = 1, string_length = (1, 10), basepath = os.getcwd(), val_split = 0.2):
    import tensorflow as tf
    from edocr2 import keras_ocr
    current_time = time.localtime(time.time())
    basepath = os.path.join(basepath,
    f'detector_{current_time.tm_hour}_{current_time.tm_min}')

    text_generator = get_balanced_text_generator(alphabet, string_length)
    height, width = 640, 640
    backgrounds = get_backgrounds(height, width, samples)

    image_gen_params = {
    'height': height,
    'width': width,
    'text_generator': text_generator,
    'font_groups': {alphabet: fonts},  # Use all fonts
    'font_size': (25, 50),
    'margin': 0,
    'rotationZ': (-90, 90)
    }

    # Create image generators for training and validation
    image_generator_train  = generate_drawing_imgs(image_gen_params, backgrounds)
    image_generator_val  = generate_drawing_imgs(image_gen_params, backgrounds)

    detector = keras_ocr.detection.Detector(weights='clovaai_general')
    if pretrained:
        detector.model.load_weights(pretrained)
    
    detection_train_generator = detector.get_batch_generator(image_generator=image_generator_train,batch_size=batch_size)
    detection_val_generator = detector.get_batch_generator(image_generator=image_generator_val,batch_size=batch_size)

    detector.model.fit(
        detection_train_generator,
        steps_per_epoch=math.ceil((1 - val_split) * samples / batch_size),
        epochs=epochs,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(restore_best_weights=True, patience=5),
            tf.keras.callbacks.CSVLogger(f'{basepath}.csv'),
            tf.keras.callbacks.ModelCheckpoint(filepath=f'{basepath}.keras')
        ],
        validation_data=detection_val_generator,
        validation_steps=math.ceil(val_split * samples / batch_size),
        batch_size=batch_size
    )
    return basepath

############ Testing ##########################################################
def compare_characters(label, prediction):
    # Count occurrences of each character in label and prediction
    label_chars = Counter(label)    # e.g., {'1': 1, '4': 1, '0': 1}
    pred_chars = Counter(prediction)  # e.g., {'4': 1, '0': 1}

    correct_count = 0

    # Iterate over characters in the prediction
    for char in pred_chars:
        if char in label_chars:
            # Add the minimum of occurrences in both to correct_count
            correct_count += min(pred_chars[char], label_chars[char])
    return correct_count

def get_cer(
    preds: typing.Union[str, typing.List[str]],
    target: typing.Union[str, typing.List[str]],
    ) -> float:
    
    def edit_distance(prediction_tokens: typing.List[str], reference_tokens: typing.List[str]) -> int:
        """ Standard dynamic programming algorithm to compute the Levenshtein Edit Distance Algorithm

        Args:
            prediction_tokens: A tokenized predicted sentence
            reference_tokens: A tokenized reference sentence
        Returns:
            Edit distance between the predicted sentence and the reference sentence
        """
        # Initialize a matrix to store the edit distances
        dp = [[0] * (len(reference_tokens) + 1) for _ in range(len(prediction_tokens) + 1)]

        # Fill the first row and column with the number of insertions needed
        for i in range(len(prediction_tokens) + 1):
            dp[i][0] = i
        
        for j in range(len(reference_tokens) + 1):
            dp[0][j] = j

        # Iterate through the prediction and reference tokens
        for i, p_tok in enumerate(prediction_tokens):
            for j, r_tok in enumerate(reference_tokens):
                # If the tokens are the same, the edit distance is the same as the previous entry
                if p_tok == r_tok:
                    dp[i+1][j+1] = dp[i][j]
                # If the tokens are different, the edit distance is the minimum of the previous entries plus 1
                else:
                    dp[i+1][j+1] = min(dp[i][j+1], dp[i+1][j], dp[i][j]) + 1

        # Return the final entry in the matrix as the edit distance     
        return dp[-1][-1]
    """ Update the cer score with the current set of references and predictions.

    Args:
        preds (typing.Union[str, typing.List[str]]): list of predicted sentences
        target (typing.Union[str, typing.List[str]]): list of target words

    Returns:
        Character error rate score
    """
    if isinstance(preds, str):
        preds = [preds]
    if isinstance(target, str):
        target = [target]

    total, errors = 0, 0
    for pred_tokens, tgt_tokens in zip(preds, target):
        errors += edit_distance(list(pred_tokens), list(tgt_tokens))
        total += len(tgt_tokens)

    if total == 0:
        return 0.0

    cer = errors / total

    return cer

def test_recog(test_path, recognizer):

    # To track ground truth and predictions for word-level accuracy
    total_chars = 0  # Total number of characters in all labels
    pred_chars = 0 
    cer = []
    correct_chars = 0  # Total number of correctly predicted characters
    samples = len(os.listdir(test_path)) / 2
    
    for i in range(1, int(samples) + 1):
        img = cv2.imread(os.path.join(test_path, f"{i}.png"))
        with open(os.path.join(test_path, f"{i}.txt"), 'r') as txt_file:
            label = txt_file.read().strip()
        pred = recognizer.recognize(image = img)
        print(f'ground truth: {label} | prediction: {pred}')

        correct_in_sample = compare_characters(label, pred)
        correct_chars += correct_in_sample
        total_chars += len(label)

        sample_char_recall = (correct_in_sample / len(label)) * 100 if len(label) > 0 else 0
        sample_cer = get_cer(pred, label) * 100
        cer.append(sample_cer)
        pred_chars += len(pred)
        print(f"Sample character Recall: {sample_char_recall:.2f}%")
        print(f"Sample character CER: {sample_cer:.2f}%")

    # Calculate and print overall character-level accuracy
    overall_char_recall = (correct_chars / pred_chars) * 100 if pred_chars > 0 else 0
    overall_cer = np.mean(cer)

    print(f"Character Recall: {overall_char_recall:.2f}%")
    print(f"CER: {overall_cer:.2f}%")

def test_detect(test_path, detector, show_img = False):

    samples = len(os.listdir(test_path)) / 2
    iou_scores =[]
    
    for i in range(1, int(samples) + 1):
        img = cv2.imread(os.path.join(test_path, f"img_{i}.png"))
        gt = []

        with open(os.path.join(test_path, f"gt_img_{i}.txt"), 'r') as txt_file:
            for line in txt_file:
                # Split the line by commas and strip any whitespace
                parts = line.strip().split(',')
                
                # Extract the coordinates (first 8 values) and the character (last value)
                coords = np.array([(int(parts[0]), int(parts[1])),
                                (int(parts[2]), int(parts[3])),
                                (int(parts[4]), int(parts[5])),
                                (int(parts[6]), int(parts[7]))])
                
                # Append a tuple of (coords, char) to the result list
                gt.append(coords)

        pred = detector.detect([img])

         # Calculate IoU for each predicted box with the closest ground truth box
        for pred_box in pred[0]:
            best_iou = 0.0
            for gt_box in gt:
                iou = calculate_iou(pred_box, gt_box)
                best_iou = max(best_iou, iou)  # Track the best IoU score for this prediction

            iou_scores.append(best_iou)

        if show_img:
            for box in pred:
                for xy in box:
                    pts=np.array([(xy[0]),(xy[1]),(xy[2]),(xy[3])], dtype=np.int32).reshape((-1, 1, 2))
                    cv2.polylines(img, [pts], isClosed=True, color=(255, 0, 0), thickness=2)

            for xy in gt:
                pts=np.array([(xy[0]),(xy[1]),(xy[2]),(xy[3])], dtype=np.int32)
                cv2.polylines(img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)   

            cv2.imshow('Image with Oriented Bounding Box', img)
            cv2.waitKey(0)  # Wait for a key press to close the image
            cv2.destroyAllWindows()
    
    # Print the average IoU score
    if iou_scores:
        print(f"Average IoU: {np.mean(iou_scores)}")
    else:
        print("No predictions found.")

def calculate_iou(predicted_polygon, ground_truth_polygon):
    """
    Calculate IoU (Intersection over Union) between two polygons.
    """
    from shapely.geometry import Polygon
    pred_poly = Polygon(predicted_polygon)
    gt_poly = Polygon(ground_truth_polygon)

    if not pred_poly.is_valid or not gt_poly.is_valid:
        return 0.0

    # Calculate intersection and union areas
    intersection_area = pred_poly.intersection(gt_poly).area
    union_area = pred_poly.union(gt_poly).area

    if union_area == 0:
        return 0.0

    iou = intersection_area / union_area
    return iou