Spaces:

TungDuong
/

Scene_Text_Recognization

Runtime error

File size: 5,475 Bytes

06142a4

import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
import xml.etree.ElementTree as ET
import shutil
import yaml

from sklearn.model_selection import train_test_split

# Path to the XML annotation file describing the dataset.
location_path = r'Dataset/locations.xml'
# Parse the annotation file once at import time. `root` is the top-level
# element of the document; each of its children describes one image.
tree = ET.parse(location_path)
root = tree.getroot()


def get_gt_bboxes(location_path):
    """Read image paths, image sizes and text bounding boxes from an XML file.

    The annotation file is parsed from ``location_path`` on every call, so the
    result always reflects the file that was actually passed in.

    Each child of the XML root is treated as one image whose sub-elements are
    read by position: [0] holds the image name as text, [1] carries the image
    size in its ``x`` (width) and ``y`` (height) attributes, and [2] contains
    one child per text box with ``x``, ``y``, ``width`` and ``height``
    attributes.

    Args:
        location_path: path to the XML annotation file
    Return:
        gt_imagepaths (list): image paths, each image name joined onto the
            'Dataset' folder
        gt_imagesizes (list): [height, width] of each image, kept exactly as
            read from the XML attributes (i.e. strings)
        gt_locations (list): for each image, a list of [x, y, width, height]
            boxes stored as np.int64 values
    """
    xml_root = ET.parse(location_path).getroot()

    gt_imagepaths = []
    gt_imagesizes = []
    gt_locations = []

    for image in xml_root:
        # path to the image, relative to the working directory
        gt_imagepaths.append(os.path.join('Dataset', image[0].text))

        # image size, stored as [height, width]
        size_node = image[1]
        gt_imagesizes.append([size_node.get('y'), size_node.get('x')])

        # Box attributes may be written as decimals ("12.0"), so they are
        # parsed as float first and then truncated to an integer.
        gt_locations.append([
            [np.int64(float(bbox.get(key))) for key in ('x', 'y', 'width', 'height')]
            for bbox in image[2]
        ])

    return gt_imagepaths, gt_imagesizes, gt_locations
    
# Load image paths, image sizes and ground-truth boxes for the whole dataset.
gt_imagepaths, gt_imagesizes, gt_locations = get_gt_bboxes(location_path)

def visualize_gt_bboxes(image_path, gt_locations):
    """Show an image with its ground-truth boxes outlined in red.

    Args:
        image_path: path of the image file to load
        gt_locations: iterable of [x, y, width, height] boxes in pixels,
            where (x, y) is the top-left corner
    """
    # OpenCV loads images as BGR; matplotlib expects RGB.
    canvas = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)

    for box in gt_locations:
        left, top, box_w, box_h = box
        left, top, box_w, box_h = map(int, (left, top, box_w, box_h))

        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        canvas = cv2.rectangle(canvas, top_left, bottom_right, color=(255, 0, 0), thickness=2)

    plt.imshow(canvas)
    plt.axis('off')
    plt.show()
    
def visualize_gt_bboxes_yolo(image_path, gt_location_yolo):
    """Show an image with its YOLO-format boxes outlined in red.

    Args:
        image_path: path of the image file to load
        gt_location_yolo: iterable of rows whose first entry is skipped and
            whose remaining four entries are (xc, yc, w, h): box centre and
            box size as fractions of the image width/height
    """
    # OpenCV loads images as BGR; matplotlib expects RGB.
    canvas = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    img_h, img_w = canvas.shape[:2]

    for row in gt_location_yolo:
        xc, yc, w, h = row[1:]

        # Scale the normalized centre/size back to pixel corner coordinates.
        top_left = (int((xc - w/2) * img_w), int((yc - h/2) * img_h))
        bottom_right = (int((xc + w/2) * img_w), int((yc + h/2) * img_h))

        canvas = cv2.rectangle(canvas, top_left, bottom_right, color=(255, 0, 0), thickness=2)

    plt.imshow(canvas)
    plt.axis('off')
    plt.show()
    
    
def convert_yolo_format(gt_locations, gt_imagesizes):
    """Convert pixel boxes to normalized YOLO rows.

    Args:
        gt_locations: per image, a list of [x, y, w, h] boxes in pixels,
            with (x, y) the top-left corner
        gt_imagesizes: per image, a [height, width] pair; each value only
            needs to be accepted by float() (numbers or numeric strings)
    Return:
        list: per image, a list of [class_id, xc, yc, width, height] rows
            where the centre and size are divided by the image width/height
            and class_id is always 0 (the single "text" class)
    """
    text_class_id = 0
    yolo_per_image = []

    for boxes, size in zip(gt_locations, gt_imagesizes):
        rows = []
        for left, top, box_w, box_h in boxes:
            img_h, img_w = size
            img_w = float(img_w)
            img_h = float(img_h)

            rows.append([
                text_class_id,
                (left + box_w/2) / img_w,   # normalized x centre
                (top + box_h/2) / img_h,    # normalized y centre
                box_w / img_w,              # normalized width
                box_h / img_h,              # normalized height
            ])

        yolo_per_image.append(rows)

    return yolo_per_image
        
# Convert every ground-truth box to a normalized YOLO row
# [class_id, xc, yc, width, height].
gt_locations_yolo = convert_yolo_format(gt_locations, gt_imagesizes)

def save_data_into_yolo_folder(data, src_img_dir, save_dir):
    """Write one dataset split as `images/` and `labels/` folders.

    For every sample the image file is copied into ``save_dir/images`` and a
    text file with the same base name is written to ``save_dir/labels``,
    holding one space-separated label row per line.

    Args:
        data: iterable of samples; sample[0] is the image path and sample[1]
            is the list of label rows for that image
        src_img_dir: accepted for the callers' sake but not used here
        save_dir: destination folder of the split (created if missing)
    """
    images_dir = os.path.join(save_dir, 'images')
    labels_dir = os.path.join(save_dir, 'labels')
    for folder in (save_dir, images_dir, labels_dir):
        os.makedirs(folder, exist_ok=True)

    for sample in data:
        source_image = sample[0]
        shutil.copy(source_image, images_dir)

        # The label file shares the image's base name, with a .txt extension.
        stem, _ = os.path.splitext(os.path.basename(source_image))
        label_file = os.path.join(labels_dir, f'{stem}.txt')

        with open(label_file, "w") as out:
            for row in sample[1]:
                out.write(" ".join(str(value) for value in row) + '\n')



# NOTE: `seed` is defined but not used; both splits below are seeded with 42.
seed = 0
val_size = 0.2
test_size = 0.125

# One [image_path, yolo_rows] sample per image.
dataset = [list(sample) for sample in zip(gt_imagepaths, gt_locations_yolo)]

# First hold out `val_size` of everything for validation, then hold out
# `test_size` of the remaining training data for testing
# (0.8 * 0.125 = 0.1, i.e. roughly a 70/20/10 train/val/test split).
train_data, val_data = train_test_split(dataset, test_size=val_size, random_state=42, shuffle=True)
train_data, test_data = train_test_split(train_data, test_size=test_size, random_state=42, shuffle=True)

# Export each split into its own sub-folder of the YOLO data directory.
save_yolo_data_dir = 'yolo_data'
os.makedirs(save_yolo_data_dir, exist_ok=True)
for split_name, split_data in (('train', train_data), ('val', val_data), ('test', test_data)):
    save_data_into_yolo_folder(
        data=split_data,
        src_img_dir=save_yolo_data_dir,
        save_dir=os.path.join(save_yolo_data_dir, split_name),
    )

# Single class: index 0 is "text".
class_label = ['text']

# Dataset description written next to the exported splits. The split entries
# are image folders relative to `path`.
# NOTE(review): `path` is a relative location ('../yolo_data'); how it is
# resolved depends on the tool that reads this file — confirm it matches the
# directory the training is launched from.
data_yaml = dict(
    path='../yolo_data',
    train='train/images',
    test='test/images',
    val='val/images',
    nc=1,
    names=class_label,
)

yolo_yaml_path = os.path.join(save_yolo_data_dir, 'data.yaml')
with open(yolo_yaml_path, "w") as f:
    # Block style (one key per line) rather than inline flow style.
    yaml.dump(data_yaml, stream=f, default_flow_style=False)