Spaces:
Sleeping
Sleeping
| import multiprocessing | |
| import glob | |
| import time | |
| import json | |
| from tqdm import tqdm | |
| from os.path import join as pjoin, exists | |
| import cv2 | |
| import os | |
| import shutil | |
| from detect_merge.merge import reassign_ids | |
| import detect_compo.ip_region_proposal as ip | |
| from detect_merge.Element import Element | |
| import detect_compo.lib_ip.ip_preprocessing as pre | |
| import detect_classify.classification as clf | |
| import torch | |
| import numpy as np | |
| from torchvision import models | |
| from torch import nn | |
| import pandas as pd | |
| import csv | |
| import re | |
| import openai | |
| import random | |
| from PIL import Image | |
def resize_height_by_longest_edge(img_path, resize_length=800):
    """Return the height to resize to so the image's longest edge is ``resize_length``.

    Args:
        img_path: Path of the image file; it is loaded with ``cv2.imread``.
        resize_length: Target length, in pixels, of the image's longer edge.

    Returns:
        ``resize_length`` itself when the image is taller than it is wide;
        otherwise ``resize_length`` scaled by the height/width ratio and
        truncated to an ``int``.

    Raises:
        ValueError: If ``cv2.imread`` could not load ``img_path``.
    """
    org = cv2.imread(img_path)
    if org is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of an AttributeError on .shape.
        raise ValueError("cannot read image: %r" % (img_path,))

    height, width = org.shape[:2]
    if height > width:
        # Portrait: the height is the longest edge.
        return resize_length
    # Landscape or square: the width becomes resize_length, the height scales with it.
    return int(resize_length * (height / width))
def _report_average(label, samples):
    """Print the mean of ``samples`` for stage ``label``, or a notice if it is empty."""
    if samples:
        print("average %s time cost for this app: %2.2f s" % (label, sum(samples) / len(samples)))
    else:
        # A stage that was disabled (or never reached) has no samples; avoid dividing by zero.
        print("average %s time cost for this app: n/a (no samples)" % label)


if __name__ == '__main__':
    # Script entry point: run OCR, component detection, merging and classification
    # on the selected screenshots, append the per-image results to output.csv and
    # print the average time cost of each stage.
    input_img_root = "./input_examples/"
    output_root = "./result_classification"
    segment_root = '../scrutinizing_alexa/txt'

    # Start every run from an empty output directory.
    if os.path.exists(output_root):
        shutil.rmtree(output_root)
    os.makedirs(output_root)

    image_list = os.listdir(input_img_root)
    input_imgs = [input_img_root + image_name for image_name in image_list]

    key_params = {'min-grad': 4, 'ffl-block': 5, 'min-ele-area': 50, 'merge-contained-ele': True,
                  'max-word-inline-gap': 10, 'max-line-ingraph-gap': 4, 'remove-top-bar': False}

    # Pipeline stage switches.
    is_ip = True
    is_clf = False
    is_ocr = True
    is_merge = True
    is_classification = True

    # Load deep learning models in advance
    compo_classifier = None
    if is_ip and is_clf:
        compo_classifier = {}
        from cnn.CNN import CNN
        # compo_classifier['Image'] = CNN('Image')
        compo_classifier['Elements'] = CNN('Elements')
        # compo_classifier['Noise'] = CNN('Noise')
    ocr_model = None
    if is_ocr:
        import detect_text.text_detection as text
    if is_merge:
        # Imported once here instead of on every loop iteration.
        import detect_merge.merge as merge

    # Number of screenshots actually processed.
    num = 0

    # Per-image time costs, one list per stage.
    img_time_cost_all = []
    ocr_time_cost_all = []
    ic_time_cost_all = []
    ts_time_cost_all = []
    cd_time_cost_all = []

    resize_by_height = 800
    output_csv = output_root + '/output.csv'

    for input_img in input_imgs:
        # NOTE(review): dropping the last four characters assumes a three-letter
        # extension (".png", ".jpg"). The slicing is kept as-is because the
        # downstream modules presumably derive their output file names the same
        # way - confirm before switching to os.path.splitext.
        index = input_img.split('/')[-1][:-4]

        # Only the screenshots named "1-1" and "1-2" are processed. Filtering
        # before any image I/O keeps skipped (or non-image) files from being read.
        if index not in ("1-1", "1-2"):
            continue

        output_data = pd.DataFrame(columns=['screenshot', 'id', 'label', 'index', 'text', 'sentences'])

        # time.clock() was removed in Python 3.8; perf_counter() is its
        # documented replacement for measuring elapsed time.
        this_img_start_time = time.perf_counter()

        resized_height = resize_height_by_longest_edge(input_img, resize_by_height)

        if is_ocr:
            os.makedirs(pjoin(output_root, 'ocr'), exist_ok=True)
            this_ocr_time_cost = text.text_detection(input_img, output_root, show=False, method='paddle')
            ocr_time_cost_all.append(this_ocr_time_cost)

        if is_ip:
            os.makedirs(pjoin(output_root, 'ip'), exist_ok=True)
            this_cd_time_cost = ip.compo_detection(input_img, output_root, key_params,
                                                   classifier=compo_classifier,
                                                   resize_by_height=resized_height, show=False)
            cd_time_cost_all.append(this_cd_time_cost)

        if is_merge:
            os.makedirs(pjoin(output_root, 'merge'), exist_ok=True)
            compo_path = pjoin(output_root, 'ip', index + '.json')
            ocr_path = pjoin(output_root, 'ocr', index + '.json')
            board_merge, components_merge = merge.merge(input_img, compo_path, ocr_path,
                                                        pjoin(output_root, 'merge'),
                                                        is_remove_top_bar=key_params['remove-top-bar'],
                                                        show=False)

        if is_classification:
            os.makedirs(pjoin(output_root, 'classification'), exist_ok=True)
            merge_path = pjoin(output_root, 'merge', index + '.json')
            # Context manager so the file handle is closed right after parsing.
            with open(merge_path, 'r') as merge_file:
                merge_json = json.load(merge_file)
            os.makedirs(pjoin(output_root, 'classification', 'GUI'), exist_ok=True)
            # NOTE(review): this passes the fixed resize_by_height (800), whereas
            # compo_detection above receives the per-image resized_height - confirm
            # that the difference is intended.
            this_time_cost_ic, this_time_cost_ts, output_data, output_board = clf.compo_classification(
                input_img, output_root, segment_root, merge_json, output_data,
                resize_by_height=resize_by_height)
            ic_time_cost_all.append(this_time_cost_ic)
            ts_time_cost_all.append(this_time_cost_ts)

        this_img_time_cost = time.perf_counter() - this_img_start_time
        img_time_cost_all.append(this_img_time_cost)
        print("time cost for this image: %2.2f s" % this_img_time_cost)
        num += 1

        # The first processed image creates the CSV with a header row; later
        # images append their rows without repeating the header.
        csv_exists = os.path.isfile(output_csv)
        output_data.to_csv(output_csv, index=False,
                           mode='a' if csv_exists else 'w',
                           header=not csv_exists)

    _report_average("text extraction", ocr_time_cost_all)
    _report_average("widget detection", cd_time_cost_all)
    _report_average("icon classification", ic_time_cost_all)
    _report_average("text selection processing", ts_time_cost_all)
    _report_average("screenshot processing", img_time_cost_all)