"""Streamlit app: detect and recognize text in an uploaded image, then
overlay a click-to-copy button on each recognized text region."""

import time

import cv2
import numpy as np
import streamlit as st
from akaocr import BoxEngine, TextEngine
from PIL import Image

from custom_component import st_copy_to_clipboard


@st.cache_resource
def _load_engines():
    """Build the detection/recognition engines once per server process.

    ``st.cache_resource`` keeps the expensive model objects alive across
    Streamlit reruns instead of re-instantiating them on every interaction.
    """
    return BoxEngine(), TextEngine()


def transform_image(image, box):
    """Perspective-warp the quadrilateral ``box`` out of ``image``.

    Args:
        image: Source image (OpenCV BGR ndarray).
        box: 4x2 array-like of corner points, ordered
            top-left, top-right, bottom-right, bottom-left.

    Returns:
        The rectified crop. Tall crops (height/width >= 1.25) are rotated
        90 degrees clockwise, on the assumption they are vertical text.

    Raises:
        ValueError: If ``box`` does not contain exactly 4 points.
    """
    box = np.asarray(box, dtype="float32")
    if len(box) != 4:
        # Explicit raise instead of assert: asserts vanish under `python -O`.
        raise ValueError("Shape of points must be 4x2")

    # Output size: the longer of the two opposite edges in each direction.
    crop_w = int(max(np.linalg.norm(box[0] - box[1]),
                     np.linalg.norm(box[2] - box[3])))
    crop_h = int(max(np.linalg.norm(box[0] - box[3]),
                     np.linalg.norm(box[1] - box[2])))

    dst_pts = np.float32([[0, 0],
                          [crop_w, 0],
                          [crop_w, crop_h],
                          [0, crop_h]])
    matrix = cv2.getPerspectiveTransform(box, dst_pts)
    crop = cv2.warpPerspective(
        image, matrix, (crop_w, crop_h),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)

    height, width = crop.shape[:2]
    if height / width >= 1.25:
        # Likely vertical text: rotate so the recognizer sees it horizontally.
        crop = np.rot90(crop, k=3)
    return crop


def two_pts(bounding_box):
    """Collapse a 4-point polygon into its axis-aligned bounding rectangle.

    Returns ``((x_min, y_min), (x_max, y_max))`` with rounded integer
    coordinates.
    """
    xs = [pt[0] for pt in bounding_box]
    ys = [pt[1] for pt in bounding_box]
    return (
        (round(min(xs)), round(min(ys))),
        (round(max(xs)), round(max(ys))),
    )


def main():
    """Run the page: upload -> detect boxes -> recognize text -> copy UI."""
    st.set_page_config(
        page_title="Quick OCR Copy",
        page_icon=":flag-vn:",
        layout="wide",
    )

    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    box_engine, text_engine = _load_engines()

    # Decode the upload into a BGR OpenCV image.
    file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
    org_image = cv2.imdecode(file_bytes, 1)

    _time_start = time.perf_counter()
    boxes = box_engine(org_image)
    st.write(
        f"Text detection took {time.perf_counter() - _time_start:.2f} seconds.")

    # Crop and rectify each detected region for recognition. The boxes are
    # reversed here and again for button_coords below so the two lists stay
    # aligned; presumably this matches the reading order expected by the
    # component — TODO(review): confirm against BoxEngine output order.
    images = [transform_image(org_image, box) for box in boxes[::-1]]
    if not images:
        st.warning("No text regions detected in this image.")
        return

    _time_start = time.perf_counter()
    texts = text_engine(images)
    st.write(
        f"Text recognition took {time.perf_counter() - _time_start:.2f} seconds.")

    # OpenCV images are BGR; PIL expects RGB for display.
    output_image = Image.fromarray(cv2.cvtColor(org_image, cv2.COLOR_BGR2RGB))

    button_coords = [two_pts(box) for box in boxes[::-1]]
    text_list = [x[0] for x in texts]

    # Custom component: draws the image with a copy-to-clipboard button
    # positioned over each detected box.
    st_copy_to_clipboard(
        image=output_image,
        button_coords=button_coords,
        text_list=text_list,
        before_copy_label="",
        after_copy_label="",
    )


if __name__ == '__main__':
    main()