ocr-copy / app.py
jayllfpt's picture
clean packages
fab08f3
raw
history blame
3.46 kB
import streamlit as st
from akaocr import TextEngine, BoxEngine
import cv2
import numpy as np
from PIL import Image
import time
from custom_component import st_copy_to_clipboard
# Initialize the OCR engines once, at module import time; main() reuses
# these two module-level instances.
# box_engine is called on the full decoded image (text detection step).
box_engine = BoxEngine()
# text_engine is called on the list of cropped regions (text recognition step).
text_engine = TextEngine()
def transform_image(image, box):
    """Crop a quadrilateral region out of ``image`` and rectify it.

    The four corners in ``box`` are mapped onto an axis-aligned rectangle
    whose width/height are the longer of each pair of opposite edges, and
    the image is warped with the resulting perspective transform.

    Args:
        image: Source image array handed to ``cv2.warpPerspective``.
        box: Four ``(x, y)`` corner points. Corner 0 maps to the top-left of
            the crop, 1 to the top-right, 2 to the bottom-right and 3 to the
            bottom-left. May be a NumPy array or a plain nested sequence.

    Returns:
        The rectified crop. Crops whose height is at least 1.25 times their
        width are rotated with ``np.rot90(..., k=3)`` before being returned
        (presumably to lay vertical text out horizontally).

    Raises:
        AssertionError: If ``box`` does not have shape 4x2.
    """
    # Convert up front so plain lists/tuples support the vector arithmetic
    # below; an ndarray input is passed through unchanged by np.asarray.
    corners = np.asarray(box)

    # Raised explicitly instead of via `assert` so the check survives
    # `python -O`, while keeping the exception type the original produced.
    if corners.shape != (4, 2):
        raise AssertionError("Shape of points must be 4x2")

    # Output size: the longer of the two opposite edges in each direction.
    img_crop_width = int(
        max(
            np.linalg.norm(corners[0] - corners[1]),
            np.linalg.norm(corners[2] - corners[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(corners[0] - corners[3]),
            np.linalg.norm(corners[1] - corners[2])))

    # Destination rectangle, listed in the same corner order as `box`.
    pts_std = np.float32([[0, 0],
                          [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])

    # getPerspectiveTransform needs float32 points on both sides.
    M = cv2.getPerspectiveTransform(corners.astype("float32"), pts_std)
    dst_img = cv2.warpPerspective(
        image,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)

    img_height, img_width = dst_img.shape[0:2]
    if img_height / img_width >= 1.25:
        # Tall crop: rotate by three quarter-turns.
        dst_img = np.rot90(dst_img, k=3)
    return dst_img
def two_pts(bounding_box):
    """Reduce a polygon to its axis-aligned bounding rectangle.

    Returns ``((x_min, y_min), (x_max, y_max))`` where each coordinate is
    passed through the built-in ``round``.
    """
    # Gather each axis into its own list, then take the extremes.
    xs = [point[0] for point in bounding_box]
    ys = [point[1] for point in bounding_box]

    lower_corner = (round(min(xs)), round(min(ys)))
    upper_corner = (round(max(xs)), round(max(ys)))
    return lower_corner, upper_corner
def main():
    """Render the Streamlit page: upload an image, OCR it, show copy buttons.

    Flow:
        1. Configure the page and show a file uploader.
        2. Decode the upload into a BGR OpenCV image.
        3. Detect text boxes with ``box_engine`` and rectify each region
           with ``transform_image``.
        4. Recognize the text of each region with ``text_engine``.
        5. Pass the image, one rectangle per box and the recognized strings
           to the ``st_copy_to_clipboard`` component.

    Returns early (rendering nothing further) when no file has been uploaded,
    and shows an error when the upload cannot be decoded as an image.
    """
    st.set_page_config(
        page_title="Quick OCR Copy",
        page_icon=":flag-vn:",
        layout="wide"
    )

    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    # getvalue() returns the whole upload regardless of the current stream
    # position and avoids the extra bytearray copy.
    file_bytes = np.frombuffer(uploaded_file.getvalue(), dtype=np.uint8)

    # imdecode raises on an empty buffer and returns None for data it cannot
    # decode; handle both instead of passing a bad image to the detector.
    org_image = (
        cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) if file_bytes.size else None
    )
    if org_image is None:
        st.error("Could not decode the uploaded file as an image.")
        return

    # --- Text detection ---------------------------------------------------
    _time_start = time.perf_counter()
    boxes = box_engine(org_image)
    st.write(
        f"Text detection took {time.perf_counter() - _time_start:.2f} seconds.")

    # Boxes are consumed in reverse of the detector's output order; reverse
    # once so crops, rectangles and texts all share the same ordering.
    ordered_boxes = boxes[::-1]

    # Crop and rectify every detected region for recognition.
    images = [transform_image(org_image, box) for box in ordered_boxes]

    # --- Text recognition -------------------------------------------------
    _time_start = time.perf_counter()
    # Skip the recognizer when nothing was detected; how it handles an empty
    # batch cannot be determined from this file.
    texts = text_engine(images) if images else []
    st.write(
        f"Text recognition took {time.perf_counter() - _time_start:.2f} seconds.")

    # OpenCV decodes to BGR; convert to an RGB PIL image for display.
    output_image = Image.fromarray(
        cv2.cvtColor(org_image, cv2.COLOR_BGR2RGB))

    # One ((x_min, y_min), (x_max, y_max)) rectangle per box, and the first
    # element of each recognition result as the text to copy.
    button_coords = [two_pts(box) for box in ordered_boxes]
    text_list = [x[0] for x in texts]

    # Hand everything to the custom clipboard component.
    st_copy_to_clipboard(
        image=output_image,
        button_coords=button_coords,
        text_list=text_list,
        before_copy_label="",
        after_copy_label=""
    )
# Script entry point: only start the app when this file is executed directly.
if __name__ == "__main__":
    main()