import streamlit as st
from akaocr import TextEngine, BoxEngine
import cv2
import numpy as np
from PIL import Image
import time
from custom_component import st_copy_to_clipboard
# Initialize the OCR engines
# Module-level singletons: BoxEngine detects text regions, TextEngine reads
# the cropped regions. NOTE(review): Streamlit re-executes the script on each
# interaction, so these are rebuilt per rerun — consider caching; verify.
box_engine = BoxEngine()
text_engine = TextEngine()
def transform_image(image, box):
    """Perspective-warp the quadrilateral `box` out of `image` into an upright crop.

    Args:
        image: Source image as a numpy array (H, W[, C]).
        box: Four (x, y) corner points as a numpy array, ordered
            top-left, top-right, bottom-right, bottom-left.

    Returns:
        The rectified crop as a numpy array; rotated 90 degrees clockwise
        when markedly taller than wide (presumably vertical text — the
        recognizer appears to expect horizontal lines).

    Raises:
        ValueError: If `box` does not contain exactly 4 points.
    """
    # Raise instead of assert: asserts are stripped under `python -O`,
    # which would let a malformed box reach getPerspectiveTransform.
    if len(box) != 4:
        raise ValueError("Shape of points must be 4x2")
    # Target size: the longer edge of each opposing pair, so no text is
    # squeezed when the quadrilateral is not a perfect rectangle.
    img_crop_width = int(
        max(
            np.linalg.norm(box[0] - box[1]),
            np.linalg.norm(box[2] - box[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(box[0] - box[3]),
            np.linalg.norm(box[1] - box[2])))
    # Destination rectangle corners, in the same corner order as `box`.
    pts_std = np.float32([[0, 0],
                          [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    box = np.array(box, dtype="float32")
    M = cv2.getPerspectiveTransform(box, pts_std)
    dst_img = cv2.warpPerspective(
        image,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)
    img_height, img_width = dst_img.shape[0:2]
    # Tall, narrow crops are treated as vertical text and rotated flat.
    if img_height / img_width >= 1.25:
        dst_img = np.rot90(dst_img, k=3)
    return dst_img
def two_pts(bounding_box):
    """Collapse a polygon to its axis-aligned bounding box.

    Args:
        bounding_box: Iterable of (x, y) points.

    Returns:
        ``((min_x, min_y), (max_x, max_y))`` with every coordinate rounded
        to the nearest integer (Python banker's rounding).
    """
    # Split the coordinates once instead of re-scanning the point list
    # in four separate comprehensions.
    xs = [pt[0] for pt in bounding_box]
    ys = [pt[1] for pt in bounding_box]
    return (
        (round(min(xs)), round(min(ys))),
        (round(max(xs)), round(max(ys)))
    )
def main():
    """Streamlit app: upload an image, OCR it, and overlay one
    copy-to-clipboard button per detected text region."""
    st.set_page_config(
        page_title="Quick OCR Copy",
        page_icon=":flag-vn:",
        layout="wide"
    )
    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"])
    # Guard clause: nothing to do until a file is uploaded.
    if uploaded_file is None:
        return
    # Decode the uploaded bytes into a BGR OpenCV image.
    file_bytes = np.asarray(
        bytearray(uploaded_file.read()), dtype=np.uint8)
    org_image = cv2.imdecode(file_bytes, 1)

    # Detect text boxes.
    _time_start = time.perf_counter()
    boxes = box_engine(org_image)
    st.write(
        f"Text detection took {time.perf_counter() - _time_start:.2f} seconds.")

    # Reverse once and reuse, so the crops fed to recognition and the
    # button coordinates below are guaranteed to be in the same order.
    ordered_boxes = boxes[::-1]
    # Crop and perspective-correct each detected box for recognition.
    images = [transform_image(org_image, box) for box in ordered_boxes]

    # Recognize the text in every crop.
    _time_start = time.perf_counter()
    texts = text_engine(images)
    st.write(
        f"Text recognition took {time.perf_counter() - _time_start:.2f} seconds.")

    # Convert BGR -> RGB PIL image for display in the browser.
    output_image = Image.fromarray(
        cv2.cvtColor(org_image, cv2.COLOR_BGR2RGB))
    button_coords = [two_pts(box) for box in ordered_boxes]
    # texts entries look like (string, ...) tuples; keep only the string.
    text_list = [x[0] for x in texts]
    # Render the image with clipboard buttons positioned over each region.
    st_copy_to_clipboard(
        image=output_image,
        button_coords=button_coords,
        text_list=text_list,
        before_copy_label="",
        after_copy_label=""
    )
# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()