import streamlit as st
from akaocr import TextEngine, BoxEngine
import cv2
import numpy as np
from PIL import Image
import time
from custom_component import st_copy_to_clipboard
# Initialize the OCR engines
# Module-level singletons: BoxEngine detects text regions, TextEngine reads
# the cropped regions. NOTE(review): Streamlit re-executes the script on each
# interaction, so these are rebuilt per rerun — consider caching; verify.
box_engine = BoxEngine()
text_engine = TextEngine()
def transform_image(image, box):
    """Perspective-warp the quadrilateral `box` out of `image` into an upright crop.

    Args:
        image: Source image as a numpy array (H, W[, C]).
        box: Four (x, y) corner points as a numpy array, ordered
            top-left, top-right, bottom-right, bottom-left.

    Returns:
        The rectified crop as a numpy array; rotated 90 degrees clockwise
        when markedly taller than wide (presumably vertical text — the
        recognizer appears to expect horizontal lines).

    Raises:
        ValueError: If `box` does not contain exactly 4 points.
    """
    # Raise instead of assert: asserts are stripped under `python -O`,
    # which would let a malformed box reach getPerspectiveTransform.
    if len(box) != 4:
        raise ValueError("Shape of points must be 4x2")
    # Target size: the longer edge of each opposing pair, so no text is
    # squeezed when the quadrilateral is not a perfect rectangle.
    img_crop_width = int(
        max(
            np.linalg.norm(box[0] - box[1]),
            np.linalg.norm(box[2] - box[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(box[0] - box[3]),
            np.linalg.norm(box[1] - box[2])))
    # Destination rectangle corners, in the same corner order as `box`.
    pts_std = np.float32([[0, 0],
                          [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    box = np.array(box, dtype="float32")
    M = cv2.getPerspectiveTransform(box, pts_std)
    dst_img = cv2.warpPerspective(
        image,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)
    img_height, img_width = dst_img.shape[0:2]
    # Tall, narrow crops are treated as vertical text and rotated flat.
    if img_height / img_width >= 1.25:
        dst_img = np.rot90(dst_img, k=3)
    return dst_img
def two_pts(bounding_box):
    """Collapse a polygon to its axis-aligned bounding box.

    Args:
        bounding_box: Iterable of (x, y) points.

    Returns:
        ``((min_x, min_y), (max_x, max_y))`` with every coordinate rounded
        to the nearest integer (Python banker's rounding).
    """
    # Split the coordinates once instead of re-scanning the point list
    # in four separate comprehensions.
    xs = [pt[0] for pt in bounding_box]
    ys = [pt[1] for pt in bounding_box]
    return (
        (round(min(xs)), round(min(ys))),
        (round(max(xs)), round(max(ys)))
    )
def main():
    """Streamlit app: upload an image, OCR it, and overlay one
    copy-to-clipboard button per detected text region."""
    st.set_page_config(
        page_title="Quick OCR Copy",
        page_icon=":flag-vn:",
        layout="wide"
    )
    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"])
    # Guard clause: nothing to do until a file is uploaded.
    if uploaded_file is None:
        return
    # Decode the uploaded bytes into a BGR OpenCV image.
    file_bytes = np.asarray(
        bytearray(uploaded_file.read()), dtype=np.uint8)
    org_image = cv2.imdecode(file_bytes, 1)

    # Detect text boxes.
    _time_start = time.perf_counter()
    boxes = box_engine(org_image)
    st.write(
        f"Text detection took {time.perf_counter() - _time_start:.2f} seconds.")

    # Reverse once and reuse, so the crops fed to recognition and the
    # button coordinates below are guaranteed to be in the same order.
    ordered_boxes = boxes[::-1]
    # Crop and perspective-correct each detected box for recognition.
    images = [transform_image(org_image, box) for box in ordered_boxes]

    # Recognize the text in every crop.
    _time_start = time.perf_counter()
    texts = text_engine(images)
    st.write(
        f"Text recognition took {time.perf_counter() - _time_start:.2f} seconds.")

    # Convert BGR -> RGB PIL image for display in the browser.
    output_image = Image.fromarray(
        cv2.cvtColor(org_image, cv2.COLOR_BGR2RGB))
    button_coords = [two_pts(box) for box in ordered_boxes]
    # texts entries look like (string, ...) tuples; keep only the string.
    text_list = [x[0] for x in texts]
    # Render the image with clipboard buttons positioned over each region.
    st_copy_to_clipboard(
        image=output_image,
        button_coords=button_coords,
        text_list=text_list,
        before_copy_label="",
        after_copy_label=""
    )
# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()