"""Streamlit app: detect and recognize text in an uploaded image, then
overlay a click-to-copy button on each recognized text region."""

import time

import cv2
import numpy as np
import streamlit as st
from akaocr import BoxEngine, TextEngine
from PIL import Image

from custom_component import st_copy_to_clipboard


@st.cache_resource
def _load_engines():
    """Build the detection/recognition engines once per server process.

    ``st.cache_resource`` keeps the expensive model objects alive across
    Streamlit reruns instead of re-instantiating them on every interaction.
    """
    return BoxEngine(), TextEngine()


def transform_image(image, box):
    """Perspective-warp the quadrilateral ``box`` out of ``image``.

    Args:
        image: Source image (OpenCV BGR ndarray).
        box: 4x2 array-like of corner points, ordered
            top-left, top-right, bottom-right, bottom-left.

    Returns:
        The rectified crop. Tall crops (height/width >= 1.25) are rotated
        90 degrees clockwise, on the assumption they are vertical text.

    Raises:
        ValueError: If ``box`` does not contain exactly 4 points.
    """
    box = np.asarray(box, dtype="float32")
    if len(box) != 4:
        # Explicit raise instead of assert: asserts vanish under `python -O`.
        raise ValueError("Shape of points must be 4x2")

    # Output size: the longer of the two opposite edges in each direction.
    crop_w = int(max(np.linalg.norm(box[0] - box[1]),
                     np.linalg.norm(box[2] - box[3])))
    crop_h = int(max(np.linalg.norm(box[0] - box[3]),
                     np.linalg.norm(box[1] - box[2])))

    dst_pts = np.float32([[0, 0],
                          [crop_w, 0],
                          [crop_w, crop_h],
                          [0, crop_h]])
    matrix = cv2.getPerspectiveTransform(box, dst_pts)
    crop = cv2.warpPerspective(
        image, matrix, (crop_w, crop_h),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)

    height, width = crop.shape[:2]
    if height / width >= 1.25:
        # Likely vertical text: rotate so the recognizer sees it horizontally.
        crop = np.rot90(crop, k=3)
    return crop


def two_pts(bounding_box):
    """Collapse a 4-point polygon into its axis-aligned bounding rectangle.

    Returns ``((x_min, y_min), (x_max, y_max))`` with rounded integer
    coordinates.
    """
    xs = [pt[0] for pt in bounding_box]
    ys = [pt[1] for pt in bounding_box]
    return (
        (round(min(xs)), round(min(ys))),
        (round(max(xs)), round(max(ys))),
    )


def main():
    """Run the page: upload -> detect boxes -> recognize text -> copy UI."""
    st.set_page_config(
        page_title="Quick OCR Copy",
        page_icon=":flag-vn:",
        layout="wide",
    )

    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    box_engine, text_engine = _load_engines()

    # Decode the upload into a BGR OpenCV image.
    file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
    org_image = cv2.imdecode(file_bytes, 1)

    _time_start = time.perf_counter()
    boxes = box_engine(org_image)
    st.write(
        f"Text detection took {time.perf_counter() - _time_start:.2f} seconds.")

    # Crop and rectify each detected region for recognition. The boxes are
    # reversed here and again for button_coords below so the two lists stay
    # aligned; presumably this matches the reading order expected by the
    # component — TODO(review): confirm against BoxEngine output order.
    images = [transform_image(org_image, box) for box in boxes[::-1]]
    if not images:
        st.warning("No text regions detected in this image.")
        return

    _time_start = time.perf_counter()
    texts = text_engine(images)
    st.write(
        f"Text recognition took {time.perf_counter() - _time_start:.2f} seconds.")

    # OpenCV images are BGR; PIL expects RGB for display.
    output_image = Image.fromarray(cv2.cvtColor(org_image, cv2.COLOR_BGR2RGB))

    button_coords = [two_pts(box) for box in boxes[::-1]]
    text_list = [x[0] for x in texts]

    # Custom component: draws the image with a copy-to-clipboard button
    # positioned over each detected box.
    st_copy_to_clipboard(
        image=output_image,
        button_coords=button_coords,
        text_list=text_list,
        before_copy_label="",
        after_copy_label="",
    )


if __name__ == '__main__':
    main()