Spaces:

Flitto
/

image_cut_rect

Running

App Files Files Community

HERIUN committed on Oct 14, 2024

Commit

6a07cb2

1 Parent(s): 85dbfc9

add files

Browse files

Files changed (12) hide show

DocScanner-L.pth +3 -0
config.py +28 -0
data_utils/.gitignore +3 -0
data_utils/__init__.py +233 -0
data_utils/alarm.py +68 -0
data_utils/awss3.py +50 -0
data_utils/box_utils.py +483 -0
data_utils/color_utils.py +490 -0
data_utils/conf.py +170 -0
data_utils/image_utils.py +1364 -0
rect_main.py +173 -0
seg.pth +3 -0

DocScanner-L.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d907965aa5d8e99ea8d0891fb66d13bc4f23838547bac6f568d01d480ff8c8a
+size 29328510

config.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import os
+class Config:
+    def __init__(self):
+        self.current_dir = os.path.dirname(os.path.abspath(__file__))
+        self.seg_model_path = os.path.join(self.current_dir, "pretrained", "seg.pth")
+        self.rec_model_path = os.path.join(
+            self.current_dir, "pretrained", "DocScanner-L.pth"
+        )
+        self.geotr_model_path = os.path.join(self.current_dir, "pretrained", "model.pt")
+        self.save_path = os.path.join(self.current_dir, "output")
+    @property
+    def get_seg_model_path(self):
+        return self.seg_model_path
+    @property
+    def get_rec_model_path(self):
+        return self.rec_model_path
+    @property
+    def get_geotr_model_path(self):
+        return self.geotr_model_path
+    @property
+    def get_save_path(self):
+        return self.save_path

data_utils/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+__pycache__
+sRGB Color Space Profile.icm
+USWebCoatedSWOP.icc

data_utils/__init__.py ADDED Viewed

	@@ -0,0 +1,233 @@

+import argparse
+import numpy as np
+import pandas as pd
+import time
+from datetime import datetime, timedelta
+from pytz import timezone
+import re
+import json
+import config
+from data_utils.image_utils import (
+    load_image,
+    resize_coordinates_and_image_to_fit_to_maximum_pixel_counts,
+)
+import torch
+import os
+from functools import wraps
+import threading
+# Module-wide lock; the thread_func decorator acquires it around each wrapped call.
+lock = threading.Lock()
+def check_gpu():
+    if torch.cuda.is_available():
+        current_device = torch.cuda.current_device()
+        device_name = torch.cuda.get_device_name(current_device)
+        print(f"Using GPU Device: {current_device} - {device_name}")
+    else:
+        print("CUDA is not available.")
+def record_and_save_gpu_memory_usage(func):  # Add func parameter
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        torch.cuda.memory._record_memory_history(enabled=True)
+        result = func(*args, **kwargs)
+        torch.cuda.memory._record_memory_history(enabled=False)
+        torch.cuda.memory._save_segment_usage(filename="snapshot/segment_usage.svg")
+        torch.cuda.memory._save_memory_usage(filename="snapshot/memory_usage.svg")
+        return result  # Ensure the result is returned
+    return wrapper
+def measure_gpu_time_and_memory(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        cuda = kwargs.get("cuda", True)  # Default to True if 'cuda' is not provided
+        start_memory = (
+            torch.cuda.memory_reserved() if cuda else 0
+        )  # Record initial memory
+        result = func(*args, **kwargs)
+        end_memory = torch.cuda.memory_reserved() if cuda else 0  # Record final memory
+        if cuda:
+            print(
+                f"{func.__name__} Initial CUDA memory reserved: {start_memory / (1024 ** 3):.2f} GB"
+            )
+            print(
+                f"{func.__name__} Final CUDA memory reserved: {end_memory / (1024 ** 3):.2f} GB"
+            )
+            print(
+                f"{func.__name__} CUDA memory change: {(end_memory - start_memory) / (1024 ** 3):.2f} GB"
+            )
+        return result
+    return wrapper
+def timeit(func):
+    @wraps(func)
+    def timeit_wrapper(*args, **kwargs):
+        start_time = time.perf_counter()
+        result = func(*args, **kwargs)
+        end_time = time.perf_counter()
+        total_time = end_time - start_time
+        if kwargs.get("debug", False):
+            print(f"{func.__name__} : {total_time:.4f} sec..")
+        # print(f'Function {func.__name__} {args} {kwargs} Took {total_time:.4f} seconds')
+        return result
+    return timeit_wrapper
+def async_timeit(func):
+    @wraps(func)
+    async def timeit_wrapper(*args, **kwargs):
+        start_time = time.perf_counter()
+        result = await func(*args, **kwargs)
+        end_time = time.perf_counter()
+        total_time = end_time - start_time
+        if kwargs.get("debug", False):
+            print(f"{func.__name__} : {total_time:.4f} sec..")
+        # print(f'Function {func.__name__} {args} {kwargs} Took {total_time:.4f} seconds')
+        return result
+    return timeit_wrapper
+def thread_func(func):
+    @wraps(func)
+    def thread_func_wrapper(*args, **kwargs):
+        lock.acquire()
+        result = func(*args, **kwargs)
+        lock.release()
+        torch.cuda.empty_cache()
+        return result
+    return thread_func_wrapper
+def get_arguments():
+    parser = argparse.ArgumentParser(description="text_remover")
+    parser.add_argument("--image")
+    parser.add_argument("--dir")
+    parser.add_argument("--json")
+    parser.add_argument("--refine", action="store_true", default=False)
+    parser.add_argument("--preserve_resolution", action="store_true", default=False)
+    parser.add_argument("--pixel_thresh", type=int)
+    # Evaluate text stroke mask
+    parser.add_argument("--prepare_kaist", action="store_true", default=False)
+    parser.add_argument("--kaist_all_zip")
+    parser.add_argument("--data_dir")
+    args = parser.parse_args()
+    return args
+def get_elapsed_time(start_time):
+    return timedelta(seconds=round(time.time() - start_time))
+def get_current_time():
+    return str(datetime.now(timezone("Asia/Seoul"))).replace(" ", "-").rsplit(".", 1)[0]
+def parse_csv_file(path_csv, resize=False):
+    df = pd.read_csv(path_csv)
+    ls_rows = list()
+    for coor, content in df[["coordinates", "content"]].values:
+        coor = re.sub(pattern=r"\(|\)", repl="", string=coor)
+        coor = coor.split(",")
+        rect = list(map(int, coor))
+        ls_rows.append((rect[2], rect[3], rect[0], rect[1], content))
+    bboxes = pd.DataFrame(
+        ls_rows, columns=["xmin", "ymin", "xmax", "ymax", "transcript"]
+    )
+    bboxes["area"] = bboxes.apply(
+        lambda x: (x["xmax"] - x["xmin"]) * (x["ymax"] - x["ymin"]), axis=1
+    )
+    bboxes.sort_values(["area"], inplace=True)
+    bboxes.drop(["area"], axis=1, inplace=True)
+    img_url = df["image_url"].values[0]
+    img = load_image(img_url)
+    if resize:
+        bboxes, img = resize_coordinates_and_image_to_fit_to_maximum_pixel_counts(
+            ha_bboxs=bboxes, img=img
+        )
+    return bboxes, img, img_url
+def parse_json_file(json_path):
+    with open(json_path, mode="r") as f:
+        req = json.load(f)
+    img_url = req["data"]["data"]["req"]["image_url"]
+    img = load_image(img_url)
+    coors = req["data"]["data"]["req"]["coordinates"]
+    bboxes = pd.DataFrame(coors, columns=["xmin", "ymin", "xmax", "ymax"])
+    return bboxes, img, img_url
+def parse_transcription_df(csv_path, index=0):
+    df = pd.read_csv(csv_path)
+    ls_rows = list()
+    for idx, (img_url, df_groupby) in enumerate(df.groupby("image_url")):
+        if idx != index:
+            continue
+        img = load_image(img_url)
+        # for img_url, coor, ori_content, tr_content in df_groupby.values:
+        for item_org_id, img_url, coor, ori_content, tr_content in df_groupby.values:
+            coor = re.sub(pattern=r"\(|\)|\.0", repl="", string=coor)
+            coor = coor.split(",")
+            rect = list(map(int, coor))
+            # ls_rows.append((rect[2], rect[3], rect[0], rect[1], ori_content, tr_content))
+            ls_rows.append(
+                (
+                    item_org_id,
+                    rect[2],
+                    rect[3],
+                    rect[0],
+                    rect[1],
+                    ori_content,
+                    tr_content,
+                )
+            )
+    bboxes = pd.DataFrame(
+        # ls_rows, columns=["xmin", "ymin", "xmax", "ymax", "ori_content", "tr_content"]
+        ls_rows,
+        columns=[
+            "item_org_id",
+            "xmin",
+            "ymin",
+            "xmax",
+            "ymax",
+            "ori_content",
+            "tr_content",
+        ],
+    )
+    return bboxes, img, img_url
+if __name__ == "__main__":
+    # Running this module directly does nothing; the commented lines below are a
+    # disabled font-loading snippet.
+    pass
+    # font = ImageFont.truetype(
+    #     font="/Users/jongbeomkim/Desktop/workspace/image_processing_server/fonts/NotoSansThai-ExtraBold.ttf",
+    #     size=round(30),
+    # )

data_utils/alarm.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import requests
+from slack_sdk import WebClient
+class Alarm:
+    def __init__(self, slack):
+        self.url = slack.url
+        self.username = slack.username
+        self.icon_emoji = slack.icon_emoji
+        self.channel_id = slack.channel_id
+        self.bot_token = slack.bot_token
+        self.client = WebClient(self.bot_token)
+    def _get_color(self, level) -> str:
+        if level == "ignore":
+            color = "#36A64F"  # Green
+        elif level == "warning":
+            color = "#F08080"  # Red
+        return color
+    def send(self, level, text):
+        color = self._get_color(level)
+        message = {
+            "attachments": [{"text": text, "color": color}],
+            "username": self.username,
+            "icon_emoji": self.icon_emoji,
+        }
+        requests.post(self.url, json=message)
+    def send_sdk(self, level, text):
+        color = self._get_color(level)
+        re = self.client.chat_postMessage(
+            channel=self.channel_id,
+            attachments=[{"fallback": "fallback", "text": text, "color": color}],
+            icon_emoji=self.icon_emoji,
+            user_name=self.username,
+        )
+        return re.data["ts"]
+    def post_reply_to_thread(self, level, thread_ts, text):
+        color = self._get_color(level)
+        self.client.chat_postMessage(
+            channel=self.channel_id,
+            attachments=[{"fallback": "fallback", "text": text, "color": color}],
+            icon_emoji=self.icon_emoji,
+            thread_ts=thread_ts,
+            user_name=self.username,
+        )
+    def post_reaction(self, thread_ts, emoji_name):
+        # emoji_name ex. "x", "완료"
+        self.client.reactions_add(
+            channel=self.channel_id, name=emoji_name, timestamp=thread_ts
+        )
+class AlertLevel:
+    IGNORE = "ignore"
+    WARNING = "warning"
+    MAJOR = "major"
+    @classmethod
+    def get_levels(self):
+        return [self.IGNORE, self.WARNING, self.MAJOR]

data_utils/awss3.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import boto3
+import cv2
+import os
+from dotenv import load_dotenv
+# Load variables from a .env file (if one is found) into the process environment,
+# then read the AWS credentials from the environment; a missing variable yields None.
+load_dotenv()
+AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
+AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
+class AWSS3:
+    def load_image(self, bucket, path, local_path):
+        file = self.__s3.get_object(Bucket=bucket, Key=path)
+        img_content = file["Body"].read()
+        with open(local_path, "wb") as f:
+            f.write(img_content)
+        img = cv2.imread(local_path, cv2.IMREAD_COLOR)
+        img = cv2.cvtColor(src=img, code=cv2.COLOR_BGR2RGB)
+        return img
+    def save_image(self, bucket, path, local_path) -> bool:
+        with open(local_path, "rb") as f:
+            image_content = f.read()
+        if image_content:
+            content_type = "image/" + local_path.rsplit(".", 1)[-1].lower().replace(
+                "jpg", "jpeg"
+            )
+            self.__s3.put_object(
+                Bucket=bucket,
+                Key=path,
+                Body=image_content,
+                ACL="public-read",
+                ContentDisposition="inline",
+                ContentType=content_type,
+            )
+            return True
+        else:
+            return False
+    def __init__(self):
+        self.__s3 = boto3.client(
+            "s3",
+            aws_access_key_id=AWS_ACCESS_KEY_ID,
+            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+        )

data_utils/box_utils.py ADDED Viewed

	@@ -0,0 +1,483 @@

+import cv2
+import numpy as np
+import pandas as pd
+import pkg_resources as pkg
+import torch
+import math
+from typing import Tuple
+from data_utils.image_utils import _get_width_and_height
+def points_to_xyxy(coords: np.ndarray) -> list:
+    x_coords = [coord[0] for coord in coords]
+    y_coords = [coord[1] for coord in coords]
+    x1 = min(x_coords)
+    y1 = min(y_coords)
+    x2 = max(x_coords)
+    y2 = max(y_coords)
+    return [x1, y1, x2, y2]
+def xyxy2xywh(x):
+    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
+    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
+    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
+    y[..., 2] = x[..., 2] - x[..., 0]  # width
+    y[..., 3] = x[..., 3] - x[..., 1]  # height
+    return y
+def xywh2xyxy(x):
+    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
+    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
+    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
+    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
+    return y
+def is_abox_in_bbox(abox_coords, bbox_coords):
+    # abox가 bbox안에 있는지 확인하는 함수. 좌표형식. (x1,y1,x2,y2)
+    if (
+        bbox_coords[0] <= abox_coords[0]
+        and bbox_coords[1] <= abox_coords[1]
+        and abox_coords[2] <= bbox_coords[2]
+        and abox_coords[3] <= bbox_coords[3]
+    ):
+        return True
+    else:
+        return False
+def calculate_aspect_ratio(box):
+    width = box[2] - box[0]
+    height = box[3] - box[1]
+    aspect_ratio = width / (height + 1e-8)
+    return aspect_ratio
+def get_box_shape(box, threshold=0.1):
+    """
+    Check if a box is close to a square.
+    - threshold (float): The threshold for considering the box as close to a square.
+                        Default is 0.1.
+    Returns:
+    - str: "square" or "horizontal" or "vertical"
+    """
+    aspect_ratio = calculate_aspect_ratio(box)
+    if abs(1 - aspect_ratio) < threshold:
+        return "square"
+    elif aspect_ratio > 1:
+        return "horizontal"
+    elif aspect_ratio < 1:
+        return "vertical"
+def calculate_aspect_ratio_loss(predicted_box, gt_box):
+    """predicted_box와 gt_box간의 가로세로 비율에 대한 차이도를 반환 range:0~1. 클수록 차이가 크다는 뜻."""
+    gt_aspect_ratio = calculate_aspect_ratio(gt_box)
+    pred_aspect_ratio = calculate_aspect_ratio(predicted_box)
+    ratio_difference = abs(gt_aspect_ratio - pred_aspect_ratio)
+    loss = 2 * math.atan(ratio_difference) / math.pi
+    return loss
+def clip_boxes(boxes, shape):
+    # Clip boxes (xyxy) to image shape (height, width)
+    if isinstance(boxes, torch.Tensor):  # faster individually
+        boxes[..., 0].clamp_(0, shape[1])  # x1
+        boxes[..., 1].clamp_(0, shape[0])  # y1
+        boxes[..., 2].clamp_(0, shape[1])  # x2
+        boxes[..., 3].clamp_(0, shape[0])  # y2
+    else:  # np.array (faster grouped)
+        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
+        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
+def is_box_overlap(box1, box2):
+    # Box overlap checking logic
+    if box1[0] > box2[2] or box1[2] < box2[0] or box1[1] > box2[3] or box1[3] < box2[1]:
+        return False
+    else:
+        return True
+def intersection_area(box1, box2):
+    """
+    Calculate the intersection area between two bounding boxes.
+    Parameters:
+    - box1, box2: Tuple or list representing the bounding box in the format (x1, y1, x2, y2).
+    Returns:
+    - area: Intersection area between the two boxes.
+    """
+    x1_box1, y1_box1, x2_box1, y2_box1 = box1
+    x1_box2, y1_box2, x2_box2, y2_box2 = box2
+    # Calculate intersection coordinates
+    x_intersection = max(x1_box1, x1_box2)
+    y_intersection = max(y1_box1, y1_box2)
+    x_intersection_end = min(x2_box1, x2_box2)
+    y_intersection_end = min(y2_box1, y2_box2)
+    # Calculate intersection area
+    width_intersection = max(0, x_intersection_end - x_intersection)
+    height_intersection = max(0, y_intersection_end - y_intersection)
+    area = width_intersection * height_intersection
+    return area
+def bbox_iou(box1, box2, GIoU=False, DIoU=False, CIoU=False, CIoU2=False, eps=1e-7):
+    """
+    Caclulate IoUs(GIoU,DIoU,CIoU,CIoU2)
+    Parameters:
+    - box1, box2: Tuple or list representing the bounding box in the format (x1, y1, x2, y2).
+    Returns:
+    - IoU or GIoU or DIoU or CIoU or CIoU2
+    """
+    # Returns Intersection over Union (IoU)
+    # Get the coordinates of bounding boxes
+    # x1, y1, x2, y2 = box1
+    b1_x1, b1_y1, b1_x2, b1_y2 = box1
+    b2_x1, b2_y1, b2_x2, b2_y2 = box2
+    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
+    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
+    # Intersection area
+    inter = intersection_area(box1, box2)
+    # Union Area
+    union = w1 * h1 + w2 * h2 - inter + eps
+    iou = inter / union
+    if CIoU or DIoU or GIoU or CIoU2:
+        cw = max(b1_x2, b2_x2) - min(
+            b1_x1, b2_x1
+        )  # convex (smallest enclosing box) width
+        ch = max(b1_y2, b2_y2) - min(b1_y1, b2_y1)  # convex height
+        c_area = cw * ch + eps  # convex area
+        giou_penalty = (c_area - union) / c_area
+        if GIoU:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
+            return round(iou - giou_penalty, 4)  # GIoU
+        elif (
+            DIoU or CIoU
+        ):  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
+            rho2 = (
+                (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2
+                + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
+            ) / 4  # center dist ** 2
+            c2 = cw**2 + ch**2 + eps  # convex diagonal squared
+            diou_penalty = rho2 / c2
+            if DIoU:
+                return round(iou - diou_penalty, 4)  # DIoU
+            if CIoU or CIoU2:
+                v = (4 / math.pi**2) * (
+                    (np.arctan((w2 / h2)) - np.arctan(w1 / h1)) ** 2
+                )
+                alpha = v / (v - iou + (1 + eps))
+                ciou_penalty = diou_penalty + alpha * v
+                if CIoU2:
+                    ciou2_penalty = giou_penalty + diou_penalty + alpha * v
+                    return round(iou - ciou2_penalty)  # CIoU2
+                return round(iou - ciou_penalty, 4)  # CIoU
+    return round(iou, 4)  # IoU
+def rotate_around_point(x, y, pivot_x, pivot_y, degrees) -> Tuple[int, int]:
+    """주어진 좌표 (x,y)를 축 좌표(pivot_x,pivot_y_를 기준으로 반시계 방향으로 회전. return new_x,new_y"""
+    # 각도를 라디안으로 변환
+    angle_radians = np.radians(degrees)
+    # 회전 변환 적용
+    x_new = (
+        pivot_x
+        + np.cos(angle_radians) * (x - pivot_x)
+        - np.sin(angle_radians) * (y - pivot_y)
+    )
+    y_new = (
+        pivot_y
+        + np.sin(angle_radians) * (x - pivot_x)
+        + np.cos(angle_radians) * (y - pivot_y)
+    )
+    return int(x_new), int(y_new)
+def rotate_box_coordinates_on_pivot(x1, y1, x2, y2, degrees, pivot_x, pivot_y):
+    """주어진 box 좌표(x1,y1,x2,y2)를 주어진 축 좌표(pivot_x,pivot_y)에 대해 시계 방향으로 회전"""
+    radians = np.radians(degrees)
+    rotation_matrix = np.array(
+        [[np.cos(radians), -np.sin(radians)], [np.sin(radians), np.cos(radians)]]
+    )
+    # 상자 좌표를 중심을 기준으로 회전
+    box_coordinates = np.array(
+        [
+            [x1 - pivot_x, y1 - pivot_y],
+            [x2 - pivot_x, y1 - pivot_y],
+            [x2 - pivot_x, y2 - pivot_y],
+            [x1 - pivot_x, y2 - pivot_y],
+        ]
+    )
+    rotated_box_coordinates = np.dot(box_coordinates, rotation_matrix.T)
+    # 회전 후 좌표에 중심 좌표를 더해 원래 좌표로 변환
+    rotated_box_coordinates += np.array([pivot_y, pivot_x])
+    # 변환된 좌표를 새로운 상자 좌표로 반환
+    new_x1, new_y1 = rotated_box_coordinates.min(axis=0)
+    new_x2, new_y2 = rotated_box_coordinates.max(axis=0)
+    return int(new_x1), int(new_y1), int(new_x2), int(new_y2)
+def bbox_iou_torch(
+    box1, box2, xywh=False, GIoU=False, DIoU=False, CIoU=False, eps=1e-7
+):
+    # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4)
+    # Get the coordinates of bounding boxes
+    if xywh:  # transform from xywh to xyxy
+        (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
+        w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2
+        b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_
+        b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_
+    else:  # x1, y1, x2, y2 = box1
+        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
+        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
+        w1, h1 = b1_x2 - b1_x1, (b1_y2 - b1_y1).clamp(eps)
+        w2, h2 = b2_x2 - b2_x1, (b2_y2 - b2_y1).clamp(eps)
+    # Intersection area
+    inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp(0) * (
+        b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)
+    ).clamp(0)
+    # Union Area
+    union = w1 * h1 + w2 * h2 - inter + eps
+    # IoU
+    iou = inter / union
+    if CIoU or DIoU or GIoU:
+        cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(
+            b2_x1
+        )  # convex (smallest enclosing box) width
+        ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1)  # convex height
+        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
+            c2 = cw**2 + ch**2 + eps  # convex diagonal squared
+            rho2 = (
+                (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2
+                + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
+            ) / 4  # center dist ** 2
+            if (
+                CIoU
+            ):  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
+                v = (4 / math.pi**2) * (
+                    torch.atan(w2 / h2) - torch.atan(w1 / h1)
+                ).pow(2)
+                with torch.no_grad():
+                    alpha = v / (v - iou + (1 + eps))
+                return iou - (rho2 / c2 + v * alpha)  # CIoU
+            return iou - rho2 / c2  # DIoU
+        c_area = cw * ch + eps  # convex area
+        return (
+            iou - (c_area - union) / c_area
+        )  # GIoU https://arxiv.org/pdf/1902.09630.pdf
+    return iou  # IoU
+def generate_random_box(width_range, height_range):
+    """
+    Generate random bounding box coordinates (x1, y1, x2, y2) with random width and height.
+    Parameters:
+    - width_range: Tuple representing the range of width values (min_width, max_width).
+    - height_range: Tuple representing the range of height values (min_height, max_height).
+    Returns:
+    - box: Tuple representing the bounding box in the format (x1, y1, x2, y2).
+    """
+    min_width, max_width = width_range
+    min_height, max_height = height_range
+    width = np.random.randint(min_width, max_width)
+    height = np.random.randint(min_height, max_height)
+    x1 = np.random.randint(0, 100 - width)
+    y1 = np.random.randint(0, 100 - height)
+    x2 = x1 + width
+    y2 = y1 + height
+    return x1, y1, x2, y2
+def mask_to_bboxes(mask, margin_rate=2, pixel_thresh=300) -> pd.DataFrame:
+    nlabels, segmap, stats, centroids = cv2.connectedComponentsWithStats(
+        image=mask, connectivity=4
+    )
+    bboxes = pd.DataFrame(
+        stats[1:, :], columns=["bbox_x1", "bbox_y1", "width", "height", "pixel_count"]
+    )
+    img_width, img_height = _get_width_and_height(mask)
+    bboxes = bboxes[bboxes["pixel_count"].ge(pixel_thresh)]
+    bboxes["bbox_x2"] = bboxes["bbox_x1"] + bboxes["width"]
+    bboxes["bbox_y2"] = bboxes["bbox_y1"] + bboxes["height"]
+    bboxes["margin"] = bboxes.apply(
+        lambda x: int(
+            math.sqrt(
+                x["pixel_count"]
+                * min(x["width"], x["height"])
+                / (x["width"] * x["height"])
+            )
+            * margin_rate
+        ),
+        axis=1,
+    )
+    bboxes["bbox_x1"] = bboxes.apply(
+        lambda x: max(0, x["bbox_x1"] - x["margin"]), axis=1
+    )
+    bboxes["bbox_y1"] = bboxes.apply(
+        lambda x: max(0, x["bbox_y1"] - x["margin"]), axis=1
+    )
+    bboxes["bbox_x2"] = bboxes.apply(
+        lambda x: min(img_width, x["bbox_x2"] + x["margin"]), axis=1
+    )
+    bboxes["bbox_y2"] = bboxes.apply(
+        lambda x: min(img_height, x["bbox_y2"] + x["margin"]), axis=1
+    )
+    bboxes = bboxes[["bbox_x1", "bbox_y1", "bbox_x2", "bbox_y2"]]
+    img_width, img_height = _get_width_and_height(mask)
+    if img_width >= img_height:
+        bboxes.sort_values(by=["bbox_x1", "bbox_y1"], inplace=True)
+    else:
+        bboxes.sort_values(by=["bbox_y1", "bbox_x1"], inplace=True)
+    return bboxes
+def bbox_to_mask(bboxes: list, mask_size):
+    """
+    Creates a mask image based on bounding box coordinates.
+    Args:
+    - bboxes: list (x_min, y_min, x_max, y_max) representing the bounding box coordinates.
+    - mask_size: Tuple (height, width) representing the size of the mask image to be created.
+    Returns:
+    - Mask image with the specified bounding box area filled with white.
+    """
+    # Initialize a black mask image with the specified size
+    mask = np.zeros(mask_size, dtype=np.uint8)
+    # mask = np.zeros_like(img).astype("uint8")
+    for bbox in bboxes:
+        # Extract bounding box coordinates
+        x_min, y_min, x_max, y_max = bbox
+        # Ensure bbox coordinates are within mask bounds
+        x_min = max(0, x_min)
+        y_min = max(0, y_min)
+        x_max = min(mask_size[1], x_max)
+        y_max = min(mask_size[0], y_max)
+        # Fill the bounding box area with white color in the mask image
+        mask[y_min:y_max, x_min:x_max] = 255
+    return mask
+def move_box_a_to_center_of_box_b(A, B):
+    # A와 B의 좌표 (l, t, r, b)
+    lA, tA, rA, bA = A
+    lB, tB, rB, bB = B
+    # 박스 A의 너비와 높이
+    width_A = rA - lA
+    height_A = bA - tA
+    # 박스 B의 중심 좌표
+    center_x_B = (lB + rB) / 2
+    center_y_B = (tB + bB) / 2
+    # 박스 A의 새로운 좌표 (중심을 B의 중심으로 이동)
+    new_lA = center_x_B - width_A / 2
+    new_tA = center_y_B - height_A / 2
+    new_rA = center_x_B + width_A / 2
+    new_bA = center_y_B + height_A / 2
+    # 새로운 A 박스의 좌표 반환
+    return (new_lA, new_tA, new_rA, new_bA)
+def scale_bboxes(bboxes, max_x, max_y, x_scale_factor=1.2, y_scale_factor=1.05):
+    # 기존 좌표에서 각 박스의 중심 좌표, 너비, 높이 계산
+    bboxes["cx"] = (bboxes["bbox_x1"] + bboxes["bbox_x2"]) / 2
+    bboxes["cy"] = (bboxes["bbox_y1"] + bboxes["bbox_y2"]) / 2
+    bboxes["width"] = bboxes["bbox_x2"] - bboxes["bbox_x1"]
+    bboxes["height"] = bboxes["bbox_y2"] - bboxes["bbox_y1"]
+    # 각 박스의 크기를 1.2배로 늘림
+    bboxes["new_width"] = bboxes["width"] * x_scale_factor
+    bboxes["new_height"] = bboxes["height"] * y_scale_factor
+    # 새로운 좌표 계산
+    bboxes["new_x1"] = bboxes["cx"] - bboxes["new_width"] / 2
+    bboxes["new_y1"] = bboxes["cy"] - bboxes["new_height"] / 2
+    bboxes["new_x2"] = bboxes["cx"] + bboxes["new_width"] / 2
+    bboxes["new_y2"] = bboxes["cy"] + bboxes["new_height"] / 2
+    # box 범위 제한
+    bboxes["new_x1"] = bboxes["new_x1"].clip(lower=0).astype(int)
+    bboxes["new_y1"] = bboxes["new_y1"].clip(lower=0).astype(int)
+    bboxes["new_x2"] = bboxes["new_x2"].clip(upper=max_x).astype(int)
+    bboxes["new_y2"] = bboxes["new_y2"].clip(upper=max_y).astype(int)
+    # 결과 데이터프레임 생성
+    new_bboxes = bboxes[
+        ["ori_content", "new_x1", "new_y1", "new_x2", "new_y2", "predicted_lang"]
+    ].copy()
+    new_bboxes.columns = [
+        "ori_content",
+        "bbox_x1",
+        "bbox_y1",
+        "bbox_x2",
+        "bbox_y2",
+        "predicted_lang",
+    ]
+    return new_bboxes
+if __name__ == "__main__":
+    w_range = (100, 200)
+    h_range = (100, 200)
+    box1 = generate_random_box(w_range, h_range)
+    box2 = generate_random_box(w_range, h_range)
+    print(f"box1 coors : {box1}")
+    print(f"box2 coors : {box2}")
+    print(f"intersection area : {intersection_area(box1,box2)}")
+    iou = bbox_iou(box1, box2)
+    giou = bbox_iou(box1, box2, GIoU=True)
+    diou = bbox_iou(box1, box2, DIoU=True)
+    ciou = bbox_iou(box1, box2, CIoU=True)
+    print(iou, giou, diou, ciou)

data_utils/color_utils.py ADDED Viewed

	@@ -0,0 +1,490 @@

+import sys
+import numpy as np
+import cv2
+import convcolors
+import matplotlib.pyplot as plt
+from colormap import rgb2hex
+import matplotlib.pyplot as plt
+from matplotlib.offsetbox import OffsetImage, AnnotationBbox
+import extcolors
+from skimage.color import deltaE_cie76
+import math
+from data_utils import timeit
+from data_utils.image_utils import (
+    _to_pil,
+    _get_pseudo_image,
+    _mask_image,
+    _get_width_and_height,
+    _resize_image,
+    _figure_to_array,
+    load_image,
+    _to_2d,
+)
+np.set_printoptions(precision=3, edgeitems=20, linewidth=sys.maxsize, suppress=False)
+def _to_tuple(color):
+    if isinstance(color, tuple):
+        return color
+    elif isinstance(color, str):
+        if color[:3] == "rgb":
+            return eval(color.replace("rgb", ""))
+        elif color[:3] == "lab":
+            return eval(color.replace("lab", ""))
+    elif isinstance(color, np.ndarray):
+        return tuple(color)
+def _to_str(color, color_space):
+    if isinstance(color, str):
+        return color
+    elif isinstance(color, tuple):
+        if color_space == "rgb":
+            return f"""rgb{color}"""
+        elif color_space == "lab":
+            return f"""lab{color}"""
+def _to_rgb(color):
+    if isinstance(color, str):
+        if color[:3] == "rgb":
+            color = eval(color.replace("rgb", ""))
+            return color
+        elif color[:3] == "lab":
+            color = eval(color.replace("lab", ""))
+            color = convcolors.lab_to_rgb(color)
+            color = tuple([round(i) for i in color])
+            return _to_str(color, color_space="rgb")
+def _to_lab(color):
+    if isinstance(color, str):
+        if color[:3] == "rgb":
+            color = eval(color.replace("rgb", ""))
+            color = convcolors.rgb_to_lab(color)
+            color = tuple([round(i) for i in color])
+            return _to_str(color, color_space="lab")
+        elif color[:3] == "lab":
+            return color
+def _extract_colors(img, mask=None, invert=False, tolerance=10, limit=4):
+    # img(H,W,3), mask(H,W)
+    if mask is None or np.any(mask) == False:
+        pseudo_outer = img
+    else:
+        pseudo_outer = _get_pseudo_image(img=img, mask=mask, invert=invert)
+    colors = extcolors.extract_from_image(
+        img=_to_pil(pseudo_outer), tolerance=tolerance, limit=limit
+    )[0]
+    sum_freqs = sum([i[1] for i in colors])
+    return [
+        {
+            "rgb": rgb,
+            "hex_code": rgb2hex(*rgb),
+            "percentage": round(freq / sum_freqs, 3),
+        }
+        for rgb, freq in colors
+    ]
def get_palette(colors, img=None, mask=None, invert=False, index=None, zoom=4):
    """Render a donut chart of extracted colours, optionally with the image inside.

    Args:
        colors: List of dicts with keys ``"rgb"``, ``"percentage"`` and
            ``"hex_code"`` (the structure ``_extract_colors`` returns).
        img: Optional image drawn at the centre of the chart.
        mask: Optional mask applied to ``img`` through ``_mask_image``.
        invert: Forwarded to ``_mask_image``.
        index: Optional position of a wedge to pull slightly out of the ring.
        zoom: Zoom factor handed to ``OffsetImage``.

    Returns:
        The rendered figure as returned by ``_figure_to_array``.
    """
    rgbs = [entry["rgb"] for entry in colors]
    pers = [entry["percentage"] for entry in colors]
    hex_codes = [entry["hex_code"] for entry in colors]
    labels = [
        f"""{str(rgb)}\n{str(round(per * 100, 1))}%""" for rgb, per in zip(rgbs, pers)
    ]
    explode = [0] * len(rgbs)
    if index is not None:
        explode[index] = 0.05

    fig, ax = plt.subplots(figsize=(30, 20), dpi=15)
    try:
        wedges, _ = ax.pie(
            x=pers,
            labels=labels,
            labeldistance=1.07,
            colors=hex_codes,
            textprops={"fontsize": 50, "color": "black"},
            wedgeprops={"edgecolor": "black", "linewidth": 7},
            startangle=90,
            radius=1,
            counterclock=False,
            explode=explode,
        )
        # A wedge width below the radius turns the pie into a ring. The
        # original set 0.3 and then immediately overwrote it with 0.26.
        plt.setp(wedges, width=0.26)
        ax.set_aspect("equal")

        if img is not None:
            if mask is not None:
                img = _mask_image(img=img, mask=mask, invert=invert)
            w, h = _get_width_and_height(img)
            # Scale the longer side to 400 px, keeping the aspect ratio.
            if w >= h:
                resized_img = _resize_image(img=img, w=400, h=int(400 * h / w))
            else:
                resized_img = _resize_image(img=img, w=int(400 * w / h), h=400)
            offset_img = OffsetImage(resized_img.astype("float32") / 255, zoom=zoom)
            ax.add_artist(AnnotationBbox(offsetbox=offset_img, xy=(0, 0)))

        fig.tight_layout()
        palette = _figure_to_array(fig)
    finally:
        # Close this figure explicitly (not whichever is "current"), also
        # when rendering fails, so figures do not pile up in pyplot.
        plt.close(fig)
    return palette
+def _get_complementary_color(color):
+    if isinstance(color, str):
+        color = _to_tuple(color)
+        return f"""rgb{tuple([255 - rgb for rgb in color])}"""
+    if isinstance(color, tuple):
+        return tuple([255 - rgb for rgb in color])
+def _linearize(x):
+    if isinstance(x, np.ndarray):
+        x = x.astype("float64")
+        x /= 255
+        return np.where(x <= 0.04045, x / 12.92, ((x + 0.055) / 1.055) ** 2.4)
+    elif isinstance(x, (int, np.uint8)):
+        x /= 255
+        if x <= 0.04045:
+            return x / 12.92
+        else:
+            return ((x + 0.055) / 1.055) ** 2.4
def _get_relative_luminance(x):
    """Compute ``0.2126*R + 0.7152*G + 0.0722*B`` on linearised channels.

    Args:
        x: A colour string (converted via ``_to_rgb`` and ``_to_tuple``), an
            array whose last axis holds at least three channels, or a
            3-element tuple.

    Returns:
        The luminance rounded to 3 decimals: an array for array input, a
        scalar for string/tuple input, ``None`` for any other type.

    Raises:
        AssertionError: If a tuple does not have exactly 3 elements.
    """
    if isinstance(x, str):
        return _get_relative_luminance(_to_tuple(_to_rgb(x)))
    if isinstance(x, np.ndarray):
        linear = _linearize(x)
        red, green, blue = linear[..., 0], linear[..., 1], linear[..., 2]
        return np.round(0.2126 * red + 0.7152 * green + 0.0722 * blue, 3)
    if isinstance(x, tuple):
        assert len(x) == 3, "If the argument `x` is tuple, it should have 3 elements."
        red, green, blue = _linearize(x[0]), _linearize(x[1]), _linearize(x[2])
        return round(0.2126 * red + 0.7152 * green + 0.0722 * blue, 3)
    return None
def rgb_to_lab(rgb: tuple):
    """Convert one RGB colour to LAB using ``cv2.COLOR_RGB2LAB``.

    The colour is wrapped as a 1x1 uint8 image for OpenCV, so the returned
    tuple holds the uint8 components OpenCV produced.
    """
    single_pixel = np.uint8([[list(rgb)]])
    converted = cv2.cvtColor(single_pixel, cv2.COLOR_RGB2LAB)
    return tuple(converted[0, 0])
def lab_to_rgb(lab):
    """Convert one LAB colour to RGB using ``cv2.COLOR_LAB2RGB``.

    The colour is wrapped as a 1x1 uint8 image for OpenCV, so the returned
    tuple holds the uint8 components OpenCV produced.
    """
    single_pixel = np.uint8([[list(lab)]])
    converted = cv2.cvtColor(single_pixel, cv2.COLOR_LAB2RGB)
    return tuple(converted[0, 0])
def get_contrast(x, y):
    """Return the contrast ratio ``(lighter + 0.05) / (darker + 0.05)``.

    Both arguments are reduced to relative luminance with
    ``_get_relative_luminance`` first.

    Returns:
        A float rounded to 1 decimal when both luminances are floats, an
        element-wise array when at least one is an array, and ``None``
        otherwise.
    """
    lum_x = _get_relative_luminance(x)
    lum_y = _get_relative_luminance(y)

    if isinstance(lum_x, float) and isinstance(lum_y, float):
        if lum_x > lum_y:
            lighter, darker = lum_x, lum_y
        else:
            lighter, darker = lum_y, lum_x
        return round((lighter + 0.05) / (darker + 0.05), 1)

    if isinstance(lum_x, np.ndarray) or isinstance(lum_y, np.ndarray):
        x_over_y = np.round((lum_x + 0.05) / (lum_y + 0.05), 1)
        y_over_x = np.round((lum_y + 0.05) / (lum_x + 0.05), 1)
        return np.where(lum_x > lum_y, x_over_y, y_over_x)

    return None
def adjust_luminance_for_contrast(color1, color2, th=4.5):
    """Shift the lightness of ``color2`` until its contrast with ``color1`` reaches ``th``.

    Only the first LAB component of ``color2`` is changed, which keeps the
    colour shift small. If ``color1`` is light (its LAB lightness is >= 127)
    ``color2`` is darkened, otherwise it is lightened.

    Args:
        color1 (tuple): Reference colour, e.g. a background colour. It is
            never modified.
        color2 (tuple): Colour to adjust, e.g. a text colour.
        th (float, optional): Target contrast. Defaults to 4.5.

    Returns:
        tuple: ``color2`` unchanged when the contrast already meets ``th`` or
        when no lightness value reaches it; otherwise the adjusted colour as
        returned by ``lab_to_rgb``.
    """
    if get_contrast(color1, color2) >= th:
        return color2

    lab1 = rgb_to_lab(color1)
    lab2 = rgb_to_lab(color2)
    # Loop-invariant: the reference colour after the same LAB round trip
    # that every candidate goes through.
    reference_rgb = lab_to_rgb(lab1)

    step = 3
    max_iterations = 100
    darken = lab1[0] >= 127
    limit = 0 if darken else 255
    # rgb_to_lab yields np.uint8 components; a plain int keeps +/- step from
    # wrapping around at the ends of the 0-255 range.
    lightness = int(lab2[0])

    for _ in range(max_iterations):
        if darken:
            # The original clamped with min(..., 255) here, so the value
            # could drop below 0.
            lightness = max(lightness - step, 0)
        else:
            lightness = min(lightness + step, 255)
        candidate = lab_to_rgb((lightness, lab2[1], lab2[2]))
        if get_contrast(reference_rgb, candidate) >= th:
            return candidate
        if lightness == limit:
            # Further steps would re-test the same saturated candidate.
            break
    return color2
def get_readability(color, bg, contrast_thresh=2.5):
    """Average the contrast values that fall below ``contrast_thresh``.

    Args:
        color: Colour passed through ``_to_tuple`` before comparison.
        bg: Background passed to ``get_contrast``; the result is indexed with
            a boolean mask, so an array-valued contrast is expected.
        contrast_thresh: Contrast values below this count as low contrast.

    Returns:
        21 (the largest value the contrast formula can produce) when nothing
        is below the threshold, otherwise the mean of the values below it.
    """
    contrast = get_contrast(_to_tuple(color), bg)
    low_contrast = contrast[contrast < contrast_thresh]
    return 21 if low_contrast.size == 0 else low_contrast.mean()
def _blend_two_colors(color1, color2, ratio=0.5):
    """Mix two colours linearly in LAB space and return the RGB result.

    Args:
        color1: First colour; weighted by ``ratio``.
        color2: Second colour; weighted by ``1 - ratio``.
        ratio: Weight of ``color1``.

    Returns:
        The result of ``_to_rgb`` applied to the blended LAB colour string.
    """
    lab1 = np.array(_to_tuple(_to_lab(color1)))
    lab2 = np.array(_to_tuple(_to_lab(color2)))
    mixed = lab1 * ratio + lab2 * (1 - ratio)
    mixed_str = _to_str(_to_tuple(mixed), color_space="lab")
    return _to_rgb(mixed_str)
def get_colorfulness(img):
    """Compute a colourfulness score from opponent-colour statistics.

    With ``rg = |R - G|`` and ``yb = |(R + G) / 2 - B|``, the score is
    ``sqrt(std_rg^2 + std_yb^2) + 0.3 * sqrt(mean_rg^2 + mean_yb^2)``.

    Returns:
        The score, or 0 when ``cv2.split`` does not yield exactly three
        channels (the unpacking ``ValueError`` is caught).
    """
    try:
        red, green, blue = cv2.split(img.astype("float"))
    except ValueError:
        return 0

    rg = np.absolute(red - green)
    yb = np.absolute((red + green) / 2 - blue)

    spread = np.sqrt((np.std(rg) ** 2) + (np.std(yb) ** 2))
    magnitude = np.sqrt((np.mean(rg) ** 2) + (np.mean(yb) ** 2))
    return spread + (0.3 * magnitude)
def get_colorfulness_by_extracting_colors(img, limit=20):
    """Count how many extracted colours keep the running share below 98 %.

    Colours come from ``_extract_colors`` (tolerance 10, at most ``limit``
    colours). The result is the number of positions at which the cumulative
    percentage is still below 0.98.
    """
    extracted = _extract_colors(img=img, tolerance=10, limit=limit)
    shares = np.array([entry["percentage"] for entry in extracted])
    return (shares.cumsum() < 0.98).sum()
+def _colors_to_pseudo_image(colors):
+    pseudo_img = np.array(colors, dtype="uint8")[None, ...]
+    return pseudo_img
def _pick_most_colors(colors, tolerance):
    """Reduce a list of colours to the representatives ``extcolors`` extracts.

    The colours are laid out as a one-row image and passed to
    ``extcolors.extract_from_image`` with ``limit=len(colors)``.

    Returns:
        The colour component (first element) of every extracted entry.
    """
    row_image = _to_pil(_colors_to_pseudo_image(colors))
    extraction = extcolors.extract_from_image(
        img=row_image, tolerance=tolerance, limit=len(colors)
    )
    return [entry[0] for entry in extraction[0]]
def _get_euclidean_distance(color1, color2):
    """Return the ``deltaE_cie76`` distance between two RGB colours.

    Each colour is converted with ``convcolors.rgb_to_lab`` and reshaped to a
    1x1 "image" before the comparison.
    """
    lab1 = np.array(convcolors.rgb_to_lab(color1))[np.newaxis, np.newaxis, ...]
    lab2 = np.array(convcolors.rgb_to_lab(color2))[np.newaxis, np.newaxis, ...]
    distances = deltaE_cie76(lab1, lab2)
    return distances[0][0]
def is_similar_black_or_gray(color) -> str:
    """Tell whether an (R, G, B) colour is closer to black or to mid-grey.

    Returns:
        ``"black"`` when the distance to (0, 0, 0) is strictly smaller than
        the distance to (128, 128, 128), otherwise ``"gray"``.
    """
    to_black = _get_euclidean_distance(color, (0, 0, 0))
    to_gray = _get_euclidean_distance(color, (128, 128, 128))
    return "black" if to_black < to_gray else "gray"
def is_similar_white_or_gray(color) -> str:
    """Tell whether an (R, G, B) colour is closer to white or to mid-grey.

    Returns:
        ``"white"`` when the distance to (255, 255, 255) is strictly smaller
        than the distance to (128, 128, 128), otherwise ``"gray"``.
    """
    to_gray = _get_euclidean_distance(color, (128, 128, 128))
    to_white = _get_euclidean_distance(color, (255, 255, 255))
    return "white" if to_white < to_gray else "gray"
def is_similar_white_or_black(color) -> str:
    """Tell whether an (R, G, B) colour is closer to white or to black.

    Returns:
        ``"white"`` when the distance to (255, 255, 255) is strictly smaller
        than the distance to (0, 0, 0), otherwise ``"black"``.
    """
    to_black = _get_euclidean_distance(color, (0, 0, 0))
    to_white = _get_euclidean_distance(color, (255, 255, 255))
    return "white" if to_white < to_black else "black"
def view_hist(img):
    """Plot the 256-bin histogram of channels 0, 1, 2 and save it as ``calc_hist.png``.

    Channels are drawn in red, green and blue respectively onto pyplot's
    current figure.
    """
    # NOTE(review): the current figure is neither cleared nor closed, so
    # repeated calls appear to draw on top of earlier curves - confirm intent.
    for channel, line_color in enumerate(("r", "g", "b")):
        channel_hist = cv2.calcHist([img], [channel], None, [256], [0, 256])
        plt.plot(channel_hist, color=line_color)
    plt.savefig("calc_hist.png")
def normalize_image(img):
    """Min-max normalise ``img`` to the 0-255 range with ``cv2.normalize``."""
    return cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX)
def equalize_hist(img):
    """Convert an RGB image to grayscale and equalise it with CLAHE.

    Contrast-limited adaptive histogram equalisation is applied with a clip
    limit of 2.0 on an 8x8 tile grid.

    Returns:
        The equalised single-channel image.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return equalizer.apply(grayscale)
def is_gray(color, threshold=30):
    """Return True when all three channels lie within ``threshold`` of each other.

    Args:
        color: Three channel values; they may be ``np.uint8``.
        threshold: Exclusive upper bound on any pairwise channel difference.
    """
    # Cast to int first: np.uint8 subtraction could overflow.
    r, g, b = map(int, color)
    channels = (r, g, b)
    # The largest pairwise difference of three numbers equals max - min.
    return max(channels) - min(channels) < threshold
def merge_similar_colors(colors, tolerance=10):
    """Replace every colour with the nearest representative colour.

    Representatives come from ``_pick_most_colors``; "nearest" is the
    smallest ``_get_euclidean_distance``, with ties going to the earlier
    representative.

    Args:
        colors: Sequence of colours.
        tolerance: Forwarded to ``_pick_most_colors``.

    Returns:
        A list with one representative per input colour, in input order.
    """
    representatives = _pick_most_colors(colors, tolerance=tolerance)
    # min() replaces the hand-rolled search, whose result variable could be
    # left unbound when no candidate compared as smaller.
    return [
        min(representatives, key=lambda rep: _get_euclidean_distance(color, rep))
        for color in colors
    ]
def merge_colors(colors, tolerance=10):
    """Snap colour strings onto a reduced palette extracted from them.

    Args:
        colors: Sequence of strings with a 3-character prefix followed by a
            tuple literal, e.g. ``"rgb(0, 0, 0)"``.
        tolerance: Forwarded to ``_extract_colors``.

    Returns:
        A list of strings built by ``_to_str(..., color_space="rgb")``, one
        per input colour; an empty list for empty input.

    Raises:
        ValueError, SyntaxError: If the text after the prefix is not a plain
            Python literal.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    if len(colors) == 0:
        return []

    # literal_eval accepts only literals, so a crafted colour string cannot
    # run arbitrary code the way eval() would.
    parsed = [ast.literal_eval(text[3:]) for text in colors]
    # One-row image holding every colour once, in input order.
    pseudo_img = np.array([parsed], dtype="uint8")
    extracted_colors = _extract_colors(
        pseudo_img,
        mask=None,
        invert=False,
        tolerance=tolerance,
        limit=len(colors) // 2,
    )
    palette = [entry["rgb"] for entry in extracted_colors]
    # min() keeps the first of equally close palette colours, like the
    # original strict "<" search.
    merged = [
        min(palette, key=lambda rgb: _get_euclidean_distance(rgb, original))
        for original in parsed
    ]
    return [_to_str(rgb, color_space="rgb") for rgb in merged]
def get_most_color(img, mask=None, min_count=10, get_full=False):
    """Find the most frequent colour of an image, optionally inside a mask.

    Args:
        img: (H, W, 3) image with values 0-255.
        mask: Optional mask with values 0 or 255; pixels whose first mask
            channel equals 255 are used.
        min_count: When no colour occurs more often than this, the
            count-weighted mean colour is used instead of the mode.
        get_full: When True, return all unique colours and their counts.

    Returns:
        ``(colors, counts)`` when ``get_full`` is True, otherwise
        ``((R, G, B), count_of_the_most_frequent_colour)``.
    """
    if mask is None:
        pixels = img.reshape(-1, 3)
    else:
        pixels = img[_to_2d(mask) == 255]

    colors, colors_counts = np.unique(pixels, axis=0, return_counts=True)
    top_count = colors_counts.max()

    if top_count > min_count:
        most_color = tuple(colors[np.argmax(colors_counts)])
    else:
        weighted_sum = (colors_counts[:, np.newaxis] * colors).sum(axis=0)
        mean_color = weighted_sum / colors_counts.sum()
        most_color = tuple(mean_color.astype(np.uint8))

    if get_full:
        return colors, colors_counts
    return most_color, top_count  # (R,G,B), color count
# Ad-hoc manual check, executed only when this module is run as a script.
# NOTE(review): it loads a screenshot from a developer-specific absolute path,
# so it presumably only runs on the original author's machine.
if __name__ == "__main__":
    img = load_image(
        "/Users/jongbeomkim/Desktop/Screen Shot 2023-11-07 at 10.41.23 AM.png"
    )
    # Per-pixel contrast between a fixed colour and the loaded image.
    contrast = get_contrast("rgb(10, 100, 100)", img)
    below_thresh = contrast[contrast < 0]
    # The two branches below are bare expressions: their values are discarded.
    if below_thresh.size == 0:
        21
    else:
        below_thresh.mean()
    # Sample input for merge_colors: many near-duplicate greys, blacks and browns.
    colors = [
        "rgb(0, 0, 0)",
        "rgb(95, 95, 95)",
        "rgb(184, 137, 91)",
        "rgb(0, 0, 0)",
        "rgb(0, 0, 0)",
        "rgb(93, 93, 93)",
        "rgb(182, 142, 93)",
        "rgb(0, 0, 0)",
        "rgb(0, 0, 0)",
        "rgb(99, 99, 99)",
        "rgb(0, 0, 0)",
        "rgb(0, 0, 0)",
        "rgb(90, 90, 90)",
        "rgb(184, 141, 90)",
        "rgb(0, 0, 0)",
        "rgb(0, 0, 0)",
        "rgb(93, 93, 93)",
        "rgb(14, 14, 14)",
        "rgb(17, 17, 17)",
        "rgb(101, 101, 101)",
        "rgb(97, 97, 97)",
        "rgb(193, 193, 193)",
        "rgb(122, 122, 122)",
        "rgb(122, 122, 122)",
        "rgb(0, 0, 0)",
        "rgb(118, 118, 118)",
    ]
    new_colors = merge_colors(colors, tolerance=30)
    # Bare expression: has no effect when run as a script.
    new_colors

data_utils/conf.py ADDED Viewed

	@@ -0,0 +1,170 @@

+from dataclasses import dataclass
+from typing import Optional
+from dataclasses_json import dataclass_json
+import json
+from pathlib import Path
# Slack alarm settings; instances are built by the `_parse_slack` methods of
# the config classes in this module from the "alarm" -> "slack" config section.
@dataclass
class Slack:
    # The first four fields are required keys of the slack config section.
    url: str
    channel: str
    username: str
    icon_emoji: str
    # Optional keys; None when absent from the config.
    channel_id: Optional[str] = None
    bot_token: Optional[str] = None
class Config(object):
    """Service configuration read from ``config/config.<env>.json``.

    After construction the instance exposes ``awss3_bucket``, ``awss3_path``,
    ``requested_img_path``, ``text_removed_img_path``, ``mq_url``,
    ``req_queue``, ``req_pattern``, ``resp_queue``, ``success_resp_pattern``,
    ``failure_resp_pattern`` and ``slack``.
    """

    def __init__(self, env="dev"):
        """Load and parse the configuration file for ``env``.

        Args:
            env: ``"dev"`` selects ``config.dev.json``; any other value
                selects ``config.prod.json``. Both are looked up in the
                ``config`` directory two levels above this file.

        Raises:
            KeyError: If the ``"mq_server"``, ``"queue"`` or
                ``"alarm"``/``"slack"`` sections (or their required keys)
                are missing.
        """
        if env == "dev":
            config_name = "config/config.dev.json"
        else:
            config_name = "config/config.prod.json"
        config_path = Path(__file__).parent.parent / config_name
        config = self._read_config(config_path)

        # The storage sections are optional; absent keys yield None.
        storage = config.get("storage", {})
        self.awss3_bucket, self.awss3_path = self.__parse_awss3_storage(
            storage.get("awss3", {})
        )
        (
            self.requested_img_path,
            self.text_removed_img_path,
        ) = self.__parse_local_storage(storage.get("local", {}))

        self.mq_url = self._parse_amqp_server(config["mq_server"])
        (
            self.req_queue,
            self.req_pattern,
            self.resp_queue,
            self.success_resp_pattern,
            self.failure_resp_pattern,
        ) = self._parse_queue(config["queue"])
        self.slack = self._parse_slack(config["alarm"]["slack"])

    def _read_config(self, config_path) -> dict:
        """Read the JSON file at ``config_path`` and return its content."""
        # Explicit UTF-8: the platform default encoding may differ and would
        # then mis-decode non-ASCII values.
        with open(config_path, mode="r", encoding="utf-8") as f:
            return json.load(f)

    def _parse_amqp_server(self, amqp_server) -> str:
        """Build the ``amqps://user:password@host:port`` URL from the section."""
        username = amqp_server["username"]
        password = amqp_server["password"]
        url = amqp_server["url"]
        port = amqp_server["port"]
        # NOTE(review): credentials are interpolated verbatim; characters such
        # as "@" or "/" would presumably need percent-encoding - confirm.
        return f"amqps://{username}:{password}@{url}:{port}"

    def _parse_queue(self, queue) -> tuple:
        """Return request name/pattern, response name and both response patterns.

        Only ``"response_name"`` is optional (``None`` when absent).
        """
        return (
            queue["request_name"],
            queue["request_pattern"],
            queue.get("response_name"),
            queue["success_response_pattern"],
            queue["failure_response_pattern"],
        )

    def __parse_awss3_storage(self, awss3_storage) -> tuple:
        """Return ``(default_bucket, default_path)``; missing keys become None."""
        return (
            awss3_storage.get("default_bucket"),
            awss3_storage.get("default_path"),
        )

    def __parse_local_storage(self, local_storage) -> tuple:
        """Return ``(requested, text_removed)`` paths; missing keys become None."""
        return local_storage.get("requested"), local_storage.get("text_removed")

    def _parse_slack(self, slack) -> Slack:
        """Build a ``Slack`` record; ``channel_id`` and ``bot_token`` are optional."""
        return Slack(
            slack["url"],
            slack["channel"],
            slack["username"],
            slack["icon_emoji"],
            slack.get("channel_id", None),
            slack.get("bot_token", None),
        )
class ImageTrConfig(object):
    """Image-translation configuration read from ``config/config.<env>.json``.

    After construction the instance exposes ``awss3_bucket``,
    ``awss3_inpainting_path``, ``awss3_translation_path``, ``mq_url``,
    ``req_queue``, ``req_pattern``, ``resp_queue``, ``success_resp_pattern``,
    ``failure_resp_pattern`` and ``slack``.
    """

    def __init__(self, env="dev"):
        """Load and parse the configuration file for ``env``.

        Args:
            env: ``"dev"`` selects ``config.dev.json``; any other value
                selects ``config.prod.json``. Both are looked up in the
                ``config`` directory two levels above this file.

        Raises:
            KeyError: If the ``"mq_server"``, ``"queue"`` or
                ``"alarm"``/``"slack"`` sections (or their required keys)
                are missing.
        """
        if env == "dev":
            config_name = "config/config.dev.json"
        else:
            config_name = "config/config.prod.json"
        config_path = Path(__file__).parent.parent / config_name
        config = self._read_config(config_path)

        # The storage section is optional; absent keys yield None.
        (
            self.awss3_bucket,
            self.awss3_inpainting_path,
            self.awss3_translation_path,
        ) = self.__parse_awss3_storage(config.get("storage", {}).get("awss3", {}))

        self.mq_url = self._parse_amqp_server(config["mq_server"])
        (
            self.req_queue,
            self.req_pattern,
            self.resp_queue,
            self.success_resp_pattern,
            self.failure_resp_pattern,
        ) = self._parse_queue(config["queue"])
        self.slack = self._parse_slack(config["alarm"]["slack"])

    def _read_config(self, config_path) -> dict:
        """Read the JSON file at ``config_path`` and return its content."""
        # Explicit UTF-8: the platform default encoding may differ and would
        # then mis-decode non-ASCII values.
        with open(config_path, mode="r", encoding="utf-8") as f:
            return json.load(f)

    def _parse_amqp_server(self, amqp_server) -> str:
        """Build the ``amqps://user:password@host:port`` URL from the section."""
        username = amqp_server["username"]
        password = amqp_server["password"]
        url = amqp_server["url"]
        port = amqp_server["port"]
        # NOTE(review): credentials are interpolated verbatim; characters such
        # as "@" or "/" would presumably need percent-encoding - confirm.
        return f"amqps://{username}:{password}@{url}:{port}"

    def _parse_queue(self, queue) -> tuple:
        """Return request name/pattern, response name and both response patterns.

        Only ``"response_name"`` is optional (``None`` when absent).
        """
        return (
            queue["request_name"],
            queue["request_pattern"],
            queue.get("response_name"),
            queue["success_response_pattern"],
            queue["failure_response_pattern"],
        )

    def __parse_awss3_storage(self, awss3_storage) -> tuple:
        """Return ``(default_bucket, inpainting_path, translation_path)``.

        Missing keys become None.
        """
        return (
            awss3_storage.get("default_bucket"),
            awss3_storage.get("inpainting_path"),
            awss3_storage.get("translation_path"),
        )

    def __parse_local_storage(self, local_storage) -> tuple:
        """Return ``(requested, text_removed)`` paths; missing keys become None.

        Not called by ``__init__`` of this class.
        """
        return local_storage.get("requested"), local_storage.get("text_removed")

    def _parse_slack(self, slack) -> Slack:
        """Build a ``Slack`` record; ``channel_id`` and ``bot_token`` are optional."""
        return Slack(
            slack["url"],
            slack["channel"],
            slack["username"],
            slack["icon_emoji"],
            slack.get("channel_id", None),
            slack.get("bot_token", None),
        )

data_utils/image_utils.py ADDED Viewed

	@@ -0,0 +1,1364 @@

+# References
+# https://sashamaps.net/docs/resources/20-colors/
+import numpy as np
+import cv2
+from scipy import ndimage as ndi
+from PIL import Image, ImageDraw, ImageCms, ExifTags, ImageEnhance
+import requests
+from pathlib import Path
+import pandas as pd
+from scipy.sparse import coo_matrix
+from skimage.feature import peak_local_max
+from skimage.morphology import local_maxima
+from skimage.segmentation import watershed
+from moviepy.video.io.bindings import mplfig_to_npimage
+import io
+import os
+from enum import Enum
# Palette of 20 distinct (R, G, B) colours. `_repaint_segmentation_map` picks
# an entry by `label % len(COLORS)` to paint segmentation labels.
COLORS = (
    (230, 25, 75),
    (60, 180, 75),
    (255, 255, 25),
    (0, 130, 200),
    (245, 130, 48),
    (145, 30, 180),
    (70, 240, 250),
    (240, 50, 230),
    (210, 255, 60),
    (250, 190, 212),
    (0, 128, 128),
    (220, 190, 255),
    (170, 110, 40),
    (255, 250, 200),
    (128, 0, 0),
    (170, 255, 195),
    (128, 128, 0),
    (255, 215, 180),
    (0, 0, 128),
    (128, 128, 128),
)
# Enumeration of five method identifiers.
# NOTE(review): the member names look like key-point / corner detection
# methods (Harris, contours, GFTT, FAST, KAZE) - confirm against the callers.
class PC_TYPE(Enum):
    HARRIS = 1
    EDGES_CONTOURS = 2
    GFTT = 3
    FAST = 4
    KAZE = 5
+def _to_2d(img):
+    # it use just first channel. if you want rgb2gray, use _to_grayscale
+    if img.ndim == 3:
+        return img[:, :, 0]
+    else:
+        return img
+def _to_3d(img):
+    if img.ndim == 2:
+        return np.dstack([img, img, img])
+    else:
+        return img
+def _to_byte(img: Image, format) -> bytes:
+    # BytesIO is a file-like buffer stored in memory
+    imgByteArr = io.BytesIO()
+    # image.save expects a file-like as a argument
+    img.save(imgByteArr, format=format)
+    # Turn the BytesIO object back into a bytes object
+    imgByteArr = imgByteArr.getvalue()
+    return imgByteArr
+def _get_width_and_height(img):
+    if img.ndim == 2:
+        h, w = img.shape
+    else:
+        h, w, _ = img.shape
+    return w, h
def _get_resolution(img):
    """Return the pixel count (width times height) of an image."""
    width, height = _get_width_and_height(img)
    return width * height
def _to_pil(img):
    """Return ``img`` as a PIL image.

    PIL images pass through unchanged; anything else is converted with
    ``Image.fromarray(..., mode="RGB")``.
    """
    if isinstance(img, Image.Image):
        return img
    return Image.fromarray(img, mode="RGB")
+def _to_array(img):
+    img = np.array(img)
+    return img
+def _bool_to_uint8(img):
+    uint8 = img.astype("uint8")
+    if (
+        np.array_equal(np.unique(uint8), np.array([0, 1]))
+        or np.array_equal(np.unique(uint8), np.array([0]))
+        or np.array_equal(np.unique(uint8), np.array([1]))
+    ):
+        return uint8 * 255
+    else:
+        return uint8
def _figure_to_array(fig):
    """Render a Matplotlib figure to an array via ``mplfig_to_npimage``."""
    return mplfig_to_npimage(fig)
def _preprocess_image(img):
    """Bring segmentation maps, boolean masks and 2-D maps into a displayable form.

    Steps, in order:
      * int32 arrays are painted with ``_repaint_segmentation_map``;
      * boolean arrays become uint8 with values 0/255;
      * a 2-D array containing only 0 and/or 255 is stacked to three
        channels, any other 2-D array goes through ``_apply_jet_colormap``.
    """
    if img.dtype == "int32":
        img = _repaint_segmentation_map(img)
    if img.dtype == "bool":
        img = img.astype("uint8") * 255
    if img.ndim != 2:
        return img

    distinct_values = set(np.unique(img).tolist())
    is_binary_mask = bool(distinct_values) and distinct_values <= {0, 255}
    if is_binary_mask:
        return _to_3d(img)
    return _apply_jet_colormap(img)
def _blend_two_images(img1, img2, alpha=0.5):
    """Blend two images with ``PIL.Image.blend`` and return the result as an array.

    Args:
        img1: First image.
        img2: Second image.
        alpha: Blend factor forwarded to ``Image.blend``.
    """
    first = _to_pil(img1)
    second = _to_pil(img2)
    return _to_array(Image.blend(im1=first, im2=second, alpha=alpha))
def _repaint_segmentation_map(seg_map):
    """Paint a label map with the ``COLORS`` palette.

    Label ``k`` receives ``COLORS[k % len(COLORS)]``; label 0 is painted
    black.

    Args:
        seg_map: Integer label map. Assumed to be 2-D (H, W), which is what
            the original per-channel implementation stacked into (H, W, 3).

    Returns:
        A uint8 array with one (R, G, B) triple per label-map element.
    """
    palette = np.array(COLORS, dtype="uint8")
    # One lookup replaces the original 3 x len(COLORS) masked writes.
    painted = palette[seg_map % len(COLORS)]
    painted[seg_map == 0] = 0
    return painted
+def _get_canvas_same_size_as_image(img, black=False):
+    if black:
+        return np.zeros_like(img).astype("uint8")
+    else:
+        return (np.ones_like(img) * 255).astype("uint8")
+def _get_canvas(w, h, black=False):
+    if black:
+        return np.zeros((h, w, 3)).astype("uint8")
+    else:
+        return (np.ones((h, w, 3)) * 255).astype("uint8")
def _invert_image(mask):
    """Cast ``mask`` to uint8 and flip every bit with ``cv2.bitwise_not``."""
    as_uint8 = mask.astype("uint8")
    return cv2.bitwise_not(as_uint8)
def _to_grayscale(img):
    """Convert an RGB image to grayscale with ``cv2.COLOR_RGB2GRAY``."""
    return cv2.cvtColor(src=img, code=cv2.COLOR_RGB2GRAY)
def _erode_mask(mask, kernel_size=3):
    """Erode a mask with a square structuring element.

    Args:
        mask: Mask array; boolean masks are first converted to uint8 0/255.
        kernel_size: Side length of the rectangular kernel.
    """
    if mask.dtype == "bool":
        mask = mask.astype("uint8") * 255
    square = cv2.getStructuringElement(
        shape=cv2.MORPH_RECT, ksize=(kernel_size, kernel_size)
    )
    return cv2.erode(src=mask, kernel=square)
def _dilate_mask(mask, kernel_size=3):
    """Dilate a mask with a square structuring element.

    Args:
        mask: Mask array; boolean masks are first converted to uint8 0/255.
        kernel_size: Side length of the rectangular kernel. A value of 0
            returns ``mask`` untouched.
    """
    if kernel_size == 0:
        return mask
    if mask.dtype == "bool":
        mask = mask.astype("uint8") * 255
    square = cv2.getStructuringElement(
        shape=cv2.MORPH_RECT, ksize=(kernel_size, kernel_size)
    )
    return cv2.dilate(src=mask, kernel=square)
def _gaussian_blur_mask(mask, kernel_size=5):
    """Grow a mask by Gaussian-blurring it and keeping every non-zero pixel.

    Args:
        mask: Mask array handed to ``cv2.GaussianBlur``.
        kernel_size: Side length of the blur kernel.

    Returns:
        A uint8 mask: 255 where the blurred mask is non-zero, 0 elsewhere.
    """
    blurred = cv2.GaussianBlur(src=mask, ksize=(kernel_size, kernel_size), sigmaX=0)
    touched = blurred != 0
    return touched.astype("uint8") * 255
def _blur(img, v=0.04):
    """Gaussian-blur an image with a kernel proportional to its shorter side.

    Args:
        img: Image array.
        v: Fraction of ``min(width, height)`` used as the kernel size before
            it is forced to an odd number.
    """
    width, height = _get_width_and_height(img)
    raw_size = round(min(width, height) * v)
    # "// 2 * 2 + 1" maps an even size to the next odd one and keeps odd
    # sizes as they are.
    odd_size = raw_size // 2 * 2 + 1
    return cv2.GaussianBlur(
        src=img.copy(order="C"), ksize=(odd_size, odd_size), sigmaX=0
    )
def _get_adaptive_thresholded_image(img, invert=False, block_size=3):
    """Binarise an RGB image with mean-based adaptive thresholding.

    Args:
        img: RGB image; converted to grayscale first.
        invert: Use ``THRESH_BINARY_INV`` instead of ``THRESH_BINARY``.
        block_size: Neighbourhood size forwarded as ``blockSize``.

    Returns:
        The thresholded image (``maxValue`` 255, constant ``C`` 0).
    """
    grayscale = cv2.cvtColor(src=img, code=cv2.COLOR_RGB2GRAY)
    threshold_type = cv2.THRESH_BINARY_INV if invert else cv2.THRESH_BINARY
    return cv2.adaptiveThreshold(
        src=grayscale,
        maxValue=255,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
        thresholdType=threshold_type,
        blockSize=block_size,
        C=0,
    )
+def _make_segmentation_map_rectangle(seg_map):
+    seg_map_copied = seg_map.copy(order="C")
+    for idx in range(1, np.max(seg_map_copied) + 1):
+        seg_map_sub = seg_map_copied == idx
+        nonzero_x = np.where((seg_map_sub != 0).any(axis=0))[0]
+        nonzero_y = np.where((seg_map_sub != 0).any(axis=1))[0]
+        if nonzero_x.size != 0 and nonzero_y.size != 0:
+            seg_map_copied[
+                nonzero_y[0] : nonzero_y[-1], nonzero_x[0] : nonzero_x[-1]
+            ] = idx
+    return seg_map_copied
def _apply_jet_colormap(img):
    """Apply OpenCV's JET colour map to the inverted image (``255 - img``)."""
    inverted = 255 - img
    return cv2.applyColorMap(src=inverted, colormap=cv2.COLORMAP_JET)
def _reverse_jet_colormap(img):
    """Map an image produced by ``_apply_jet_colormap`` back to gray values.

    A lookup table from colour triple to gray value is built by running all
    256 gray values through ``_apply_jet_colormap``; every pixel of ``img``
    is then looked up in that table along the last axis.

    Raises:
        KeyError: If a pixel colour is not one of the 256 table colours.
    """
    gray_values = np.arange(256, dtype=np.uint8)
    # One colour triple per gray value, in gray-value order.
    color_values = list(map(tuple, _apply_jet_colormap(gray_values).reshape(256, 3)))
    color_to_gray_map = dict(zip(color_values, gray_values))
    # Per-pixel dictionary lookup over the channel axis.
    out = np.apply_along_axis(
        lambda bgr: color_to_gray_map[tuple(bgr)], axis=2, arr=img
    )
    return out
+def _get_pixel_counts(arr, sort=False, include_zero=False):
+    unique, cnts = np.unique(arr, return_counts=True)
+    idx2cnt = dict(zip(unique, cnts))
+    if not include_zero:
+        if 0 in idx2cnt:
+            idx2cnt.pop(0)
+    if not sort:
+        return idx2cnt
+    else:
+        return dict(sorted(idx2cnt.items(), key=lambda x: x[1], reverse=True))
def _combine_masks(masks):
    """Merge several masks into one by taking the element-wise maximum.

    Each mask and the running result are passed through ``_to_3d`` before
    being combined, starting from a black canvas the size of the first mask.
    """
    merged = _get_canvas_same_size_as_image(img=masks[0], black=True)
    for current in masks:
        merged = np.maximum(_to_3d(merged), _to_3d(current))
    return merged
def _get_local_maxima_coordinates(region_score_map, region_seg_map=None, th=150):
    """Locate local maxima of a score map, returned as (x, y) rows.

    Args:
        region_score_map: Score map searched for peaks.
        region_seg_map: Optional label map restricting the search. When
            omitted it is built by thresholding the score map at ``th`` and
            labelling 4-connected components.
        th: Threshold used only when ``region_seg_map`` is omitted. The
            original author notes that `150` works better when
            `src_lang="ja"`.

    Returns:
        Array with one (x, y) row per peak: at most 24 peaks per label, at
        least 5 pixels apart.
    """
    if region_seg_map is None:
        _, thresholded = cv2.threshold(
            src=region_score_map, thresh=th, maxval=255, type=cv2.THRESH_BINARY
        )
        _, region_seg_map = cv2.connectedComponents(image=thresholded, connectivity=4)
    peaks_yx = peak_local_max(
        image=region_score_map,
        min_distance=5,
        labels=region_seg_map,
        num_peaks_per_label=24,
    )
    # peak_local_max reports (row, col); reverse the columns to get (x, y).
    return peaks_yx[:, ::-1]
def _get_local_maxima_array(region_score_map, region_seg_map=None, th=150):
    """Return a boolean map that is True at the local maxima of a score map.

    Args:
        region_score_map: Score map searched for peaks.
        region_seg_map: Optional label map forwarded to
            ``_get_local_maxima_coordinates``. The original accepted this
            argument but always passed ``None`` on.
        th: Threshold forwarded to ``_get_local_maxima_coordinates``.

    Returns:
        Boolean array with the shape of ``region_score_map``.
    """
    peaks_xy = _get_local_maxima_coordinates(
        region_score_map, region_seg_map=region_seg_map, th=th
    )
    local_max = np.zeros(region_score_map.shape, dtype="bool")
    # Coordinates are (x, y); arrays are indexed [row (y), column (x)].
    local_max[peaks_xy[:, 1], peaks_xy[:, 0]] = True
    return local_max
def _mask_image(img, mask, invert=False):
    """Keep only the part of ``img`` selected by ``mask``.

    Args:
        img (PIL image or np.ndarray): Image to mask.
        mask (PIL image or np.ndarray): Mask, either (H, W) or (H, W, C); of
            a 3-D mask only the first channel is used.
        invert (bool, optional): Select the area outside the mask instead.

    Returns:
        np.ndarray: ``img`` with every unselected pixel set to 0.
    """
    img = _to_array(img)
    mask = _to_2d(_to_array(mask))
    if invert:
        # A boolean mask casts to 0/1, and bitwise_not(0/1) is 255/254, which
        # would select every pixel. Stretch it to 0/255 first.
        if mask.dtype == "bool":
            mask = mask.astype("uint8") * 255
        mask = _invert_image(mask)
    return cv2.bitwise_and(src1=img, src2=img, mask=mask.astype("uint8"))
def _ignore_small_regions_in_mask(mask, area_thresh=10):
    """Remove 4-connected regions smaller than ``area_thresh`` pixels.

    Args:
        mask: Mask array; of a 3-D mask only the first channel is used.
        area_thresh: Minimum area for a region to be kept.

    Returns:
        A 3-channel uint8 mask: 255 inside the kept regions, 0 elsewhere.
    """
    mask = _to_2d(mask)
    _, seg_map, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype("uint8"), connectivity=4
    )
    # Row 0 of `stats` describes the background label, so only rows 1..
    # are candidates. The original dropped the first *qualifying* row, which
    # removed a real region whenever the background was below the threshold.
    kept_labels = np.flatnonzero(stats[1:, cv2.CC_STAT_AREA] >= area_thresh) + 1
    keep = np.isin(seg_map, kept_labels)
    return _to_3d(keep.astype("uint8") * 255)
def _crop_image(img, l, t, r, b):
    """Crop ``img`` to the box (left, top, right, bottom), clipped to the image.

    Coordinates are truncated to int after clipping; ``r`` and ``b`` are
    exclusive slice ends.
    """
    width, height = _get_width_and_height(img)
    top, bottom = int(max(0, t)), int(min(height, b))
    left, right = int(max(0, l)), int(min(width, r))
    return img[top:bottom, left:right, ...]
def _bboxes_to_mask(img, bboxes):
    """Draw every bounding box as a filled 255 rectangle on a black canvas.

    Args:
        img: Image that defines the canvas size.
        bboxes: DataFrame with columns ``bbox_x1``, ``bbox_y1``, ``bbox_x2``
            and ``bbox_y2``; the ``*2`` values are exclusive slice ends.

    Returns:
        The canvas after ``_to_3d``.
    """
    canvas = _get_canvas_same_size_as_image(img=img, black=True)
    for box in bboxes.itertuples():
        rows = slice(box.bbox_y1, box.bbox_y2)
        cols = slice(box.bbox_x1, box.bbox_x2)
        canvas[rows, cols] = 255
    return _to_3d(canvas)
def _apply_watershed(mask, region_score_map, th=150):
    """Segment ``mask`` by watershed, seeded at local maxima of the score map.

    Args:
        mask: Area to segment; only its first channel is used.
        region_score_map: Score map; its negation is flooded.
        th: Threshold forwarded to ``_get_local_maxima_array``.

    Returns:
        The label map produced by ``watershed``.
    """
    peaks = _get_local_maxima_array(region_score_map, th=th)
    _, seeds = cv2.connectedComponents(image=peaks.astype("uint8"), connectivity=4)
    return watershed(image=-region_score_map, markers=seeds, mask=_to_2d(mask))
def _perform_watershed(score_map, score_thresh=80):
    """Watershed-segment a score map from its strong local maxima.

    Scores below 190 are zeroed before seeds are searched with
    ``local_maxima`` (border maxima excluded); seeds are labelled with
    8-connectivity. The flooded area is the score map thresholded at
    ``score_thresh``.

    Returns:
        The label map produced by ``watershed``.
    """
    strong_scores = score_map.copy()
    strong_scores[strong_scores < 190] = 0
    seed_mask = local_maxima(image=strong_scores, allow_borders=False)
    _, seeds = cv2.connectedComponents(image=seed_mask.astype("int8"), connectivity=8)
    _, flood_area = cv2.threshold(
        src=score_map, thresh=score_thresh, maxval=255, type=cv2.THRESH_BINARY
    )
    return watershed(image=-score_map, markers=seeds, mask=_to_2d(flood_area))
def _get_region_segmentation_map(region_score_map, region_thresh=30):
    """Threshold a score map and split the result with ``_apply_watershed``.

    Args:
        region_score_map: Score map.
        region_thresh: Binary threshold that defines the area to segment.
    """
    _, thresholded = cv2.threshold(
        src=region_score_map, thresh=region_thresh, maxval=255, type=cv2.THRESH_BINARY
    )
    return _apply_watershed(region_score_map=region_score_map, mask=thresholded)
def _combine_two_segmentation_maps(seg_map1, seg_map2):
    """Merge two label maps and blank out the most frequent label.

    Non-zero labels of ``seg_map2`` are shifted by the number of distinct
    values in ``seg_map1`` minus one, then added onto ``seg_map1``. The label
    with the largest pixel count in the sum is set to 0.
    """
    label_offset = len(np.unique(seg_map1)) - 1
    shifted_second = _mask_image(img=seg_map2 + label_offset, mask=(seg_map2 != 0))
    merged = seg_map1 + shifted_second
    counts_by_label = _get_pixel_counts(merged, sort=True, include_zero=True)
    dominant_label = list(counts_by_label)[0]
    return _mask_image(img=merged, mask=(merged != dominant_label))
def _get_image_segmentation_map(img, region_score_map=None, block_size=3):
    """Segment an image into connected regions of its adaptive thresholds.

    The image is adaptively thresholded twice (normal and inverted), each
    result is labelled with 4-connectivity, and the two label maps are merged
    with ``_combine_two_segmentation_maps``.

    Args:
        img: RGB image.
        region_score_map: Optional score map. When given, the image is first
            restricted to the area where the score exceeds 20, dilated with a
            16-pixel kernel.
        block_size: Block size of the adaptive threshold.

    Returns:
        The combined label map.
    """
    if region_score_map is not None:
        _, region_mask = cv2.threshold(
            src=region_score_map, thresh=20, maxval=255, type=cv2.THRESH_BINARY
        )
        # _dilate_mask takes `mask`, not `img`; the original keyword raised
        # TypeError whenever a score map was supplied.
        region_mask = _dilate_mask(mask=region_mask, kernel_size=16)
        img_masked = _mask_image(img=img, mask=region_mask)
    else:
        img_masked = img

    img_thr1 = _get_adaptive_thresholded_image(
        img=img_masked, invert=False, block_size=block_size
    )
    img_thr2 = _get_adaptive_thresholded_image(
        img=img_masked, invert=True, block_size=block_size
    )
    _, seg_map1 = cv2.connectedComponents(image=img_thr1, connectivity=4)
    _, seg_map2 = cv2.connectedComponents(image=img_thr2, connectivity=4)
    return _combine_two_segmentation_maps(seg_map1=seg_map1, seg_map2=seg_map2)
def _get_segmentation_map_overlapping_mask(seg_map, mask, overlap_thresh=0.6):
    """Build a mask of the segments that lie mostly inside ``mask``.

    A non-zero segment is kept when the share of its pixels covered by the
    non-zero part of ``mask`` is strictly greater than ``overlap_thresh``.

    Args:
        seg_map: Label map.
        mask: Mask to compare against; non-zero means covered.
        overlap_thresh: Minimum covered share, exclusive.

    Returns:
        A 3-channel uint8 mask: 255 on kept segments, 0 elsewhere.
    """
    total_counts = _get_pixel_counts(seg_map, sort=True, include_zero=False)
    covered_seg_map = _mask_image(img=seg_map, mask=(mask != 0))
    covered_counts = _get_pixel_counts(
        covered_seg_map, sort=False, include_zero=False
    )
    # Per-label ratio computed directly from the two count dicts instead of
    # a row-wise DataFrame.apply.
    labels_inside = [
        label
        for label, total in total_counts.items()
        if covered_counts.get(label, 0) / total > overlap_thresh
    ]
    inside = np.isin(seg_map, labels_inside).astype("uint8")
    return _to_3d(inside * 255)
def _split_segmentation_map(seg_map, pccs):
    """Split a label map by whether a segment contains an "inside" centre point.

    Args:
        seg_map: Label map.
        pccs: DataFrame with columns ``x``, ``y`` and boolean ``inside``.

    Returns:
        ``(seg_map1, seg_map2)``: the segments hit by a point with
        ``inside == True``, and all remaining segments.
    """
    inside_points = pccs[pccs["inside"]]
    hit_labels = inside_points.apply(
        lambda point: seg_map[point["y"], point["x"]], axis=1
    ).values.tolist()
    is_hit = np.isin(seg_map, hit_labels)
    hit_segments = _mask_image(img=seg_map, mask=is_hit)
    other_segments = _mask_image(img=seg_map, mask=~is_hit)
    return hit_segments, other_segments
def _segmentation_map_to_mask(seg_map):
    """Turn a label map into a 3-channel mask: 255 where the label is non-zero."""
    foreground = (seg_map != 0).astype("uint8") * 255
    return _to_3d(foreground)
def _get_pseudo_character_centers_from_mask(mask, bboxes: pd.DataFrame = None):
    """Compute the centre coordinates of the labels (characters) in a mask.

    Args:
        mask: Mask image; its first channel is labelled with 8-connectivity.
        bboxes: Optional DataFrame with columns ``bbox_x1``, ``bbox_y1``,
            ``bbox_x2`` and ``bbox_y2``. ``None`` (the default) is treated
            like an empty frame; the original crashed on it.

    Returns:
        DataFrame with integer columns ``x`` and ``y`` (one row per
        foreground component) and a boolean ``inside`` column. ``inside`` is
        True when the centre lies strictly inside at least one box, or for
        every row when no boxes are given.
    """
    num_labels, _, _, centroids = cv2.connectedComponentsWithStats(
        image=_to_2d(mask), connectivity=8
    )
    # Index 0 is the background component, so start at 1.
    center_coords = [
        (int(centroids[i][0]), int(centroids[i][1])) for i in range(1, num_labels)
    ]
    pccs = pd.DataFrame(center_coords, columns=["x", "y"])

    if bboxes is None or bboxes.empty:
        pccs["inside"] = True
        return pccs

    # Vectorised point-in-box test: an (n_points, n_boxes) grid.
    xs = pccs["x"].values[:, None]
    ys = pccs["y"].values[:, None]
    pccs["inside"] = (
        (xs > bboxes["bbox_x1"].values)
        & (xs < bboxes["bbox_x2"].values)
        & (ys > bboxes["bbox_y1"].values)
        & (ys < bboxes["bbox_y2"].values)
    ).any(axis=1)
    return pccs
+def _get_pseudo_character_centers(
+    region_score_map, region_seg_map=None, bboxes=pd.DataFrame()
+):
+    local_max_coor = _get_local_maxima_coordinates(
+        region_score_map, region_seg_map=region_seg_map
+    )
+    pccs = pd.DataFrame(local_max_coor, columns=["x", "y"])
+    if not bboxes.empty:
+        # 벡터화 연산으로 bbox 안에 있는지 검사
+        pccs["inside"] = (
+            (pccs["x"].values[:, None] > bboxes["bbox_x1"].values) &
+            (pccs["x"].values[:, None] < bboxes["bbox_x2"].values) &
+            (pccs["y"].values[:, None] > bboxes["bbox_y1"].values) &
+            (pccs["y"].values[:, None] < bboxes["bbox_y2"].values)
+        ).any(axis=1)
+    else:
+        pccs["inside"] = True
+    return pccs
+def _convert_region_score_map_to_region_mask(region_score_map, region_score_thresh=170):
+    _, region_mask = cv2.threshold(
+        src=region_score_map, thresh=30, maxval=255, type=cv2.THRESH_BINARY
+    )
+    new_mask = _get_canvas_same_size_as_image(img=region_mask, black=True)
+    n_labels, seg_map, _, _ = cv2.connectedComponentsWithStats(
+        image=_to_2d(region_mask), connectivity=4
+    )
+    for k in range(1, n_labels):
+        if np.max(region_score_map[seg_map == k]) < region_score_thresh:
+            continue
+        new_mask[seg_map == k] = 255
+    new_mask = _to_3d(new_mask)
+    return new_mask
+def _split_mask(mask, region_score_map=None, bboxes=pd.DataFrame(), th=30):
+    """mask를 두 종류로 나눕니다. 각각 inpainting과정에서 지워야할 mask와 복구해야할 mask 영역을 의미합니다.
+       mask1과 mask2는 서로 겹칠수도 있습니다.
+       동작원리 : region_score_map(이 안주어질 경우 dst_mask_map)을 th로 이진화 및 segmap으로 변형(Connected components)후
+       label영역 별 Local maximum 포인트를 watershed의 marker로 여겨 watershed를 진행한 결과를 segmap으로 여기고,
+       pccs를 peak_loacl_max(skimage)함수로 region_scoremap과 segmap을 이용해 구한다. 이때 bbox정보도 포함시켜, 각 pccs가 box안에 들어 오는지 확인한 후
+       bbox안에 있는 pccs에 대해 각 pccs가 속한 segmap의 label영역(seg_map1)과 속하지 못한 label 영역(seg_map2)로 나눈다.
+    Args:
+        mask (_np.ndarray_): (H,W,3)의 mask. values : (0 or 255)
+        region_score_map (_np.ndarray_): region_score_map, craft의 결과. 글의 중심을 강조하는 Heat map
+        bboxes (_pd.DataFrame_): 박스 좌표정보(bbox_x1,bbox_y1,bbox_x2,bbox_y2)가 포함된 dataFrame.
+    Returns:
+        _np.ndarray_: 지워야 하는 부분인 mask1. 복구해야 하는 부분인 mask2.
+    """
+    if region_score_map is None:
+        dst_mask_map = _to_2d(get_dst_mask(mask))
+        seg_map = _apply_watershed(mask=mask, region_score_map=dst_mask_map, th=th)
+        pccs = _get_pseudo_character_centers(
+            region_score_map=dst_mask_map, region_seg_map=seg_map, bboxes=bboxes
+        )
+    else:
+        seg_map = _apply_watershed(mask, region_score_map, th=th)
+        pccs = _get_pseudo_character_centers(
+            region_score_map=region_score_map, region_seg_map=seg_map, bboxes=bboxes
+        )
+    box_mask = _bboxes_to_mask(seg_map, bboxes)
+    seg_map1, seg_map2 = _split_segmentation_map(seg_map=seg_map, pccs=pccs)
+    mask1 = _segmentation_map_to_mask(seg_map1)
+    mask2 = _segmentation_map_to_mask(seg_map2)
+    mask3 = _to_3d(_mask_image(mask1, box_mask, invert=True))
+    mask2 = _combine_masks([mask2, mask3])
+    return mask1, mask2
+def get_word_segmentation_map(region_score_map, affinity_score_map):
+    _, region_mask = cv2.threshold(
+        src=region_score_map, thresh=70, maxval=255, type=cv2.THRESH_BINARY
+    )
+    _, affinity_mask = cv2.threshold(
+        src=affinity_score_map, thresh=70, maxval=255, type=cv2.THRESH_BINARY
+    )
+    word_mask = region_mask + affinity_mask
+    _, segmentation_map_word = cv2.connectedComponents(image=word_mask, connectivity=4)
+    return segmentation_map_word
+def get_line_segmentation_map(line_score_map):
+    _, line_mask = cv2.threshold(
+        src=line_score_map, thresh=130, maxval=255, type=cv2.THRESH_BINARY
+    )
+    _, line_segmentation_map = cv2.connectedComponents(image=line_mask, connectivity=4)
+    return line_segmentation_map
+def _get_3d_block_segmentation_map(img, bboxes):
+    segmentation_map_block = np.zeros(
+        shape=(img.shape[0], img.shape[1], len(bboxes) + 1)
+    )
+    for idx, (xmin, ymin, xmax, ymax) in enumerate(
+        bboxes[["xmin", "ymin", "xmax", "ymax"]].values, start=1
+    ):
+        segmentation_map_block[ymin:ymax, xmin:xmax, idx] = 255
+    return segmentation_map_block
+def compare_images(img1, img2, flag=cv2.CMP_EQ):
+    # 두 이미지가 같은 영역을 255 아닌 영역을 0. flag는 cv2.CMP_XX참고(EQ==같으면1,NE==다르면1)
+    return cv2.compare(img1, img2, flag)
+def convert_webp_png_get_data(img: np.ndarray):
+    pil_img = _to_pil(img)
+    convert_pil_img = pil_img.convert("RGB")
+    convert_pil_img.save("temp.png")
+    _, byte, format = load_image("temp.png", with_byte=True, with_format=True)
+    os.remove("temp.png")
+    return byte
+def add_water_mark(original_img, water_mark_img_path):
+    if isinstance(original_img, np.ndarray):
+        original_img = _to_pil(original_img)
+        return_np = True
+    else:
+        return_np = False
+    watermark = Image.open(water_mark_img_path).convert("RGBA")
+    width_o, height_o = original_img.size
+    width_wm, height_wm = watermark.size
+    position = ((width_o - width_wm) // 2, (height_o - height_wm) // 2)
+    # 원본 이미지보다 크기가 작은 경우에만 워터마크 이미지를 비율에 맞게 조정
+    if width_wm > width_o or height_wm > height_o:
+        # 워터마크 이미지의 가로 세로 비율 계산
+        ratio_w = width_o / width_wm
+        ratio_h = height_o / height_wm
+        # 더 작은 비율을 선택하여 워터마크 이미지를 조정
+        ratio = min(ratio_w, ratio_h)
+        new_width = int(width_wm * ratio)
+        new_height = int(height_wm * ratio)
+        watermark = watermark.resize((new_width, new_height), Image.Resampling.LANCZOS)
+        width_wm, height_wm = watermark.size
+        # 새로 계산된 위치
+        position = ((width_o - width_wm) // 2, (height_o - height_wm) // 2)
+    original_img.paste(watermark, position, watermark)
+    rgb_image = original_img.convert("RGB")
+    if return_np:
+        return _to_array(rgb_image)
+    return rgb_image
+def load_image(url_or_path, with_byte=False, with_format=False):
+    if "http" in url_or_path:
+        url_or_path = str(url_or_path)
+        response = requests.get(url_or_path)
+        PIL_image = Image.open(io.BytesIO(response.content))
+        format = PIL_image.format
+        image_bytes = response.content
+        if format == "GIF":
+            img_exif = None
+        else:
+            img_exif = PIL_image._getexif()
+        if PIL_image.mode in ["L", "P", "PA", "RGBA"]:
+            PIL_image = Image.open(io.BytesIO(response.content)).convert("RGB")
+        if img_exif:
+            for k in img_exif.keys():
+                attr = ExifTags.TAGS.get(k, "no_key")
+                if attr != "no_key":
+                    if ExifTags.TAGS[k] == "Orientation":
+                        if img_exif[k] == 3:
+                            PIL_image = PIL_image.rotate(180, expand=True)
+                        elif img_exif[k] == 6:
+                            PIL_image = PIL_image.rotate(270, expand=True)
+                        elif img_exif[k] == 8:
+                            PIL_image = PIL_image.rotate(90, expand=True)
+                        break
+        if PIL_image.mode == "CMYK":
+            cmyk_profile = ImageCms.ImageCmsProfile("resources/USWebCoatedSWOP.icc")
+            srgb_profile = ImageCms.ImageCmsProfile(
+                "resources/sRGB Color Space Profile.icm"
+            )
+            PIL_image = ImageCms.profileToProfile(
+                PIL_image, cmyk_profile, srgb_profile, outputMode="RGB"
+            )
+            img = np.array(PIL_image)
+        else:
+            img = np.array(PIL_image)
+    else:
+        # img = cv2.imread(url_or_path, flags=cv2.IMREAD_COLOR)
+        # img = cv2.cvtColor(src=img, code=cv2.COLOR_BGR2RGB)
+        PIL_image = Image.open(url_or_path)
+        format = PIL_image.format
+        byte_arr = io.BytesIO()
+        if PIL_image.mode == "RGBA":
+            PIL_image = PIL_image.convert("RGB")
+        PIL_image.save(byte_arr, format="JPEG")
+        image_bytes = byte_arr.getvalue()
+        img = np.array(PIL_image)
+    # if "http" in url_or_path:
+    #     img = cv2.imdecode(
+    #         np.asarray(bytearray(requests.get(url_or_path).content), dtype="uint8"), flags=cv2.IMREAD_COLOR
+    #     )
+    # else:
+    #     img = cv2.imread(url_or_path, flags=cv2.IMREAD_COLOR)
+    # img = cv2.cvtColor(src=img, code=cv2.COLOR_BGR2RGB)
+    if with_byte:
+        if with_format:
+            return img, image_bytes, format
+        else:
+            return img, image_bytes
+    return img
+def save_image(img1, img2=None, alpha=0.5, path="") -> None:
+    copied_img1 = _preprocess_image(_to_array(img1.copy(order="C")))
+    if img2 is None:
+        img_arr = copied_img1
+    else:
+        copied_img2 = _to_array(_preprocess_image(_to_array(img2.copy(order="C"))))
+        img_arr = _to_array(
+            _blend_two_images(img1=copied_img1, img2=copied_img2, alpha=alpha)
+        )
+    path = Path(path)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    if os.path.splitext(str(path))[1] == ".gif":
+        pil = _to_pil(img1)
+        pil.save(str(path))
+        return True
+    if img_arr.ndim == 3:
+        cv2.imwrite(
+            filename=str(path),
+            img=img_arr[:, :, ::-1],
+            params=[cv2.IMWRITE_JPEG_QUALITY, 100],
+        )
+    elif img_arr.ndim == 2:
+        cv2.imwrite(
+            filename=str(path), img=img_arr, params=[cv2.IMWRITE_JPEG_QUALITY, 100]
+        )
+def show_image(img1, img2=None, alpha=0.5):
+    img1 = _to_pil(_preprocess_image(_to_array(img1)))
+    if img2 is None:
+        img1.show()
+    else:
+        img2 = _to_pil(_preprocess_image(_to_array(img2)))
+        img_blended = Image.blend(im1=img1, im2=img2, alpha=alpha)
+        img_blended.show()
+def draw_bboxes(img, bboxes: pd.DataFrame, index=False):
+    """속성추출전 원본 이미지와 bboxes정보를 가지고 이미지위에 bboxes를 시각화 해주는 함수."""
+    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
+    draw = ImageDraw.Draw(canvas)
+    dic = dict()
+    for row in bboxes.itertuples():
+        h = row.bbox_y2 - row.bbox_y1
+        w = row.bbox_x2 - row.bbox_x1
+        smaller = min(w, h)
+        thickness = max(1, smaller // 22)
+        dic[row.Index] = ((0, 255, 0), (0, 100, 0), thickness)
+    for row in bboxes.itertuples():
+        _, fill, thickness = dic[row.Index]
+        draw.rectangle(
+            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
+            outline=None,
+            fill=fill,
+            width=thickness,
+        )
+    for row in bboxes.itertuples():
+        outline, _, thickness = dic[row.Index]
+        draw.rectangle(
+            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
+            outline=outline,
+            fill=None,
+            width=thickness,
+        )
+    if index:
+        from data_utils.rendering_utils import _get_font
+        max_len = max(map(len, map(str, bboxes.index)))
+        for row in bboxes.itertuples():
+            h = row.bbox_y2 - row.bbox_y1
+            w = row.bbox_x2 - row.bbox_x1
+            smaller = min(w, h)
+            font_size = max(10, min(40, smaller // 4))
+            draw.text(
+                xy=(row.bbox_x1, row.bbox_y1 - 4),
+                text=str(row.Index).zfill(max_len),
+                fill="white",
+                stroke_fill="black",
+                stroke_width=2,
+                font=_get_font(lang="en", font_size=font_size),
+                anchor="ls",
+            )
+    return _blend_two_images(img1=canvas, img2=img, alpha=0.4)
+def visualize_clusters(img, bboxes, index=False):
+    from data_utils.rendering_utils import _get_font
+    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
+    draw = ImageDraw.Draw(canvas)
+    dic = dict()
+    for row in bboxes.itertuples():
+        h = row.bbox_y2 - row.bbox_y1
+        w = row.bbox_x2 - row.bbox_x1
+        smaller = min(w, h)
+        thickness = max(1, smaller // 22)
+        dic[row.Index] = ((255, 255, 255), COLORS[row.cluster], thickness)
+    for row in bboxes.itertuples():
+        _, fill, thickness = dic[row.Index]
+        draw.rectangle(
+            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
+            outline=None,
+            fill=fill,
+            width=1,
+        )
+    for row in bboxes.itertuples():
+        outline, _, thickness = dic[row.Index]
+        draw.rectangle(
+            xy=(row.bbox_x1, row.bbox_y1, row.bbox_x2, row.bbox_y2),
+            outline=outline,
+            fill=None,
+            width=1,
+        )
+    if index:
+        for row in bboxes.itertuples():
+            h = row.bbox_y2 - row.bbox_y1
+            w = row.bbox_x2 - row.bbox_x1
+            smaller = min(w, h)
+            font_size = max(14, min(40, smaller * 0.35))
+            draw.text(
+                xy=(row.bbox_x1, row.bbox_y1 - 4),
+                text=str(row.cluster),
+                fill="white",
+                stroke_fill="black",
+                stroke_width=2,
+                font=_get_font(lang="en", font_size=font_size),
+                anchor="ls",
+            )
+    return _blend_two_images(img1=canvas, img2=img, alpha=0.25)
+def draw_bboxes_and_textboxes(bboxes, img):
+    canvas = img.copy(order="C")
+    for row in bboxes.itertuples():
+        cv2.rectangle(
+            img=canvas,
+            pt1=(row.bbox_x1, row.bbox_y1),
+            pt2=(row.bbox_x2, row.bbox_y2),
+            color=(0, 255, 0),
+            thickness=4,
+        )
+        cv2.rectangle(
+            img=canvas,
+            pt1=(row.tbox_x1, row.tbox_y1),
+            pt2=(row.tbox_x2, row.tbox_y2),
+            color=(255, 0, 0),
+            thickness=2,
+        )
+    return canvas
+def draw_pseudo_character_centers(img, pccs, margin=4):
+    canvas = _to_pil(_get_canvas_same_size_as_image(img=img, black=True))
+    draw = ImageDraw.Draw(canvas)
+    for row in pccs.itertuples():
+        draw.ellipse(
+            xy=(row.x - margin, row.y - margin, row.x + margin, row.y + margin),
+            outline=(255, 0, 0),
+            fill=(100, 0, 0),
+        )
+    return _blend_two_images(img1=canvas, img2=img, alpha=0.3)
+def _resize_image(img, w, h):
+    ori_w, ori_h = _get_width_and_height(img)
+    if w < ori_w or h < ori_h:
+        interpolation = cv2.INTER_AREA
+    else:
+        interpolation = cv2.INTER_LANCZOS4
+    resized_img = cv2.resize(src=img, dsize=(w, h), interpolation=interpolation)
+    return resized_img
+def _resize_image_using_shorter_side(img, img_size=1530):
+    ori_w, ori_h = _get_width_and_height(img)
+    shorter = min(ori_w, ori_h)
+    if shorter <= img_size:
+        return img
+    if ori_w < ori_h:
+        resized_img = cv2.resize(
+            src=img,
+            dsize=(img_size, round(ori_h * (img_size / ori_w))),
+            interpolation=cv2.INTER_AREA,
+        )
+    else:
+        resized_img = cv2.resize(
+            src=img,
+            dsize=(round(ori_w * (img_size / ori_h)), img_size),
+            interpolation=cv2.INTER_AREA,
+        )
+    return resized_img
+def _resize_image_using_longer_side(img, img_size=2560):
+    ori_w, ori_h = _get_width_and_height(img)
+    longer = max(ori_w, ori_h)
+    if longer <= img_size:
+        return img
+    if ori_w < ori_h:
+        resized_img = cv2.resize(
+            src=img,
+            dsize=(round(ori_w * (img_size / ori_h)), img_size),
+            interpolation=cv2.INTER_AREA,
+        )
+    else:
+        resized_img = cv2.resize(
+            src=img,
+            dsize=(img_size, round(ori_h * (img_size / ori_w))),
+            interpolation=cv2.INTER_AREA,
+        )
+    return resized_img
+def _split_image_3(img, print=False):
+    if img.ndim == 2:
+        is_2d = True
+    else:
+        is_2d = False
+    img = _to_3d(img)
+    w, h = _get_width_and_height(img)
+    if h >= w:
+        if print:
+            print(f"Resolution: {w}, {h} -> {w}, {h // 2}")
+        img1 = img[: h // 2, :, :]
+        img2 = img[h // 4 : h // 4 + h // 2, :, :]
+        img3 = img[-h // 2 :, :, :]
+    else:
+        if print:
+            print(f"Resolution: {w}, {h} -> {w // 2}, {h}")
+        img1 = img[:, : w // 2, :]
+        img2 = img[:, w // 2 // 2 : w // 2 // 2 + w // 2, :]
+        img3 = img[:, -w // 2 :, :]
+    if is_2d:
+        img1 = _to_2d(img1)
+        img2 = _to_2d(img2)
+        img3 = _to_2d(img3)
+    return img1, img2, img3
+def _split_image_2(img, print=False):
+    if img.ndim == 2:
+        is_2d = True
+    else:
+        is_2d = False
+    img = _to_3d(img)
+    w, h = _get_width_and_height(img)
+    if h >= w:
+        if print:
+            print(f"Resolution: {w}, {h} -> {w}, {h // 2}")
+        img1 = img[: h // 2, :, :]
+        img3 = img[-h // 2 :, :, :]
+    else:
+        if print:
+            print(f"Resolution: {w}, {h} -> {w // 2}, {h}")
+        img1 = img[:, : w // 2, :]
+        img3 = img[:, -w // 2 :, :]
+    if is_2d:
+        img1 = _to_2d(img1)
+        img3 = _to_2d(img3)
+    return img1, img3
+def _combine_images_3(img, img1, img2, img3):
+    if (img1 is None) and (img2 is None) and (img3 is None):
+        canvas = None
+    else:
+        img1 = _to_2d(img1)
+        img2 = _to_2d(img2)
+        img3 = _to_2d(img3)
+        canvas = _get_canvas_same_size_as_image(_to_2d(img), black=True)
+        w, h = _get_width_and_height(img)
+        if h >= w:
+            canvas[: h // 2, :] = img1
+            canvas[h // 2 // 2 : h // 2 // 2 + h // 2, :] = np.maximum(
+                canvas[h // 2 // 2 : h // 2 // 2 + h // 2, :], img2
+            )
+            canvas[-h // 2 :, :] = np.maximum(canvas[-h // 2 :, :], img3)
+        else:
+            canvas[:, : w // 2] = img1
+            canvas[:, w // 2 // 2 : w // 2 // 2 + w // 2] = np.maximum(
+                canvas[:, w // 2 // 2 : w // 2 // 2 + w // 2], img2
+            )
+            canvas[:, -w // 2 :] = np.maximum(canvas[:, -w // 2 :], img3)
+    return canvas
+def _combine_images_2(img, img1, img2):
+    if (img1 is None) and (img2 is None):
+        canvas = None
+    else:
+        canvas = _get_canvas_same_size_as_image(img, black=True)
+        w, h = _get_width_and_height(img)
+        if h >= w:
+            canvas[: h // 2, :] = img1
+            canvas[-h // 2 :, :] = np.maximum(canvas[-h // 2 :, :], img2)
+        else:
+            canvas[:, : w // 2] = img1
+            canvas[:, -w // 2 :] = np.maximum(canvas[:, -w // 2 :], img2)
+    return canvas
+def _rotate_90_degrees(img, counterclockwise=False):
+    return cv2.rotate(
+        src=img,
+        rotateCode=cv2.ROTATE_90_COUNTERCLOCKWISE
+        if counterclockwise
+        else cv2.ROTATE_90_CLOCKWISE,
+    )
+def save_image_patches(img, bboxes, dir):
+    for row in bboxes.itertuples():
+        patch = _crop_image(
+            img=img,
+            l=row.bbox_x1,
+            t=row.bbox_y1,
+            r=row.bbox_x2,
+            b=row.bbox_y2,
+        )
+        patch_w = row.bbox_x2 - row.bbox_x1
+        patch_h = row.bbox_y2 - row.bbox_y1
+        if patch_h > patch_w:
+            patch = _rotate_90_degrees(patch, counterclockwise=False)
+        save_image(img1=patch, path=Path(dir) / f"{str(row.Index).zfill(4)}.jpg")
+def get_minimum_area_bounding_rectangle(mask):
+    bool = _to_2d(mask.astype("uint8")) != 0
+    nonzero_x = np.where(bool.any(axis=0))[0]
+    nonzero_y = np.where(bool.any(axis=1))[0]
+    if len(nonzero_x) != 0 and len(nonzero_y) != 0:
+        bbox_x1 = nonzero_x[0]
+        bbox_x2 = nonzero_x[-1]
+        bbox_y1 = nonzero_y[0]
+        bbox_y2 = nonzero_y[-1]
+        return int(bbox_x1), int(bbox_y1), int(bbox_x2), int(bbox_y2)
+    else:
+        return 0, 0, 0, 0
+def get_minimum_area_bounding_rectangle2(mask, l, t, r, b):
+    bool = _to_2d(mask.astype("uint8")) != 0
+    nonzero_x = np.where(bool.any(axis=0))[0]
+    nonzero_y = np.where(bool.any(axis=1))[0]
+    try:
+        new_l = nonzero_x[np.where(l < nonzero_x)][0]
+    except Exception:
+        new_l = l
+    try:
+        new_t = nonzero_y[np.where(t < nonzero_y)][0]
+    except Exception:
+        new_t = t
+    try:
+        new_r = nonzero_x[np.where(nonzero_x < r)][-1]
+    except Exception:
+        new_r = r
+    try:
+        new_b = nonzero_y[np.where(nonzero_y < b)][-1]
+    except Exception:
+        new_b = b
+    return new_l, new_t, new_r, new_b
+def _downsample_image(img):
+    ori_w, ori_h = _get_width_and_height(img)
+    resized = _resize_image(img, w=ori_w // 2, h=ori_h // 2)
+    return resized
+def _upsample_image(img):
+    ori_w, ori_h = _get_width_and_height(img)
+    resized = _resize_image(img, w=ori_w * 2, h=ori_h * 2)
+    return resized
+def _get_pseudo_image(img, mask, invert=False):
+    if invert:
+        mask = _invert_image(mask)
+    rows, cols = np.nonzero(_to_2d(mask))
+    pseudo_outer = img[rows, cols, :].reshape((1, -1, 3))
+    return pseudo_outer
+def resize_coordinates_and_image_to_fit_to_maximum_pixel_counts(
+    bboxes, img, max_pixel_counts=1530
+):
+    w, h = _get_width_and_height(img)
+    ratio = min(max_pixel_counts / h, max_pixel_counts / w)
+    if ratio < 1:
+        for col in ["xmin", "ymin", "xmax", "ymax"]:
+            bboxes[col] = bboxes[col].apply(lambda x: int(x * ratio))
+        img = cv2.resize(
+            src=img,
+            dsize=(int(w * ratio), int(h * ratio)),
+            interpolation=cv2.INTER_LANCZOS4,
+        )
+    return bboxes, img
+def get_image_patches_3(img, text_stroke_mask, mask1, mask2):
+    splitting_mask = get_splitting_mask(text_stroke_mask)
+    _, _, stats, _ = cv2.connectedComponentsWithStats(
+        image=_to_2d(splitting_mask), connectivity=4
+    )
+    ls_patches = list()
+    for xmin, ymin, width, height, px_cnt in stats[1:, :]:
+        xmax = xmin + width
+        ymax = ymin + height
+        cropped_img = _crop_image(img=img, l=xmin, t=ymin, r=xmax, b=ymax)
+        cropped_mask1 = _crop_image(img=mask1, l=xmin, t=ymin, r=xmax, b=ymax)
+        cropped_mask2 = _crop_image(img=mask2, l=xmin, t=ymin, r=xmax, b=ymax)
+        ls_patches.append(
+            {
+                "xmin": xmin,
+                "ymin": ymin,
+                "xmax": xmax,
+                "ymax": ymax,
+                "img": cropped_img,
+                "mask1": cropped_mask1,
+                "mask2": cropped_mask2,
+            }
+        )
+    return ls_patches
+def get_image_patches_2(img, mask1, mask2):
+    splitting_mask = get_splitting_mask(mask1)
+    _, _, stats, _ = cv2.connectedComponentsWithStats(
+        image=_to_2d(splitting_mask), connectivity=4
+    )
+    ls_patches = list()
+    for x1, y1, w, h, _ in stats[1:, :]:
+        x2 = x1 + w
+        y2 = y1 + h
+        cropped_img = _crop_image(img=img, l=x1, t=y1, r=x2, b=y2)
+        cropped_mask1 = _crop_image(img=mask1, l=x1, t=y1, r=x2, b=y2)
+        cropped_mask2 = _crop_image(img=mask2, l=x1, t=y1, r=x2, b=y2)
+        ls_patches.append(
+            {
+                "x1": x1,
+                "y1": y1,
+                "x2": x2,
+                "y2": y2,
+                "img": cropped_img,
+                "mask1": cropped_mask1,
+                "mask2": cropped_mask2,
+            }
+        )
+    return ls_patches
+def get_splitting_mask(text_stroke_mask):
+    splitting_mask = _dilate_mask(text_stroke_mask, kernel_size=200)
+    return splitting_mask
+def enhance_sharpness(img):
+    """img의 선명도를 높임. 3가지 방법이 있음(sharpening filter, unsharpening mask, pil sharpening)
+    3 방법 중 PIL 이 가장 원본의 색변화가 적음
+    Args:
+        img (_np.ndarray_): 이미지
+    Returns:
+        _np.ndarray_: 결과 이미지
+    """
+    # sharpening_k = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
+    # hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
+    # sharpened_v = cv2.filter2D(hsv[..., 2], -1, sharpening_k)
+    # hsv[..., 2] = sharpened_v
+    # img_patch2 = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
+    # src_ycrcb = cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
+    # src_f = src_ycrcb[:, :, 0].astype(np.float32)
+    # blr = cv2.GaussianBlur(src_f, (0, 0), 2.0)
+    # src_ycrcb[:, :, 0] = np.clip(2. * src_f - blr, 0, 255).astype(np.uint8)
+    # img_patch3 = cv2.cvtColor(src_ycrcb, cv2.COLOR_YCrCb2RGB)
+    pil_img = _to_pil(img)
+    sharpness_img = ImageEnhance.Sharpness(pil_img).enhance(2)
+    result_img = _to_array(sharpness_img)
+    return result_img
+def mask2point(mask):
+    # mask (H,W,3) 0 or 255 -> (N,2)
+    mask = _to_2d(mask)
+    indices = np.argwhere(mask == 255)
+    return indices
+def get_corner(corner_coords):
+    # corner_coords (N,2) each point means (y,x)
+    cy, cx = np.mean(corner_coords, axis=0)
+    quadrant_1 = corner_coords[(corner_coords[:, 0] < cy) & (corner_coords[:, 1] >= cx)]
+    rt = quadrant_1[:, 1].max(), quadrant_1[:, 0].min()
+    quadrant_2 = corner_coords[(corner_coords[:, 0] < cy) & (corner_coords[:, 1] < cx)]
+    lt = quadrant_2[:, 1].min(), quadrant_2[:, 0].min()
+    quadrant_3 = corner_coords[(corner_coords[:, 0] >= cy) & (corner_coords[:, 1] < cx)]
+    lb = quadrant_3[:, 1].min(), quadrant_3[:, 0].max()
+    quadrant_4 = corner_coords[
+        (corner_coords[:, 0] >= cy) & (corner_coords[:, 1] >= cx)
+    ]
+    rb = quadrant_4[:, 1].max(), quadrant_4[:, 0].max()
+    return lt, rt, rb, lb
+def get_dst_mask(mask):
+    mask = _to_2d(mask)
+    dst = cv2.distanceTransform(mask, cv2.DIST_L2, 5)
+    # 거리 값을 0 ~ 255 범위로 정규화 ---②
+    dist_transform_normalized = cv2.normalize(
+        dst, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
+    )
+    return _to_3d(dist_transform_normalized)
+def unwarp(img, src, dst):
+    h, w = img.shape[:2]
+    # use cv2.getPerspectiveTransform() to get M, the transform matrix, and Minv, the inverse
+    M = cv2.getPerspectiveTransform(src, dst)
+    # use cv2.warpPerspective() to warp your image to a top-down view
+    warped = cv2.warpPerspective(img, M, (w, h), flags=cv2.INTER_LINEAR)
+    return warped, M
+def perspective_correction(img, src=None, vis=False, method: PC_TYPE = PC_TYPE.HARRIS):
+    # img (H,W,C) 0~255, src=[[ltx,lty],[rtx,rty],[rbx,rby],[lbx,lby]]
+    if src is None:
+        gray = _to_grayscale(img)
+        if not isinstance(method, PC_TYPE):
+            raise ValueError(
+                f"Invalid method: {method}. Expected one of {list(PC_TYPE)}."
+            )
+        if method == PC_TYPE.HARRIS:
+            corner = cv2.cornerHarris(gray, 5, 3, 0.04)  # (H,W) value: corner score
+            threshold = 0.005 * corner.max()
+            corner_coords = np.argwhere(corner > threshold)
+        elif method == PC_TYPE.EDGES_CONTOURS:
+            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
+            edges = cv2.Canny(blurred, 50, 150)
+            contours, _ = cv2.findContours(
+                edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+            )
+            contour_points = []
+            for cs in contours:
+                c = [css for css in cs]
+                contour_points.extend(c)
+            corner_coords = np.array(contour_points).reshape(-1, 2)[..., ::-1]
+        elif method == PC_TYPE.GFTT:
+            corners = cv2.goodFeaturesToTrack(
+                gray, 0, 0.01, 5, blockSize=3, useHarrisDetector=True, k=0.03
+            )
+            corner_coords = corners.reshape(corners.shape[0], 2)[..., ::-1]
+        elif method == PC_TYPE.FAST:
+            th = 50
+            fast = cv2.FastFeatureDetector_create(th)
+            keypoints = fast.detect(gray)
+            corner_coords = np.array([[kp.pt[1], kp.pt[0]] for kp in keypoints])
+        elif method == PC_TYPE.KAZE:
+            # feature = cv2.SIFT_create()
+            feature = cv2.KAZE_create()
+            keypoints = feature.detect(gray)
+            corner_coords = np.array([[kp.pt[1], kp.pt[0]] for kp in keypoints])
+        if vis:
+            view_img = img.copy()
+            for corner in corner_coords:
+                y, x = corner
+                cv2.circle(view_img, (int(x), int(y)), 3, (255, 0, 0), 2)
+            save_image(view_img, path="vis_corner.png")
+        lt, rt, rb, lb = get_corner(corner_coords)
+        src = np.float32([lt, rt, rb, lb])
+    dst = np.float32(
+        [
+            (0, 0),
+            (img.shape[1] - 1, 0),
+            (img.shape[1] - 1, img.shape[0] - 1),
+            (0, img.shape[0] - 1),
+        ]
+    )
+    result, M = unwarp(img, src, dst)
+    save_image(result, path="cv_result.png")
+    return result
+if __name__ == "__main__":
+    image_url = "https://d2reotjpatzlok.cloudfront.net/qr-place/item/QR_20240726_2441_2_LZ1ZFCT38HN7PPCEZR8H.jpg"
+    img, imgdata, format = load_image(image_url, with_byte=True, with_format=True)
+    perspective_correction(img, vis=True)

rect_main.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import importlib
+import warnings
+from collections import defaultdict
+import cv2
+import numpy as np
+import torch
+import torch.nn.functional as F
+from config import Config
+from data_utils.image_utils import _to_2d
+# Suppress all warnings for the whole process.
+warnings.filterwarnings("ignore")
+# "DocTr-Plus" contains a hyphen, so the module cannot be named in a regular
+# ``import`` statement; both inference modules are loaded by dotted path instead.
+DocTr_Plus = importlib.import_module("models.DocTr-Plus.inference")
+DocScanner = importlib.import_module("models.DocScanner.inference")
+# Torch device used when loading models; despite its name it falls back to CPU
+# when CUDA is unavailable.
+cuda = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Counter dict defaulting to 0. NOTE(review): not used in the visible part of this file.
+mask_dict = defaultdict(int)
+def load_geotrp_model(cuda, path=""):
+    _GeoTrP = DocTr_Plus.GeoTrP()
+    _GeoTrP = _GeoTrP.to(cuda)
+    DocTr_Plus.reload_model(_GeoTrP.GeoTr, path)
+    _GeoTrP.eval()
+    return _GeoTrP
+def load_docscanner_model(cuda, path_l="", path_m=""):
+    net = DocScanner.Net().to(cuda)
+    DocScanner.reload_seg_model(net.msk, path_m)
+    DocScanner.reload_rec_model(net.bm, path_l)
+    net.eval()
+    return net
+def preprocess_image(img, target_size=[288, 288]):
+    im_ori = img[:, :, :3] / 255.0
+    h_, w_, _ = im_ori.shape
+    im_ori_resized = cv2.resize(im_ori, (288, 288))
+    im = cv2.resize(im_ori_resized, target_size)
+    im = im.transpose(2, 0, 1)
+    im = torch.from_numpy(im).float().unsqueeze(0)
+    return im_ori, im, h_, w_
+def geotrp_rec(img, model):
+    im_ori, im, h_, w_ = preprocess_image(img)
+    with torch.no_grad():
+        bm = model(im.cuda())
+        bm = bm.cpu().numpy()[0]
+        bm0 = bm[0, :, :]
+        bm1 = bm[1, :, :]
+        bm0 = cv2.blur(bm0, (3, 3))
+        bm1 = cv2.blur(bm1, (3, 3))
+        img_geo = cv2.remap(im_ori, bm0, bm1, cv2.INTER_LINEAR) * 255
+        img_geo = cv2.resize(img_geo, (w_, h_))
+        return img_geo
+def docscanner_get_mask(img, model):
+    _, im, h, w = preprocess_image(img)
+    with torch.no_grad():
+        _, msk = model(im.cuda())
+        msk = msk.cpu()
+        mask_np = (msk[0, 0].numpy() * 255).astype(np.uint8)
+        mask_resized = cv2.resize(mask_np, (w, h))
+    return mask_resized
+def docscanner_rec_img(img, model):
+    im_ori, im, h, w = preprocess_image(img)
+    with torch.no_grad():
+        bm = model(im.cuda())
+        bm = bm.cpu()
+        # save rectified image
+        bm0 = cv2.resize(bm[0, 0].numpy(), (w, h))  # x flow
+        bm1 = cv2.resize(bm[0, 1].numpy(), (w, h))  # y flow
+        bm0 = cv2.blur(bm0, (3, 3))
+        bm1 = cv2.blur(bm1, (3, 3))
+        lbl = torch.from_numpy(np.stack([bm0, bm1], axis=2)).unsqueeze(0)  # h * w * 2
+        out = F.grid_sample(
+            torch.from_numpy(im_ori).permute(2, 0, 1).unsqueeze(0).float(),
+            lbl,
+            align_corners=True,
+        )
+        img = (((out[0] * 255).permute(1, 2, 0).numpy())[:, :, ::-1]).astype(np.uint8)
+    return img
+def docscanner_rec(img, model):
+    im_ori = img[:, :, :3] / 255.0
+    h, w, _ = im_ori.shape
+    im = cv2.resize(im_ori, (288, 288))
+    im = im.transpose(2, 0, 1)
+    im = torch.from_numpy(im).float().unsqueeze(0)
+    with torch.no_grad():
+        bm, msk = model(im.cuda())
+        bm = bm.cpu()
+        msk = msk.cpu()
+        mask_np = (msk[0, 0].numpy() * 255).astype(np.uint8)
+        mask_resized = cv2.resize(mask_np, (w, h))
+        mask_img = mask_resized
+        # save rectified image
+        bm0 = cv2.resize(bm[0, 0].numpy(), (w, h))  # x flow
+        bm1 = cv2.resize(bm[0, 1].numpy(), (w, h))  # y flow
+        bm0 = cv2.blur(bm0, (3, 3))
+        bm1 = cv2.blur(bm1, (3, 3))
+        lbl = torch.from_numpy(np.stack([bm0, bm1], axis=2)).unsqueeze(0)  # h * w * 2
+        out = F.grid_sample(
+            torch.from_numpy(im_ori).permute(2, 0, 1).unsqueeze(0).float(),
+            lbl,
+            align_corners=True,
+        )
+        img = (((out[0] * 255).permute(1, 2, 0).numpy())[:, :, ::-1]).astype(np.uint8)
+        return img, mask_img
+# TODO: move this helper into data_utils later.
+def get_mask_white_area(mask):
+    """
+    Get the white area (non-zero pixels) of a mask.
+    Args:
+        mask (np.ndarray): Input mask image (2D or 3D array)
+    Returns:
+        np.ndarray: Array of (y, x) coordinates of white pixels
+    """
+    mask = _to_2d(mask)
+    white_pixels = np.argwhere(mask > 0)
+    return white_pixels
+def main():
+    config = Config()
+    img = cv2.imread("input/test.jpg")  # 코드 실행시 수정 필요
+    docscanner = load_docscanner_model(
+        cuda, path_l=config.get_rec_model_path, path_m=config.get_seg_model_path
+    )
+    doctr = load_geotrp_model(cuda, path=config.get_geotr_model_path)
+    mask = docscanner_get_mask(img, docscanner)
+    mask_dict.add(get_mask_white_area(mask))
+if __name__ == "__main__":
+    main()

seg.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb79fdec55a5ed435dc74d8112aa9285d8213bae475022f711c709744fb19dd4
+size 4715923