Spaces:

sayed555
/

manga

Sleeping

App Files Files Community

sayed555 committed on Mar 22

Commit

de2b10e

verified ·

1 Parent(s): 82f073c

Upload 48 files

Browse files

Files changed (48) hide show

utils/__init__.py +0 -0
utils/__pycache__/__init__.cpython-310.pyc +0 -0
utils/__pycache__/__init__.cpython-39.pyc +0 -0
utils/__pycache__/config.cpython-310.pyc +0 -0
utils/__pycache__/download_util.cpython-310.pyc +0 -0
utils/__pycache__/exceptions.cpython-310.pyc +0 -0
utils/__pycache__/fontformat.cpython-310.pyc +0 -0
utils/__pycache__/imgproc_utils.cpython-310.pyc +0 -0
utils/__pycache__/io_utils.cpython-310.pyc +0 -0
utils/__pycache__/logger.cpython-310.pyc +0 -0
utils/__pycache__/message.cpython-310.pyc +0 -0
utils/__pycache__/proj_imgtrans.cpython-310.pyc +0 -0
utils/__pycache__/registry.cpython-310.pyc +0 -0
utils/__pycache__/shared.cpython-310.pyc +0 -0
utils/__pycache__/shared.cpython-39.pyc +0 -0
utils/__pycache__/split_text_region.cpython-310.pyc +0 -0
utils/__pycache__/stroke_width_calculator.cpython-310.pyc +0 -0
utils/__pycache__/structures.cpython-310.pyc +0 -0
utils/__pycache__/text_layout.cpython-310.pyc +0 -0
utils/__pycache__/text_processing.cpython-310.pyc +0 -0
utils/__pycache__/textblock.cpython-310.pyc +0 -0
utils/__pycache__/textblock_mask.cpython-310.pyc +0 -0
utils/__pycache__/textlines_merge.cpython-310.pyc +0 -0
utils/__pycache__/watermark_utils.cpython-310.pyc +0 -0
utils/__pycache__/zluda_config.cpython-310.pyc +0 -0
utils/appinfo.py +2 -0
utils/config.py +287 -0
utils/download_util.py +371 -0
utils/exceptions.py +20 -0
utils/fontformat.py +136 -0
utils/imgproc_utils.py +413 -0
utils/io_utils.py +243 -0
utils/logger.py +99 -0
utils/message.py +67 -0
utils/package.py +289 -0
utils/proj_imgtrans.py +720 -0
utils/registry.py +272 -0
utils/shared.py +160 -0
utils/split_text_region.py +386 -0
utils/stroke_width_calculator.py +113 -0
utils/structures.py +84 -0
utils/text_layout.py +477 -0
utils/text_processing.py +237 -0
utils/textblock.py +908 -0
utils/textblock_mask.py +394 -0
utils/textlines_merge.py +568 -0
utils/watermark_utils.py +68 -0
utils/zluda_config.py +32 -0

utils/__init__.py ADDED Viewed

File without changes

utils/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (154 Bytes). View file

utils/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (133 Bytes). View file

utils/__pycache__/config.cpython-310.pyc ADDED Viewed

Binary file (11.2 kB). View file

utils/__pycache__/download_util.cpython-310.pyc ADDED Viewed

Binary file (9.14 kB). View file

utils/__pycache__/exceptions.cpython-310.pyc ADDED Viewed

Binary file (1.12 kB). View file

utils/__pycache__/fontformat.cpython-310.pyc ADDED Viewed

Binary file (5.73 kB). View file

utils/__pycache__/imgproc_utils.cpython-310.pyc ADDED Viewed

Binary file (11.9 kB). View file

utils/__pycache__/io_utils.cpython-310.pyc ADDED Viewed

Binary file (7.77 kB). View file

utils/__pycache__/logger.cpython-310.pyc ADDED Viewed

Binary file (2.96 kB). View file

utils/__pycache__/message.cpython-310.pyc ADDED Viewed

Binary file (2.23 kB). View file

utils/__pycache__/proj_imgtrans.cpython-310.pyc ADDED Viewed

Binary file (22.1 kB). View file

utils/__pycache__/registry.cpython-310.pyc ADDED Viewed

Binary file (8.16 kB). View file

utils/__pycache__/shared.cpython-310.pyc ADDED Viewed

Binary file (4.14 kB). View file

utils/__pycache__/shared.cpython-39.pyc ADDED Viewed

Binary file (4.08 kB). View file

utils/__pycache__/split_text_region.cpython-310.pyc ADDED Viewed

Binary file (9.84 kB). View file

utils/__pycache__/stroke_width_calculator.cpython-310.pyc ADDED Viewed

Binary file (3.49 kB). View file

utils/__pycache__/structures.cpython-310.pyc ADDED Viewed

Binary file (2.84 kB). View file

utils/__pycache__/text_layout.cpython-310.pyc ADDED Viewed

Binary file (9.26 kB). View file

utils/__pycache__/text_processing.cpython-310.pyc ADDED Viewed

Binary file (5.42 kB). View file

utils/__pycache__/textblock.cpython-310.pyc ADDED Viewed

Binary file (26.8 kB). View file

utils/__pycache__/textblock_mask.cpython-310.pyc ADDED Viewed

Binary file (11.1 kB). View file

utils/__pycache__/textlines_merge.cpython-310.pyc ADDED Viewed

Binary file (19 kB). View file

utils/__pycache__/watermark_utils.cpython-310.pyc ADDED Viewed

Binary file (1.65 kB). View file

utils/__pycache__/zluda_config.cpython-310.pyc ADDED Viewed

Binary file (1.13 kB). View file

utils/appinfo.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ branch = 'dev'
2	+ version = '1.4.0'

utils/config.py ADDED Viewed

	@@ -0,0 +1,287 @@

+import json, os, traceback
+import os.path as osp
+import copy
+from . import shared
+from .fontformat import FontFormat
+from .structures import List, Dict, Config, field, nested_dataclass
+from .logger import logger as LOGGER
+from .io_utils import json_dump_nested_obj, np, serialize_np
@nested_dataclass
class ModuleConfig(Config):
    """Settings for the text detection, OCR, translation and inpainting modules."""

    # Module selectors (presumably registry keys of the chosen implementations
    # -- confirm against the module registry, which is not visible here).
    textdetector: str = 'ctd'
    ocr: str = "mit48px"
    inpainter: str = 'lama_large_512px'
    translator: str = "google"
    # Stage switches read by stage_enabled() and all_stages_disabled().
    enable_detect: bool = True
    keep_exist_textlines: bool = False
    enable_ocr: bool = True
    enable_translate: bool = True
    enable_inpaint: bool = True
    # Parameter dictionaries; get_params() looks them up as '<module_key>_params'.
    textdetector_params: Dict = field(default_factory=dict)
    ocr_params: Dict = field(default_factory=dict)
    translator_params: Dict = field(default_factory=dict)
    inpainter_params: Dict = field(default_factory=dict)
    translate_source: str = '日本語'
    translate_target: str = '简体中文'
    check_need_inpaint: bool = True
    load_model_on_demand: bool = False
    empty_runcache: bool = False

    def get_params(self, module_key: str, for_saving=False) -> dict:
        """Return the parameter dictionary stored under ``<module_key>_params``.

        With ``for_saving`` false the stored dictionary itself is returned.
        Otherwise a reduced copy is built: entries whose value is ``None`` are
        dropped, the ``'description'`` key of every entry is omitted, and any
        parameter that is itself a dict is replaced by its ``'value'`` item.
        """
        stored = self[module_key + '_params']
        if not for_saving:
            return stored
        reduced = {}
        for name, module_params in stored.items():
            if module_params is None:
                continue
            reduced[name] = {
                pk: (pv['value'] if isinstance(pv, dict) else pv)
                for pk, pv in module_params.items()
                if pk not in {'description'}
            }
        return reduced

    def get_saving_params(self, to_dict=True):
        """Return a shallow copy whose ``*_params`` fields are in saving form.

        The copy's ``__dict__`` is returned when ``to_dict`` is true, otherwise
        the copied object itself.
        """
        params = copy.copy(self)
        for key in ('ocr', 'inpainter', 'textdetector', 'translator'):
            setattr(params, key + '_params', self.get_params(key, for_saving=True))
        return params.__dict__ if to_dict else params

    def stage_enabled(self, idx: int):
        """Return the enable flag of stage ``idx`` (0 detect, 1 OCR, 2 translate, 3 inpaint).

        Raises:
            Exception: for any other index.
        """
        if idx == 0:
            return self.enable_detect
        if idx == 1:
            return self.enable_ocr
        if idx == 2:
            return self.enable_translate
        if idx == 3:
            return self.enable_inpaint
        raise Exception(f'not supported stage idx: {idx}')

    def all_stages_disabled(self):
        """Return True when the or-chain of the four enable flags is exactly ``False``."""
        any_enabled = (
            self.enable_detect
            or self.enable_ocr
            or self.enable_translate
            or self.enable_inpaint
        )
        return any_enabled is False
@nested_dataclass
class DrawPanelConfig(Config):
    """Persisted state of the drawing panel tools.

    The meaning of the integer codes (shape, tool, method) is defined by the
    consumers of this config, which are not visible here.
    """

    # Pen tool
    pentool_color: List = field(default_factory=lambda: [0, 0, 0])
    pentool_width: float = 30.0
    pentool_shape: int = 0
    # Inpaint brush
    inpainter_width: float = 30.0
    inpainter_shape: int = 0
    # Currently selected tool
    current_tool: int = 0
    # Rectangle tool
    rectool_auto: bool = False
    rectool_method: int = 0
    recttool_dilate_ksize: int = 0
@nested_dataclass
class ProgramConfig(Config):
    """Top-level application configuration, loaded from and saved to a JSON file."""

    module: ModuleConfig = field(default_factory=lambda: ModuleConfig())
    drawpanel: DrawPanelConfig = field(default_factory=lambda: DrawPanelConfig())
    global_fontformat: FontFormat = field(default_factory=lambda: FontFormat())
    recent_proj_list: List = field(default_factory=lambda: list())
    show_page_list: bool = False
    imgtrans_paintmode: bool = False
    imgtrans_textedit: bool = True
    imgtrans_textblock: bool = True
    auto_watermark: bool = False
    mask_transparency: float = 0.
    original_transparency: float = 0.
    open_recent_on_startup: bool = True
    let_fntsize_flag: int = 0
    let_fntstroke_flag: int = 0
    let_fntcolor_flag: int = 0
    let_fnt_scolor_flag: int = 0
    let_fnteffect_flag: int = 1
    let_alignment_flag: int = 0
    let_writing_mode_flag: int = 0
    let_family_flag: int = 0
    let_autolayout_flag: bool = True
    let_uppercase_flag: bool = True
    let_show_only_custom_fonts_flag: bool = False
    let_textstyle_indep_flag: bool = False
    text_styles_path: str = osp.join(shared.DEFAULT_TEXTSTYLE_DIR, 'default.json')
    fsearch_case: bool = False
    fsearch_whole_word: bool = False
    fsearch_regex: bool = False
    fsearch_range: int = 0
    gsearch_case: bool = False
    gsearch_whole_word: bool = False
    gsearch_regex: bool = False
    gsearch_range: int = 0
    darkmode: bool = False
    textselect_mini_menu: bool = True
    fold_textarea: bool = False
    show_source_text: bool = True
    show_trans_text: bool = True
    saladict_shortcut: str = "Alt+S"
    search_url: str = "https://www.google.com/search?q="
    ocr_sublist: List = field(default_factory=lambda: list())
    restore_ocr_empty: bool = False
    pre_mt_sublist: List = field(default_factory=lambda: list())
    mt_sublist: List = field(default_factory=lambda: list())
    # A factory (not a plain default) so shared.DEFAULT_DISPLAY_LANG is read
    # each time an instance is created.
    display_lang: str = field(default_factory=lambda: shared.DEFAULT_DISPLAY_LANG)
    imgsave_quality: int = 100
    imgsave_ext: str = '.png'
    intermediate_imgsave_ext: str = '.png'
    show_text_style_preset: bool = True
    expand_tstyle_panel: bool = True
    show_text_effect_panel: bool = True
    expand_teffect_panel: bool = True
    text_advanced_format_panel: bool = True
    expand_tadvanced_panel: bool = True
    # Watermark settings
    watermark_enabled: bool = False
    watermark_path: str = ''
    # The default is fractional, so the field is a float (it was annotated int).
    watermark_opacity: float = 0.7

    @staticmethod
    def load(cfg_path: str):
        """Build a ProgramConfig from the JSON file at ``cfg_path``.

        Two backward-compatibility migrations are applied to the parsed
        dictionary before construction:

        * a legacy top-level ``'dl'`` section is removed and, when no
          ``'module'`` section exists, becomes ``'module'`` with its
          ``*_setup_params`` keys renamed to ``*_params``;
        * old translator names (as dictionary keys of ``translator_params``
          and as the ``translator`` value) are replaced by their current
          spelling.

        A missing ``translator_params`` or ``translator`` entry is tolerated,
        so a partial ``module`` section no longer aborts loading.

        Raises:
            OSError: if the file cannot be opened.
            json.JSONDecodeError: if the file is not valid JSON.
        """
        with open(cfg_path, 'r', encoding='utf8') as f:
            config_dict = json.loads(f.read())

        # for backward compatibility
        if 'dl' in config_dict:
            dl = config_dict.pop('dl')
            if 'module' not in config_dict:
                for key in ('textdetector', 'inpainter', 'ocr', 'translator'):
                    legacy_key = key + '_setup_params'
                    if legacy_key in dl:
                        dl[key + '_params'] = dl.pop(legacy_key)
                config_dict['module'] = dl

        if 'module' in config_dict:
            module_cfg = config_dict['module']
            repl_pairs = {'baidu': 'Baidu', 'caiyun': 'Caiyun', 'chatgpt': 'ChatGPT', 'Deepl': 'DeepL', 'papago': 'Papago'}
            trans_params = module_cfg.get('translator_params')
            if isinstance(trans_params, dict):
                for old_name, new_name in repl_pairs.items():
                    if old_name in trans_params:
                        trans_params[new_name] = trans_params.pop(old_name)
            translator = module_cfg.get('translator')
            if isinstance(translator, str) and translator in repl_pairs:
                module_cfg['translator'] = repl_pairs[translator]

        return ProgramConfig(**config_dict)
# Module-level state shared by the functions below.
# pcfg is assigned by load_config(); it stays None until that has run.
pcfg: ProgramConfig = None
# Replaced in place by load_textstyle_from() and written out by save_text_styles().
text_styles: List[FontFormat] = []
# Not assigned anywhere in this module.
active_format: FontFormat = None
def load_textstyle_from(p: str, raise_exception = False):
    """Replace the module-level ``text_styles`` with the styles stored in ``p``.

    ``p`` is read as a JSON list; each item is passed to ``FontFormat(**item)``
    and items that fail to construct are skipped with a warning. On success
    ``pcfg.text_styles_path`` is set to ``p``.

    If ``p`` does not exist a warning is logged and nothing changes. If reading
    or parsing fails the error is logged and the function returns without
    touching the current styles, unless ``raise_exception`` is true, in which
    case the error is re-raised.
    """
    global text_styles, pcfg

    if not osp.exists(p):
        LOGGER.warning(f'Text style {p} does not exist.')
        return

    styles_loaded = []
    try:
        with open(p, 'r', encoding='utf8') as f:
            style_list = json.load(f)
        for style in style_list:
            try:
                fmt = FontFormat(**style)
            except Exception as e:
                LOGGER.warning(f'Skip invalid text style: {style}')
            else:
                styles_loaded.append(fmt)
    except Exception as e:
        LOGGER.error(f'Failed to load text style from {p}: {e}')
        if raise_exception:
            raise e
        return

    # Mutate the existing list so other references to it see the new styles.
    if len(text_styles) > 0:
        text_styles.clear()
    text_styles.extend(styles_loaded)
    pcfg.text_styles_path = p
def load_config(config_path: str = shared.CONFIG_PATH):
    """Load the program config into the module-level ``pcfg`` and load its text styles.

    Args:
        config_path: Config file to use. When it differs from
            ``shared.CONFIG_PATH`` that shared value is overwritten with it.

    A missing config file, or one that fails to load, results in a default
    ``ProgramConfig``. If the configured text style file is missing, the
    default style file is used instead; when the default is missing too it is
    created as an empty list (including its directory) and then loaded.
    """
    global pcfg

    if config_path != shared.CONFIG_PATH:
        shared.CONFIG_PATH = config_path
        LOGGER.info(f'Using specified config file at {shared.CONFIG_PATH}')

    if osp.exists(shared.CONFIG_PATH):
        try:
            config = ProgramConfig.load(shared.CONFIG_PATH)
        except Exception as e:
            LOGGER.exception(e)
            LOGGER.warning("Failed to load config file, using default config")
            config = ProgramConfig()
    else:
        LOGGER.info(f'{shared.CONFIG_PATH} does not exist, new config file will be created.')
        config = ProgramConfig()
    pcfg = config

    p = pcfg.text_styles_path
    if not osp.exists(p):
        dp = osp.join(shared.DEFAULT_TEXTSTYLE_DIR, 'default.json')
        if p != dp and osp.exists(dp):
            # Log before reassigning p so the message names the missing file.
            LOGGER.warning(f'Text style {p} does not exist, use the default from {dp}.')
        else:
            style_dir = osp.dirname(dp)
            if style_dir:
                os.makedirs(style_dir, exist_ok=True)
            with open(dp, 'w', encoding='utf8') as f:
                f.write(json.dumps([],  ensure_ascii=False))
            LOGGER.info(f'New text style file created at {dp}.')
        # Either way the default file now exists and is the one to load.
        p = dp
    load_textstyle_from(p)
def json_dump_program_config(obj, **kwargs):
    """Serialize ``obj`` to a JSON string without ASCII-escaping.

    Values the json module cannot encode natively are converted as follows:
    numpy arrays and numpy scalar types via ``serialize_np``, ``ModuleConfig``
    via its ``get_saving_params()``, and anything else via its ``__dict__``.
    Extra keyword arguments are forwarded to ``json.dumps``.
    """
    def _to_serializable(value):
        if isinstance(value, (np.ndarray, np.ScalarType)):
            return serialize_np(value)
        if isinstance(value, ModuleConfig):
            return value.get_saving_params()
        return value.__dict__

    return json.dumps(obj, default=_to_serializable, ensure_ascii=False, **kwargs)
def save_config():
    """Write the module-level ``pcfg`` to ``shared.CONFIG_PATH`` as JSON.

    Returns:
        True on success, False if serialization or writing failed (the error
        and its traceback are logged).
    """
    global pcfg
    try:
        # Serialize before opening the file: opening with 'w' truncates it, so
        # a serialization error must not be able to wipe the existing config.
        serialized = json_dump_program_config(pcfg)
        with open(shared.CONFIG_PATH, 'w', encoding='utf8') as f:
            f.write(serialized)
        LOGGER.info('Config saved')
        return True
    except Exception as e:
        LOGGER.error(f'Failed save config to {shared.CONFIG_PATH}: {e}')
        LOGGER.error(traceback.format_exc())
        return False
def save_text_styles(raise_exception = False):
    """Write the module-level ``text_styles`` to ``pcfg.text_styles_path``.

    The target directory is created when it does not exist.

    Args:
        raise_exception: Re-raise the failure after logging it instead of
            returning False.

    Returns:
        True on success, False on failure when ``raise_exception`` is false.
    """
    global pcfg, text_styles
    try:
        # Serialize first so a failure cannot leave a truncated style file.
        serialized = json_dump_nested_obj(text_styles)
        style_dir = osp.dirname(pcfg.text_styles_path)
        # dirname is '' for a bare filename, and os.makedirs('') would fail.
        if style_dir:
            os.makedirs(style_dir, exist_ok=True)
        with open(pcfg.text_styles_path, 'w', encoding='utf8') as f:
            f.write(serialized)
        LOGGER.info('Text style saved')
        return True
    except Exception as e:
        LOGGER.error(f'Failed save text style to {pcfg.text_styles_path}: {e}')
        LOGGER.error(traceback.format_exc())
        if raise_exception:
            raise
        return False

utils/download_util.py ADDED Viewed

	@@ -0,0 +1,371 @@

+import math
+import os
+import requests
+import traceback
+import re
+import sys
+import shutil
+import os.path as osp
+from typing import List, Union
+import hashlib
+from tqdm import tqdm
+from urllib.parse import urlparse
+from torch.hub import download_url_to_file as _torchhub_download_url_to_file, get_dir
+import requests
+import tqdm
+from py7zr import pack_7zarchive, unpack_7zarchive
+import ssl
+from . import shared
+from .logger import logger as LOGGER
# Register py7zr's pack/unpack callables with shutil at import time so that
# shutil.unpack_archive (used in download_and_check_files) accepts '.7z' files.
shutil.register_archive_format('7zip', pack_7zarchive, description='7zip archive')
shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)
def calculate_sha256(filename):
    """Return the lowercase hexadecimal SHA-256 digest of the file ``filename``.

    The file is read in 1 MiB blocks so it never has to fit in memory at once.
    """
    digest = hashlib.sha256()
    block_size = 1024 * 1024
    with open(filename, "rb") as stream:
        while True:
            block = stream.read(block_size)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest().lower()
def sizeof_fmt(size, suffix='B'):
    """Format a byte count as a human readable string.

    The value is divided by 1024 until its magnitude drops below 1024, going
    through the prefixes '', K, M, G, T, P, E, Z; anything larger is reported
    with the prefix Y.

    Args:
        size (int): File size.
        suffix (str): Suffix. Default: 'B'.

    Return:
        str: Formatted file size with one decimal place.
    """
    prefixes = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z')
    index = 0
    while index < len(prefixes):
        if abs(size) < 1024.0:
            return f'{size:3.1f} {prefixes[index]}{suffix}'
        size = size / 1024.0
        index += 1
    return f'{size:3.1f} Y{suffix}'
def download_file_from_google_drive(file_id, save_path):
    """Download a Google Drive file to ``save_path``.

    Ref:
    https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive  # noqa E501

    The file is requested once; if the response carries a download-warning
    cookie the request is repeated with that token as ``confirm``. A separate
    ranged request is then made, and the total size is taken from its
    ``Content-Range`` header when present.

    Args:
        file_id (str): File id.
        save_path (str): Save path.
    """
    URL = 'https://docs.google.com/uc?export=download'
    session = requests.Session()
    # 'confirm': 't' -- see https://stackoverflow.com/a/73893665/17671327
    params = {'id': file_id, 'confirm': 't'}

    response = session.get(URL, params=params, stream=True)
    token = get_confirm_token(response)
    if token:
        params['confirm'] = token
        response = session.get(URL, params=params, stream=True)

    # get file size
    size_probe = session.get(URL, params=params, stream=True, headers={'Range': 'bytes=0-2'})
    file_size = None
    if 'Content-Range' in size_probe.headers:
        content_range = size_probe.headers['Content-Range']
        file_size = int(content_range.split('/')[1])

    save_response_content(response, save_path, file_size)
def get_confirm_token(response):
    """Return the value of the first cookie whose name starts with 'download_warning'.

    Returns None when ``response.cookies`` has no such cookie.
    """
    matching_values = (
        value
        for key, value in response.cookies.items()
        if key.startswith('download_warning')
    )
    return next(matching_values, None)
def save_response_content(response, destination, file_size=None, chunk_size=32768):
    """Stream the body of ``response`` into the file ``destination``.

    Args:
        response: Object exposing ``iter_content(chunk_size)`` that yields
            bytes chunks (e.g. a streamed ``requests`` response).
        destination: Path of the file to write; it is overwritten.
        file_size: Total size in bytes if known. When given, a progress bar
            is shown; when None the download runs silently.
        chunk_size: Number of bytes requested per chunk.
    """
    pbar = None
    readable_file_size = None
    if file_size is not None:
        # Imported locally under a private alias: at module level the later
        # `import tqdm` rebinds the name `tqdm` to the module, so calling
        # `tqdm(...)` there raises "'module' object is not callable".
        from tqdm import tqdm as _tqdm
        pbar = _tqdm(total=math.ceil(file_size / chunk_size), unit='chunk')
        readable_file_size = sizeof_fmt(file_size)

    downloaded_size = 0
    try:
        with open(destination, 'wb') as f:
            for chunk in response.iter_content(chunk_size):
                if not chunk:  # filter out keep-alive new chunks
                    continue
                f.write(chunk)
                # Count the bytes actually received; the final chunk is
                # usually shorter than chunk_size.
                downloaded_size += len(chunk)
                if pbar is not None:
                    pbar.update(1)
                    pbar.set_description(f'Download {sizeof_fmt(downloaded_size)} / {readable_file_size}')
    finally:
        # Close the bar even if the transfer or the write fails.
        if pbar is not None:
            pbar.close()
+# def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
+#     """Load file form http url, will download models if necessary.
+#     Ref:https://github.com/1adrianb/face-alignment/blob/master/face_alignment/utils.py
+#     Args:
+#         url (str): URL to be downloaded.
+#         model_dir (str): The path to save the downloaded model. Should be a full path. If None, use pytorch hub_dir.
+#             Default: None.
+#         progress (bool): Whether to show the download progress. Default: True.
+#         file_name (str): The downloaded file name. If None, use the file name in the url. Default: None.
+#     Returns:
+#         str: The path to the downloaded file.
+#     """
+#     if model_dir is None:  # use the pytorch hub_dir
+#         hub_dir = get_dir()
+#         model_dir = os.path.join(hub_dir, 'checkpoints')
+#     os.makedirs(model_dir, exist_ok=True)
+#     parts = urlparse(url)
+#     filename = os.path.basename(parts.path)
+#     if file_name is not None:
+#         filename = file_name
+#     cached_file = os.path.abspath(os.path.join(model_dir, filename))
+#     if not os.path.exists(cached_file):
+#         print(f'Downloading: "{url}" to {cached_file}\n')
+#         download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
+#     return cached_file
def torchhub_download_url_to_file(url: str, dst: str, progress: bool = True):
    """Download ``url`` to ``dst`` with torch.hub, with HTTPS certificate checks disabled.

    NOTE(security): certificate verification is switched off process-wide for
    the duration of the call (workaround from
    https://stackoverflow.com/questions/50236117/scraping-ssl-certificate-verify-failed-error-for-http-en-wikipedia-org),
    so the downloaded data is not authenticated by TLS. Callers should verify
    a hash of the result.

    Args:
        url: Source URL.
        dst: Destination file path.
        progress: Forwarded to torch.hub's ``download_url_to_file``.
    """
    original_ctx = ssl._create_default_https_context
    ssl._create_default_https_context = ssl._create_unverified_context
    try:
        _torchhub_download_url_to_file(url, dst, progress=progress)
    finally:
        # Always restore the default context; otherwise one failed download
        # would leave verification disabled for the rest of the process.
        ssl._create_default_https_context = original_ctx
def check_local_file(local_file: str, sha256_precal: str = None, cache_hash: bool = False):
    """Check that ``local_file`` exists and, optionally, matches a SHA-256 hash.

    The hash is only checked when the file exists, ``sha256_precal`` is given
    and ``shared.check_local_file_hash`` is truthy. With ``cache_hash`` a hash
    recorded in ``shared.cache_data`` that equals the expected one is trusted
    without re-reading the file; otherwise the file is hashed, and with
    ``cache_hash`` the result is stored and ``shared.CACHE_UPDATED`` is set.

    Returns:
        ``(file_exists, valid_hash, sha256)``. ``sha256`` is the freshly
        computed digest when the file was hashed, else ``sha256_precal`` as
        passed in.
    """
    file_exists = osp.exists(local_file)
    must_verify = file_exists and sha256_precal is not None and shared.check_local_file_hash
    if not must_verify:
        return file_exists, True, sha256_precal

    expected = sha256_precal.lower()
    cached_hit = (
        cache_hash
        and local_file in shared.cache_data
        and shared.cache_data[local_file].lower() == expected
    )
    if cached_hit:
        return file_exists, True, sha256_precal

    sha256_calculated = calculate_sha256(local_file).lower()
    valid_hash = sha256_calculated == expected
    if cache_hash:
        shared.cache_data[local_file] = sha256_calculated
        shared.CACHE_UPDATED = True
    return file_exists, valid_hash, sha256_calculated
def get_filename_from_url(url: str, default: str = '') -> str:
    """Return the last path segment of ``url``, cut at the first '?'.

    ``default`` is returned when the URL has no non-empty final segment.
    """
    found = re.search(r'/([^/?]+)[^/]*$', url)
    return found.group(1) if found else default
def download_url_with_progressbar(url: str, path: str,):
    """Download ``url`` to ``path`` while showing a tqdm progress bar.

    When ``path`` is a directory (or ends in a separator or '.'), the file
    name is derived from the URL and appended to it.

    Raises:
        Exception: if no file name can be derived from the URL, or if the
            server answers with a non-OK status.
    """
    if os.path.basename(path) in ('.', '') or os.path.isdir(path):
        derived_name = get_filename_from_url(url)
        if not derived_name:
            raise Exception('Could not determine filename')
        path = os.path.join(path, derived_name)

    request_headers = {}
    resumed_bytes = 0
    # the resume downloading here is buggy when the local file is corrupted or over-sized or intended to be replaced
    # if os.path.isfile(path):  # its actually buggy
    #     downloaded_size = os.path.getsize(path)
    #     headers['Range'] = 'bytes=%d-' % downloaded_size
    #     headers['Accept-Encoding'] = 'deflate'

    r = requests.get(url, stream=True, allow_redirects=True, headers=request_headers)
    if resumed_bytes and r.headers.get('Accept-Ranges') != 'bytes':
        print('Error: Webserver does not support partial downloads. Restarting from the beginning.')
        r = requests.get(url, stream=True, allow_redirects=True)
        resumed_bytes = 0

    total = int(r.headers.get('content-length', 0))
    chunk_size = 1024
    if not r.ok:
        raise Exception(f'Couldn\'t resolve url: "{url}" (Error: {r.status_code})')

    progress = tqdm.tqdm(
        desc=os.path.basename(path),
        initial=resumed_bytes,
        total=total+resumed_bytes,
        unit='iB',
        unit_scale=True,
        unit_divisor=chunk_size,
    )
    file_mode = 'ab' if resumed_bytes else 'wb'
    with progress as bar, open(path, file_mode) as f:
        is_tty = sys.stdout.isatty()
        for chunk_no, data in enumerate(r.iter_content(chunk_size=chunk_size), start=1):
            written = f.write(data)
            bar.update(written)
            # Fallback for non TTYs so output still shown
            if not is_tty and chunk_no % 1000 == 0:
                print(bar)
def try_download_files(url: str,
                        files: List[str],
                        save_files: List[str] = None,
                        sha256_pre_calculated: List[str] = None,
                        concatenate_url_filename: int = 0,
                        cache_hash: bool = False,
                        download_method: str = '',
                        gdrive_file_id: str = None):
    """Download every file that is missing locally or fails its hash check.

    Args:
        url: Base or full download URL (see ``concatenate_url_filename``).
        files: Remote file names, one per file.
        save_files: Local destination paths, parallel to ``files``. Defaults
            to ``files`` itself.
        sha256_pre_calculated: Expected SHA-256 digests, parallel to
            ``files``; ``None`` (or a ``None`` entry) skips the hash check.
        concatenate_url_filename: 1 appends the file name to ``url``,
            2 appends only its basename, anything else uses ``url`` as is.
        cache_hash: Forwarded to ``check_local_file``.
        download_method: ``'torch_hub'`` uses torch.hub; any other value uses
            ``download_url_with_progressbar``.
        gdrive_file_id: When given, the file is fetched from Google Drive
            instead, regardless of ``download_method``.

    Returns:
        True if every file is present and valid afterwards, False if any
        download failed (failures are logged, not raised).
    """
    if save_files is None:
        save_files = files
    if sha256_pre_calculated is None:
        sha256_pre_calculated = [None] * len(files)

    all_successful = True
    for file, savep, sha256_precal in zip(files, save_files, sha256_pre_calculated):
        save_dir = osp.dirname(savep)
        # dirname is '' for a bare file name, and os.makedirs('') would fail.
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        file_exists, valid_hash, sha256_calculated = check_local_file(savep, sha256_precal, cache_hash=cache_hash)
        if file_exists:
            if valid_hash:
                continue
            LOGGER.warning(f'Mismatch between local file {savep} and pre-calculated hash: "{sha256_calculated}" <-> "{sha256_precal.lower()}", it will be redownloaded...')

        # Bound before the try block so the failure message below can always
        # refer to it, even if building the final URL is what fails.
        download_url = url
        try:
            if concatenate_url_filename == 1:
                download_url = url + file
            elif concatenate_url_filename == 2:
                download_url = url + osp.basename(file)

            if gdrive_file_id is not None:
                download_file_from_google_drive(gdrive_file_id, savep)
            elif download_method == 'torch_hub':
                LOGGER.info(f'downloading {savep} from {download_url} ...')
                torchhub_download_url_to_file(download_url, savep)
            else:
                download_url_with_progressbar(download_url, savep)

            file_exists, valid_hash, sha256_calculated = check_local_file(savep, sha256_precal, cache_hash=cache_hash)
            if not file_exists:
                raise Exception(f'Some how the downloaded {savep} doesnt exists.')
            elif not valid_hash:
                raise Exception(f'Mismatch between newly downloaded {savep} and pre-calculated hash: "{sha256_calculated}" <-> "{sha256_precal.lower()}"')
        except Exception:
            # Best effort: record the failure and carry on with the next file.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            err_msg = traceback.format_exc()
            all_successful = False
            LOGGER.error(err_msg)
            LOGGER.error(f'Failed downloading {file} from {download_url}, please manually save it to {savep}')
    return all_successful
def download_and_check_files(url: str,
                        files: Union[str, List],
                        save_files = None,
                        sha256_pre_calculated: Union[str, List] = None,
                        concatenate_url_filename: int = 0,
                        archived_files: List = None,
                        archive_sha256_pre_calculated: Union[str, List] = None,
                        save_dir: str = None,
                        download_method: str = 'torch_hub',
                        gdrive_file_id: str = None):
    """Make sure ``files`` exist locally with valid hashes, downloading if needed.

    Without ``archived_files`` each file is downloaded directly. With
    ``archived_files`` the archive is downloaded into ``shared.cache_dir``,
    unpacked, and the wanted files are moved to their destinations; only the
    first archive is unpacked (multi-volume archives are not supported).

    Args:
        url: Download URL, see ``try_download_files``.
        files: One file name or a list of them.
        save_files: Destination path(s); defaults to ``files``.
        sha256_pre_calculated: Expected digest(s) for ``files``.
        concatenate_url_filename: See ``try_download_files``.
        archived_files: Archive name(s) that contain ``files``.
        archive_sha256_pre_calculated: Expected digest(s) of the archive(s).
        save_dir: Optional directory prepended to every destination path.
        download_method: See ``try_download_files``.
        gdrive_file_id: See ``try_download_files``.

    Returns:
        True when all files are present and valid, False otherwise.
    """
    def _wrap_up_checkinputs(files: Union[str, List], save_files: Union[str, List] = None, sha256_pre_calculated: Union[str, List] = None, save_dir: str = None):
        '''
        ensure they're lists with equal length
        '''
        if not isinstance(files, list):
            files = [files]
        if not isinstance(sha256_pre_calculated, list):
            if sha256_pre_calculated is None:
                sha256_pre_calculated = [None] * len(files)
            else:
                sha256_pre_calculated = [sha256_pre_calculated]
        if save_files is None:
            save_files = files
        elif not isinstance(save_files, list):
            save_files = [save_files]
        assert len(files) == len(sha256_pre_calculated) == len(save_files)
        if save_dir is not None:
            save_files = [osp.join(save_dir, savep) for savep in save_files]
        return files, save_files, sha256_pre_calculated

    def _all_valid(save_files: List[str] = None, sha256_pre_calculated: List[str] = None,):
        for savep, sha256_precal in zip(save_files, sha256_pre_calculated):
            file_exists, valid_hash, _ = check_local_file(savep, sha256_precal, cache_hash=True)
            if not file_exists or not valid_hash:
                return False
        return True

    files, save_files, sha256_pre_calculated = _wrap_up_checkinputs(files, save_files, sha256_pre_calculated, save_dir)

    if archived_files is None:
        return try_download_files(url, files, save_files, sha256_pre_calculated, concatenate_url_filename, cache_hash=True, download_method=download_method, gdrive_file_id=gdrive_file_id)

    # handle archived
    if _all_valid(save_files, sha256_pre_calculated):
        # Every other path returns a bool; this one used to return the tuple
        # ([], None), which was only correct by virtue of being truthy.
        return True

    if isinstance(archived_files, str):
        archived_files = [archived_files]

    # download archive files
    tmp_downloaded_archives = [osp.join(shared.cache_dir, archive_name) for archive_name in archived_files]
    _, _, archive_sha256_pre_calculated = _wrap_up_checkinputs(archived_files, tmp_downloaded_archives, archive_sha256_pre_calculated)
    archive_downloaded = try_download_files(url, archived_files, tmp_downloaded_archives, archive_sha256_pre_calculated, concatenate_url_filename, cache_hash=False, download_method=download_method, gdrive_file_id=gdrive_file_id)
    if not archive_downloaded:
        return False

    # extract archived
    archivep = tmp_downloaded_archives[0] # todo: support multi-volume
    extract_dir = osp.join(shared.cache_dir, 'tmp_extract')
    os.makedirs(extract_dir, exist_ok=True)
    LOGGER.info(f'Extracting {archivep} ...')
    shutil.unpack_archive(archivep, extract_dir)

    all_valid = True
    for file, savep, sha256_precal in zip(files, save_files, sha256_pre_calculated):
        unarchived = osp.join(extract_dir, file)
        if not osp.exists(unarchived):
            # Report a missing archive member like any other validation
            # failure instead of letting shutil.move raise.
            LOGGER.error(f'{file} was not found in the extracted archive {archivep}.')
            all_valid = False
            continue
        target_dir = osp.dirname(savep)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        shutil.move(unarchived, savep)
        file_exists, valid_hash, sha256_calculated = check_local_file(savep, sha256_precal, cache_hash=True)
        if not file_exists:
            LOGGER.error(f'The unarchived file {savep} doesnt exists.')
            all_valid = False
        elif not valid_hash:
            LOGGER.error(f'Mismatch between the unarchived {savep} and pre-calculated hash: "{sha256_calculated}" <-> "{sha256_precal.lower()}"')
            all_valid = False

    if all_valid:
        # clean archive files
        shutil.rmtree(extract_dir)
        for p in tmp_downloaded_archives:
            os.remove(p)
    return all_valid

utils/exceptions.py ADDED Viewed

	@@ -0,0 +1,20 @@

class ProjectDirNotExistException(Exception):
    """Project directory does not exist (per the name; raise sites are outside this module)."""
class ProjectLoadFailureException(Exception):
    """A project failed to load (per the name; raise sites are outside this module)."""
class ProjectNotSupportedException(Exception):
    """A project is not supported (per the name; raise sites are outside this module)."""
class ImgnameNotInProjectException(Exception):
    """An image name is not part of the project (per the name; raise sites are outside this module)."""
class NotImplementedProjException(Exception):
    """A project feature is not implemented (per the name; raise sites are outside this module)."""
class InvalidModuleConfigException(Exception):
    """A module configuration is invalid (per the name; raise sites are outside this module)."""
class InvalidProgramConfigException(Exception):
    """The program configuration is invalid (per the name; raise sites are outside this module)."""

utils/fontformat.py ADDED Viewed

	@@ -0,0 +1,136 @@

+from typing import Union
+import enum
+import re
+import copy
+import numpy as np
+from . import shared
+from .structures import Tuple, Union, List, Dict, Config, field, nested_dataclass
def pt2px(pt, to_int=False) -> float:
    """Convert a length in points to pixels using ``shared.LDPI`` (72 points per inch).

    With ``to_int`` the result is rounded and returned as an ``int``.
    """
    pixels = pt * shared.LDPI / 72.
    return int(round(pixels)) if to_int else pixels
def px2pt(px) -> float:
    """Convert a length in pixels to points using ``shared.LDPI`` (72 points per inch)."""
    inches = px / shared.LDPI
    return inches * 72.
class LineSpacingType(enum.IntEnum):
    """Integer codes for the two line-spacing modes."""

    Proportional = 0
    Distance = 1
class TextAlignment(enum.IntEnum):
    """Integer codes for horizontal text alignment."""

    Left = 0
    Center = 1
    Right = 2
# Qt5 font-weight values and the Qt6 values they are translated to.
fontweight_qt5_to_qt6 = {
    0: 100,
    12: 200,
    25: 300,
    50: 400,
    57: 500,
    63: 600,
    75: 700,
    81: 800,
    87: 900,
}
# Exact inverse of the table above.
fontweight_qt6_to_qt5 = {qt6: qt5 for qt5, qt6 in fontweight_qt5_to_qt6.items()}
# Matches a 'font-weight:<digits>' declaration and captures the digits.
fontweight_pattern = re.compile(r'font-weight:(\d+)', re.DOTALL)
def fix_fontweight_qt(weight: Union[str, int]):
    """Translate a font weight between the Qt5 and Qt6 scales, as selected by ``shared.FLAG_QT6``.

    * ``None`` is returned unchanged.
    * An ``int`` is mapped through the conversion tables when it is on the
      other Qt version's scale (below 100 under Qt6, 100 or above otherwise);
      values without a table entry are returned unchanged.
    * A ``str`` has every ``font-weight:<n>`` occurrence rewritten with the
      converted value of ``n``.
    * Any other type is returned unchanged.
    """
    if weight is None:
        return None

    if isinstance(weight, int):
        if shared.FLAG_QT6:
            if weight < 100:
                weight = fontweight_qt5_to_qt6.get(weight, weight)
        elif weight >= 100:
            weight = fontweight_qt6_to_qt5.get(weight, weight)
    elif isinstance(weight, str):
        weight = fontweight_pattern.sub(
            lambda found: f'font-weight:{fix_fontweight_qt(int(found.group(1)))}',
            weight,
        )
    return weight
@nested_dataclass
class FontFormat(Config):
    """Font and text-style settings stored as a flat set of fields.

    ``deprecated_attributes`` may carry the legacy keys ``size``, ``weight``
    and ``family``; they are folded into the current fields in
    ``__post_init__`` and the dict is then emptied.
    """

    # --- typeface and stroke ---
    font_family: str = shared.DEFAULT_FONT_FAMILY # to always apply shared.DEFAULT_FONT_FAMILY
    font_size: float = 24
    stroke_width: float = 0.
    frgb: List = field(default_factory=lambda: [0, 0, 0])
    srgb: List = field(default_factory=lambda: [0, 0, 0])
    bold: bool = False
    underline: bool = False
    italic: bool = False
    # --- layout ---
    alignment: int = 0
    vertical: bool = False
    font_weight: int = None
    line_spacing: float = 1.2
    letter_spacing: float = 1.15
    opacity: float = 1.
    # --- shadow ---
    shadow_radius: float = 0.
    shadow_strength: float = 1.
    shadow_color: List = field(default_factory=lambda: [0, 0, 0])
    shadow_offset: List = field(default_factory=lambda: [0., 0.])
    # --- gradient ---
    gradient_enabled: bool = False
    gradient_start_color: List = field(default_factory=lambda: [0, 0, 0])
    gradient_end_color: List = field(default_factory=lambda: [255, 255, 255])
    gradient_angle: float = 0.
    gradient_size: float = 1.0
    _style_name: str = ''
    line_spacing_type: int = LineSpacingType.Proportional
    deprecated_attributes: dict = field(default_factory=dict)

    @property
    def size_pt(self):
        """``font_size`` passed through ``px2pt``."""
        return px2pt(self.font_size)

    def __post_init__(self):
        """Apply legacy attributes, normalise the font weight, clear the legacy dict."""
        legacy = self.deprecated_attributes
        if len(legacy) > 0:
            if 'size' in legacy:
                # The legacy size goes through pt2px before being stored.
                self.font_size = pt2px(legacy['size'])
            if 'weight' in legacy:
                self.font_weight = legacy['weight']
            if 'family' in legacy:
                self.font_family = legacy['family']
        self.font_weight = fix_fontweight_qt(self.font_weight)
        self.deprecated_attributes = {}

    def deepcopy(self):
        """Return ``copy.deepcopy(self)``."""
        duplicate: FontFormat = copy.deepcopy(self)
        return duplicate

    def merge(self, target: Config, compare: bool = False):
        """Copy field values from ``target`` onto this instance.

        Args:
            target: source of the values; its ``annotations_set()`` supplies
                the keys to visit. Keys this instance lacks are skipped.
            compare: when false every key is overwritten; when true only keys
                whose value differs are overwritten, and ``_style_name`` is
                never touched.

        Returns:
            The set of keys overwritten in compare mode; an empty set when
            ``compare`` is false or ``target`` is this same object.
        """
        if self is target:
            return set()
        changed = set()
        for key in target.annotations_set():
            if not hasattr(self, key):
                continue
            if not compare:
                self.update(key, copy.deepcopy(target[key]))
                continue
            if key == '_style_name':
                continue
            if isinstance(target[key], np.ndarray):
                # Element-wise comparison has to be reduced to one boolean.
                differs = np.any(self[key] != target[key])
            else:
                differs = self[key] != target[key]
            if differs:
                self.update(key, copy.deepcopy(target[key]))
                changed.add(key)
        return changed

    def foreground_color(self):
        """Return ``frgb`` with each component rounded to ``int``."""
        return [int(round(component)) for component in self.frgb]

    def stroke_color(self):
        """Return ``srgb`` with each component rounded to ``int``."""
        return [int(round(component)) for component in self.srgb]

utils/imgproc_utils.py ADDED Viewed

	@@ -0,0 +1,413 @@

+import numpy as np
+import cv2
+import random
+from typing import List, Tuple, Union
def hex2bgr(hex):
    """Split a packed 24-bit colour into three 8-bit channels.

    The value is read as ``(b << 16) | (g << 8) | r``. The previous masks were
    254 (0xFE), which cleared the lowest bit of the green and red channels
    (e.g. 255 came back as 254); full 0xFF masks are used now.

    Args:
        hex: an int or an integer numpy array of packed colours.

    Returns:
        ``np.stack([b, g, r]).transpose()`` -- for an array of N colours this
        has shape (N, 3); for a scalar it has shape (3,).
    """
    gmask = 0xFF << 8
    rmask = 0xFF
    b = hex >> 16
    g = (hex & gmask) >> 8
    r = hex & rmask
    return np.stack([b, g, r]).transpose()
def union_area(bboxa, bboxb):
    """Return the area of the overlap between two ``[x1, y1, x2, y2]`` boxes.

    Despite the name, the value computed is the intersection area. When the
    boxes do not overlap, -1 is returned.
    """
    left, top = max(bboxa[0], bboxb[0]), max(bboxa[1], bboxb[1])
    right, bottom = min(bboxa[2], bboxb[2]), min(bboxa[3], bboxb[3])
    overlaps = not (bottom < top or right < left)
    if not overlaps:
        return -1
    return (bottom - top) * (right - left)
def get_yololabel_strings(clslist, labellist):
    """Format class ids and boxes as newline-separated label rows.

    Each row is ``"<int(cls)> <v0> <v1> ..."`` built from one class id and the
    values of the matching entry in ``labellist``. Rows are joined with
    ``"\\n"`` without a trailing newline; empty input gives ``""``.
    """
    rows = [
        str(int(cls)) + ' ' + ' '.join([str(value) for value in xywh])
        for cls, xywh in zip(clslist, labellist)
    ]
    return '\n'.join(rows)
# (x, y, w, h) boxes -> 8-value corner polygons
def xywh2xyxypoly(xywh, to_int=True):
    """Expand ``(x, y, w, h)`` rows into 4-corner polygons.

    Args:
        xywh: 2-D array with one box per row.
        to_int: cast the result to ``np.int64`` when true.

    Returns:
        Array with 8 columns per row:
        ``x, y, x+w, y, x+w, y+h, x, y+h``.
    """
    corners = np.tile(xywh[:, [0, 1]], 4)
    widths, heights = xywh[:, [2]], xywh[:, [3]]
    corners[:, [2, 4]] += widths
    corners[:, [5, 7]] += heights
    return corners.astype(np.int64) if to_int else corners
def xyxy2yolo(xyxy, w: int, h: int):
    """Convert ``[x1, y1, x2, y2]`` boxes to normalised ``[cx, cy, w, h]``.

    The emptiness test no longer compares the input against ``[]`` /
    ``np.array([])``: on an ndarray that is an element-wise comparison, which
    raises for mismatched shapes on current NumPy.

    Args:
        xyxy: one box or a sequence/array of boxes; ``None`` or empty gives
            ``None``. The input is never modified.
        w: divisor applied to the x coordinates.
        h: divisor applied to the y coordinates.

    Returns:
        A new float64 array of shape (N, 4), or ``None`` for empty input.
    """
    if xyxy is None or len(xyxy) == 0:
        return None
    # np.array() always copies, so the caller's data is left untouched.
    yolo = np.array(xyxy, dtype=np.float64)
    if yolo.ndim == 1:
        yolo = yolo[np.newaxis, :]
    yolo[:, [0, 2]] = yolo[:, [0, 2]] / w
    yolo[:, [1, 3]] = yolo[:, [1, 3]] / h
    yolo[:, [2, 3]] -= yolo[:, [0, 1]]      # corner pair -> width/height
    yolo[:, [0, 1]] += yolo[:, [2, 3]] / 2  # top-left -> centre
    return yolo
def yolo_xywh2xyxy(xywh: np.array, w: int, h:  int, to_int=True):
    """Convert normalised ``[cx, cy, w, h]`` boxes to ``[x1, y1, x2, y2]``.

    The computation runs on a copy. Previously a 2-D input array was scaled
    in place (changing the caller's data), and an integer-typed array failed
    on the in-place float operations.

    Args:
        xywh: one box or an (N, 4) collection; ``None`` or empty gives ``None``.
        w: multiplier for the x centre and width.
        h: multiplier for the y centre and height.
        to_int: cast the result to ``np.int64`` (truncating) when true.

    Returns:
        A new (N, 4) array, or ``None`` for ``None``/empty input.
    """
    if xywh is None or len(xywh) == 0:
        return None
    boxes = np.array(xywh)  # copy; keeps a floating dtype as-is
    if not np.issubdtype(boxes.dtype, np.floating):
        boxes = boxes.astype(np.float64)
    if boxes.ndim == 1:
        boxes = boxes[np.newaxis, :]
    boxes[:, [0, 2]] *= w
    boxes[:, [1, 3]] *= h
    boxes[:, [0, 1]] -= boxes[:, [2, 3]] / 2  # centre -> top-left
    boxes[:, [2, 3]] += boxes[:, [0, 1]]      # size -> bottom-right
    if to_int:
        boxes = boxes.astype(np.int64)
    return boxes
def rotate_polygons(center, polygons, rotation, new_center=None, to_int=True):
    """Rotate interleaved ``x0, y0, x1, y1, ...`` polygons about ``center``.

    Args:
        center: ``(x, y)`` pivot subtracted before rotating.
        polygons: 2-D array, one polygon per row (even columns x, odd y).
        rotation: angle in degrees.
        new_center: ``(x, y)`` added back afterwards; defaults to ``center``.
        to_int: cast the result to ``np.int64`` when true.

    Returns:
        A new array; the input is not modified (work is done on a float32 copy).
    """
    if new_center is None:
        new_center = center
    theta = np.deg2rad(rotation)
    sin_t, cos_t = np.sin(theta), np.cos(theta)

    shifted = polygons.astype(np.float32)
    shifted[:, 1::2] -= center[1]
    shifted[:, ::2] -= center[0]
    xs, ys = shifted[:, ::2], shifted[:, 1::2]

    result = np.copy(shifted)
    result[:, 1::2] = ys * cos_t - xs * sin_t
    result[:, ::2] = ys * sin_t + xs * cos_t
    result[:, 1::2] += new_center[1]
    result[:, ::2] += new_center[0]
    return result.astype(np.int64) if to_int else result
def letterbox(im, new_shape=(640, 640), color=(0, 0, 0), auto=False, scaleFill=False, scaleup=True, stride=128):
    """Resize an image to fit ``new_shape`` and pad it on the bottom/right.

    Changes from the previous version: the unused ``top/bottom/left/right``
    locals are gone, and a ``list`` ``new_shape`` is treated like a tuple
    instead of being wrapped into a pair of lists.

    Args:
        im: image array whose first two dimensions are (height, width).
        new_shape: target ``(height, width)``, or a single int for a square.
        color: border value passed to ``cv2.copyMakeBorder``.
        auto: pad only up to the next multiple of ``stride``.
        scaleFill: stretch to ``new_shape`` with no padding (ignored when
            ``auto`` is set).
        scaleup: when false the image is never enlarged.
        stride: modulus used by ``auto``.

    Returns:
        ``(image, ratio, (dw, dh))`` where ``ratio`` is the (width, height)
        scale pair and ``dw``/``dh`` are the integer paddings added to the
        right and bottom edges.
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, (list, tuple)):
        new_shape = tuple(new_shape)
    else:
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, never up
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    # The padding is not split across both sides: all of it goes to the
    # bottom and right edges.
    dh, dw = int(dh), int(dw)
    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    im = cv2.copyMakeBorder(im, 0, dh, 0, dw, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
def resize_keepasp(im, new_shape=640, scaleup=True, interpolation=cv2.INTER_LINEAR, stride=None):
    """Resize an image to fit inside ``new_shape`` using one scale factor.

    Args:
        im: image array whose first two dimensions are (height, width).
        new_shape: target ``(height, width)`` tuple, a single value used for
            both, or ``None`` to use the image's own shape.
        scaleup: when false the scale factor is capped at 1.0.
        interpolation: flag forwarded to ``cv2.resize``.
        stride: if given, each output dimension is rounded up to a multiple
            of it (so the aspect ratio may shift slightly).

    Returns:
        The resized image, or the input object when the size is unchanged.
    """
    src_hw = im.shape[:2]  # [height, width]
    if new_shape is None:
        new_shape = src_hw
    elif not isinstance(new_shape, tuple):
        new_shape = (new_shape, new_shape)

    scale = min(new_shape[0] / src_hw[0], new_shape[1] / src_hw[1])
    if not scaleup:
        scale = min(scale, 1.0)

    out_w = int(round(src_hw[1] * scale))
    out_h = int(round(src_hw[0] * scale))
    if stride is not None:
        def _ceil_to_stride(value):
            remainder = value % stride
            return value if remainder == 0 else (stride - remainder) + value
        out_w, out_h = _ceil_to_stride(out_w), _ceil_to_stride(out_h)

    dsize = (out_w, out_h)  # cv2 expects (width, height)
    if src_hw[::-1] != dsize:
        im = cv2.resize(im, dsize, interpolation=interpolation)
    return im
def expand_textwindow(img_size, xyxy, expand_r=8, shrink=False):
    """Grow (or shrink) a box by a margin derived from its own size.

    The margin is ``round((0.25 * long_side + 0.75 * short_side) / expand_r)``.

    Args:
        img_size: sequence starting with (height, width).
        xyxy: ``[x1, y1, x2, y2]``.
        expand_r: divisor controlling the margin.
        shrink: apply the margin inwards instead of outwards.

    Returns:
        ``[x1, y1, x2, y2]`` clamped to ``[0, width - 1]`` / ``[0, height - 1]``.
    """
    im_h, im_w = img_size[:2]
    left, top, right, bottom = xyxy
    box_w, box_h = right - left, bottom - top
    long_side, short_side = max(box_h, box_w), min(box_h, box_w)
    margin = int(round((long_side * 0.25 + short_side * 0.75) / expand_r))
    if shrink:
        margin = -margin
    return [
        max(0, left - margin),
        max(0, top - margin),
        min(im_w - 1, right + margin),
        min(im_h - 1, bottom + margin),
    ]
def enlarge_window(rect, im_w, im_h, ratio=2.5, aspect_ratio=1.0) -> List:
    """Grow a rectangle so that its area is multiplied by roughly ``ratio``.

    The total height growth ``d`` is the largest root of
    ``aspect_ratio*d^2 + (w + h*aspect_ratio)*d + (1 - ratio)*w*h = 0``,
    i.e. ``(w + aspect_ratio*d) * (h + d) = ratio * w * h``. Half of it is
    added on each side, limited by the distance to the image border.

    Args:
        rect: ``[x1, y1, x2, y2]``.
        im_w, im_h: image width and height used for clamping.
        ratio: area multiplier; must be > 1 (checked with ``assert``).
        aspect_ratio: width growth per unit of height growth.

    Returns:
        The enlarged ``[x1, y1, x2, y2]`` as a list of ints, or
        ``[0, 0, 0, 0]`` when the input has non-positive width or height.
    """
    assert ratio > 1.0
    x1, y1, x2, y2 = rect
    w, h = x2 - x1, y2 - y1
    if w <= 0 or h <= 0:
        return [0, 0, 0, 0]

    # https://numpy.org/doc/stable/reference/generated/numpy.roots.html
    quadratic = [aspect_ratio, w + h * aspect_ratio, (1 - ratio) * w * h]
    largest_root = np.sort(np.roots(quadratic))[-1]
    pad_y = int(round(largest_root / 2))
    pad_x = int(pad_y * aspect_ratio)
    pad_x = min(x1, im_w - x2, pad_x)
    pad_y = min(y1, im_h - y2, pad_y)

    grown = np.array([x1 - pad_x, y1 - pad_y, x2 + pad_x, y2 + pad_y], dtype=np.int64)
    grown[::2] = np.clip(grown[::2], 0, im_w)
    grown[1::2] = np.clip(grown[1::2], 0, im_h)
    return grown.tolist()
def draw_connected_labels(num_labels, labels, stats, centroids, names="draw_connected_labels", skip_background=True):
    """Paint each connected-component label in a random colour and show it.

    ``np.int0`` (removed in NumPy 2.0) is replaced by an explicit cast to
    ``np.intp``, which is the type ``np.int0`` aliased.

    Args:
        num_labels: number of labels, or an iterable of label ids to draw.
        labels: 2-D label image.
        stats: per-label rows; columns 2 and 3 are read as width and height.
        centroids: per-label ``(x, y)`` centres.
        names: window title passed to ``cv2.imshow``.
        skip_background: skip label 0 when true.

    Returns:
        The BGR uint8 visualisation; it is also displayed via ``cv2.imshow``.
    """
    labdraw = np.zeros((labels.shape[0], labels.shape[1], 3), dtype=np.uint8)
    max_ind = 0  # row of `stats` used as the size reference
    maxr, minr = 0.5, 0.001
    if isinstance(num_labels, int):
        num_labels = range(num_labels)
    for lab in num_labels:
        if skip_background and lab == 0:
            continue
        randcolor = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        region = labels == lab
        labdraw[region] = randcolor
        maxw, maxh = stats[max_ind][2] * maxr, stats[max_ind][3] * maxr
        minarea = labdraw.shape[0] * labdraw.shape[1] * minr
        stat = stats[lab]
        bboxarea = stat[2] * stat[3]
        # Only mid-sized components get a rotated box and a centroid dot.
        if stat[2] < maxw and stat[3] < maxh and bboxarea > minarea:
            pix = np.zeros((labels.shape[0], labels.shape[1]), dtype=np.uint8)
            pix[region] = 255
            rect = cv2.minAreaRect(cv2.findNonZero(pix))
            box = cv2.boxPoints(rect).astype(np.intp)
            labdraw = cv2.drawContours(labdraw, [box], 0, randcolor, 2)
            labdraw = cv2.circle(labdraw, (int(centroids[lab][0]), int(centroids[lab][1])), radius=5, color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)), thickness=-1)
    cv2.imshow(names, labdraw)
    return labdraw
def rotate_image(mat: np.ndarray, angle: float) -> np.ndarray:
    """
    Rotate an image by ``angle`` degrees, enlarging the canvas so that no
    part of the rotated image is cropped.
    # https://stackoverflow.com/questions/43892506/opencv-python-rotate-image-without-cropping-sides
    """
    src_h, src_w = mat.shape[:2]
    # getRotationMatrix2D takes the centre as (x, y), the reverse of shape order.
    pivot = (src_w / 2, src_h / 2)
    affine = cv2.getRotationMatrix2D(pivot, angle, 1.)

    # The first row of the matrix holds cos and sin of the rotation.
    cos_a, sin_a = abs(affine[0, 0]), abs(affine[0, 1])

    # Bounding size of the rotated image.
    out_w = int(src_h * sin_a + src_w * cos_a)
    out_h = int(src_h * cos_a + src_w * sin_a)

    # Shift the translation so the image is centred in the new canvas.
    affine[0, 2] += out_w / 2 - pivot[0]
    affine[1, 2] += out_h / 2 - pivot[1]

    return cv2.warpAffine(mat, affine, (out_w, out_h))
def color_difference(rgb1: List, rgb2: List) -> float:
    """Return a weighted Euclidean distance between two RGB colours in LAB.

    Both colours are converted with ``cv2.COLOR_RGB2LAB`` as uint8 pixels;
    the first-channel difference is scaled by 0.392 before the norm is taken.
    """
    # https://en.wikipedia.org/wiki/Color_difference#CIE76
    def _as_lab(rgb):
        pixel = np.array(rgb, dtype=np.uint8).reshape(1, 1, 3)
        return cv2.cvtColor(pixel, cv2.COLOR_RGB2LAB).astype(np.float64)

    delta = _as_lab(rgb1) - _as_lab(rgb2)
    delta[..., 0] *= 0.392
    return np.linalg.norm(delta, axis=2).item()
def extract_ballon_region(img: np.ndarray, ballon_rect: List, show_process=False, enlarge_ratio=2.0, cal_region_rect=False) -> Tuple[np.ndarray, int, List]:
    """Build a binary mask of the region enclosing the centre of a crop.

    The crop is taken around ``ballon_rect`` (optionally enlarged), edges are
    detected, and for each large contour a flood fill is run from the crop
    centre; the contour giving the smallest fill above 30% of the crop area
    is kept and post-processed into the mask.

    Args:
        img: source image.
        ballon_rect: ``[x, y, w, h]``.
        show_process: display the mask and crop with ``cv2.imshow`` and wait
            for a key press.
        enlarge_ratio: when > 1 the window is grown with ``enlarge_window``.
        cal_region_rect: also return ``cv2.boundingRect`` of the mask.

    Returns:
        ``(mask, nonzero_pixel_count, [x1, y1, x2, y2])``, plus the mask's
        bounding rect as a fourth element when ``cal_region_rect`` is true.
    """
    WHITE = (255, 255, 255)
    BLACK = (0, 0, 0)

    x1, y1 = ballon_rect[0], ballon_rect[1]
    x2, y2 = ballon_rect[2] + ballon_rect[0], ballon_rect[3] + ballon_rect[1]
    if enlarge_ratio > 1:
        x1, y1, x2, y2 = enlarge_window([x1, y1, x2, y2], img.shape[1], img.shape[0], enlarge_ratio, aspect_ratio=ballon_rect[3] / ballon_rect[2])

    img = img[y1:y2, x1:x2].copy()
    dilate_kernel = np.ones((3, 3), np.uint8)
    orih, oriw = img.shape[0], img.shape[1]

    # Work on a rescaled copy: shrink large crops, enlarge small ones.
    if orih > 300 and oriw > 300:
        scale = 0.6
    elif orih < 120 or oriw < 120:
        scale = 1.4
    else:
        scale = 1
    rescaled = scale != 1
    if rescaled:
        full_res = np.copy(img)
        img = cv2.resize(img, (int(oriw * scale), int(orih * scale)), interpolation=cv2.INTER_AREA)

    h, w = img.shape[0], img.shape[1]
    img_area = h * w
    frame_br = (w - 1, h - 1)

    # NOTE(review): the third positional argument of cv2.GaussianBlur is
    # sigmaX, so cv2.BORDER_DEFAULT is being used as a sigma here -- confirm
    # whether that is intended.
    blurred = cv2.GaussianBlur(img, (3, 3), cv2.BORDER_DEFAULT)
    edges = cv2.Canny(blurred, 70, 140, L2gradient=True, apertureSize=3)
    # A temporary white frame is drawn before extracting contours, then erased.
    cv2.rectangle(edges, (0, 0), frame_br, WHITE, 1, cv2.LINE_8)
    contours, _hierarchy = cv2.findContours(edges, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    cv2.rectangle(edges, (0, 0), frame_br, BLACK, 1, cv2.LINE_8)

    ballon_mask = np.zeros((h, w), np.uint8)
    smallest_fill = np.inf
    canvas = np.zeros((h, w), np.uint8)
    tol = 10
    tolerance = (tol, tol, tol)
    seed = (int(w / 2), int(h / 2))

    for idx, contour in enumerate(contours):
        bbox = cv2.boundingRect(contour)
        if bbox[2] * bbox[3] < img_area * 0.4:
            continue
        canvas = cv2.drawContours(canvas, contours, idx, (255), 2)
        trial = np.copy(canvas)
        cv2.rectangle(canvas, (0, 0), frame_br, WHITE, 1, cv2.LINE_8)
        filled, _, _, _ = cv2.floodFill(trial, mask=None, seedPoint=seed, flags=4, newVal=(127), loDiff=tolerance, upDiff=tolerance)
        if filled <= img_area * 0.3:
            # Fill too small: remove this contour from the accumulated canvas.
            canvas = cv2.drawContours(canvas, contours, idx, (0), 2)
        elif filled < smallest_fill:
            smallest_fill = filled
            ballon_mask = trial

    ballon_mask = 127 - ballon_mask
    ballon_mask = cv2.dilate(ballon_mask, dilate_kernel, iterations=1)
    ballon_area, _, _, _ = cv2.floodFill(ballon_mask, mask=None, seedPoint=seed, flags=4, newVal=(30), loDiff=tolerance, upDiff=tolerance)
    ballon_mask = 30 - ballon_mask
    _, ballon_mask = cv2.threshold(ballon_mask, 1, 255, cv2.THRESH_BINARY)
    ballon_mask = cv2.bitwise_not(ballon_mask, ballon_mask)

    # Dilate then erode with a kernel sized from the filled area.
    closing_size = int(np.sqrt(ballon_area) / 30)
    if closing_size > 1:
        closing_kernel = np.ones((closing_size, closing_size), np.uint8)
        ballon_mask = cv2.dilate(ballon_mask, closing_kernel, iterations=1)
        ballon_mask = cv2.erode(ballon_mask, closing_kernel, iterations=1)

    if rescaled:
        img = full_res
        ballon_mask = cv2.resize(ballon_mask, (oriw, orih))

    if show_process:
        cv2.imshow('ballon_mask', ballon_mask)
        cv2.imshow('img', img)
        cv2.waitKey(0)

    result = (ballon_mask, (ballon_mask > 0).sum(), [x1, y1, x2, y2])
    if cal_region_rect:
        return result + (cv2.boundingRect(ballon_mask),)
    return result
def square_pad_resize(img: np.ndarray, tgt_size: int):
    """Pad an image to a square on the bottom/right, then downscale it.

    The image is first padded to a square with side ``max(h, w)``; if that is
    still smaller than ``tgt_size`` it is padded further up to ``tgt_size``.
    A square larger than ``tgt_size`` is resized down to it.

    Returns:
        ``(image, down_scale_ratio, pad_h, pad_w)`` where the ratio is
        ``tgt_size / padded_side`` (asserted to be <= 1).
    """
    h, w = img.shape[:2]
    side = max(h, w)
    pad_h, pad_w = side - h, side - w  # at most one of these is non-zero

    extra = tgt_size - side
    if extra > 0:
        pad_h += extra
        pad_w += extra

    if pad_h > 0 or pad_w > 0:
        img = cv2.copyMakeBorder(img, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT)

    down_scale_ratio = tgt_size / img.shape[0]
    assert down_scale_ratio <= 1
    if down_scale_ratio < 1:
        img = cv2.resize(img, (tgt_size, tgt_size), interpolation=cv2.INTER_AREA)
    return img, down_scale_ratio, pad_h, pad_w
def get_block_mask(xywh: List, mask_array: np.ndarray, angle: int):
    """Cut the part of ``mask_array`` covered by an optionally rotated box.

    Args:
        xywh: ``[x, y, w, h]`` of the box.
        mask_array: 2-D mask to cut from.
        angle: rotation in degrees; 0 takes the plain axis-aligned slice.

    Returns:
        ``(mask, [x1, y1, x2, y2])`` with the window clipped to the image, or
        ``(None, None)`` when the window is narrower/shorter than 2 px or
        lies outside the image. For a rotated box the slice is AND-ed with
        the filled rotated polygon.
    """
    x, y, w, h = xywh
    im_h, im_w = mask_array.shape[:2]

    def _unusable(left, top, right, bottom):
        # Too thin, or entirely outside the image.
        return (right < 0 or right - left < 2 or left >= im_w - 1
                or bottom < 0 or bottom - top < 2 or top >= im_h - 1)

    if angle == 0:
        x1, y1, x2, y2 = x, y, x + w, y + h
        if _unusable(x1, y1, x2, y2):
            return None, None
        if x1 < 0:
            x1 = 0
        if x2 > im_w:
            x2 = im_w
        if y1 < 0:
            y1 = 0
        if y2 > im_h:
            y2 = im_h
        return mask_array[y1: y2, x1: x2], [x1, y1, x2, y2]

    cx, cy = x + int(round(w / 2)), y + int(round(h / 2))
    poly = xywh2xyxypoly(np.array([[x, y, w, h]]))
    poly = rotate_polygons([cx, cy], poly, -angle)
    x1, x2 = np.min(poly[..., ::2]), np.max(poly[..., ::2])
    y1, y2 = np.min(poly[..., 1::2]), np.max(poly[..., 1::2])
    if _unusable(x1, y1, x2, y2):
        return None, None

    # Move the polygon into the local frame of its own bounding box.
    poly[..., ::2] -= cx - int((x2 - x1) / 2)
    poly[..., 1::2] -= cy - int((y2 - y1) / 2)
    itmsk = np.zeros((y2 - y1, x2 - x1), np.uint8)
    cv2.fillPoly(itmsk, poly.reshape(-1, 4, 2), color=(255))

    # Clip the window to the image and trim the polygon mask to match.
    # Negative px2/py2 trim from the end of the slice.
    px1, px2, py1, py2 = 0, itmsk.shape[1], 0, itmsk.shape[0]
    if x1 < 0:
        px1 = -x1
        x1 = 0
    if x2 > im_w:
        px2 = im_w - x2
        x2 = im_w
    if y1 < 0:
        py1 = -y1
        y1 = 0
    if y2 > im_h:
        py2 = im_h - y2
        y2 = im_h
    itmsk = itmsk[py1: py2, px1: px2]
    msk = cv2.bitwise_and(mask_array[y1: y2, x1: x2], itmsk)
    return msk, [x1, y1, x2, y2]

utils/io_utils.py ADDED Viewed

	@@ -0,0 +1,243 @@

+import json, os, sys, time, io
+import os.path as osp
+from pathlib import Path
+import importlib
+from typing import List, Dict, Callable, Union
+import base64
+import traceback
+from .logger import logger as LOGGER
+import requests
+from PIL import Image
+import PIL
+import cv2
+import numpy as np
+import pillow_jxl
+from natsort import natsorted
# File suffixes (lower case, with dot) recognised as images.
IMG_EXT = ['.bmp', '.jpg', '.png', '.jpeg', '.webp', '.jxl']
# NumPy scalar types grouped by the Python builtin they are converted to.
NP_INT_TYPES = (np.int_, np.int8, np.int16, np.int32, np.int64, np.uint, np.uint8, np.uint16, np.uint32, np.uint64)
# Only canonical type names are used here. The aliases previously selected by
# NumPy major version (np.bool8, np.float_ on 1.x; np.bool on 2.x) refer to
# np.bool_ / np.float64, so isinstance() results are the same, and this form
# neither triggers deprecation warnings nor depends on the installed version.
NP_BOOL_TYPES = (np.bool_,)
NP_FLOAT_TYPES = (np.float16, np.float32, np.float64)
def to_dict(obj):
    """Convert an object tree into plain JSON types via a JSON round trip.

    Objects the encoder cannot handle natively are replaced by their
    ``__dict__``.
    """
    def _fallback(value):
        return value.__dict__

    encoded = json.dumps(obj, default=_fallback, ensure_ascii=False)
    return json.loads(encoded)
def serialize_np(obj):
    """Convert NumPy arrays and scalars to builtin Python values.

    Arrays become nested lists. Scalars matching ``NP_BOOL_TYPES``,
    ``NP_FLOAT_TYPES`` or ``NP_INT_TYPES`` (checked in that order) become
    ``bool``, ``float`` or ``int``. Anything else is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if not isinstance(obj, np.ScalarType):
        return obj
    conversions = (
        (NP_BOOL_TYPES, bool),
        (NP_FLOAT_TYPES, float),
        (NP_INT_TYPES, int),
    )
    for np_types, builtin in conversions:
        if isinstance(obj, np_types):
            return builtin(obj)
    return obj
def json_dump_nested_obj(obj, **kwargs):
    """Serialise a nested object to a JSON string.

    NumPy arrays/scalars go through ``serialize_np``; any other value the
    encoder cannot handle is replaced by its ``__dict__``. Extra keyword
    arguments are forwarded to ``json.dumps``; non-ASCII text is kept as-is.
    """
    def _encode_unknown(value):
        is_numpy_value = isinstance(value, (np.ndarray, np.ScalarType))
        return serialize_np(value) if is_numpy_value else value.__dict__

    return json.dumps(obj, default=_encode_unknown, ensure_ascii=False, **kwargs)
# https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that routes NumPy arrays and scalars through ``serialize_np``."""

    def default(self, obj):
        """Convert NumPy values; defer everything else to the base encoder."""
        if not isinstance(obj, (np.ndarray, np.ScalarType)):
            return super().default(obj)
        return serialize_np(obj)
def find_all_imgs(img_dir, abs_path=False, sort=False):
    """List the image files directly inside ``img_dir``.

    A file counts as an image when its lower-cased suffix is in ``IMG_EXT``.

    Args:
        img_dir: directory to list (not recursive).
        abs_path: return ``img_dir`` joined with each name instead of bare names.
        sort: order the result with ``natsorted``.
    """
    matches = [
        name for name in os.listdir(img_dir)
        if Path(name).suffix.lower() in IMG_EXT
    ]
    if abs_path:
        matches = [osp.join(img_dir, name) for name in matches]
    if sort:
        matches = natsorted(matches)
    return matches
def find_all_files_recursive(tgt_dir: Union[List, str], ext: Union[List, set], exclude_dirs=None):
    """Walk one or more directories and collect files by suffix.

    Args:
        tgt_dir: a directory path or a list of them.
        ext: collection of lower-case suffixes (with dot) to keep.
        exclude_dirs: directory base names whose own files are skipped. Their
            sub-directories are still walked.

    Returns:
        List of matching file paths (``root`` joined with the file name).
    """
    roots = [tgt_dir] if isinstance(tgt_dir, str) else tgt_dir
    skipped = set() if exclude_dirs is None else exclude_dirs
    found = []
    for top in roots:
        for root, _, files in os.walk(top):
            if osp.basename(root) in skipped:
                continue
            found.extend(
                osp.join(root, name) for name in files
                if Path(name).suffix.lower() in ext
            )
    return found
def imread(imgpath, read_type=cv2.IMREAD_COLOR, max_retry_limit=5, retry_interval=0.1):
    """Read an image through PIL and return it as a NumPy array.

    Changes from the previous version:
      * the file is opened in a ``with`` block so the handle is closed;
      * a two-channel array (e.g. PIL mode ``LA``) in grayscale mode now
        yields its first channel instead of hitting a bare ``raise``;
      * any other channel count raises ``ValueError`` with a message, where
        the bare ``raise`` produced "No active exception to reraise".

    Args:
        imgpath: path of the image file.
        read_type: ``cv2.IMREAD_GRAYSCALE`` for a single-channel result; any
            other value converts the image to RGB.
        max_retry_limit: attempts allowed when PIL cannot identify the file;
            ``None`` retries without limit.
        retry_interval: seconds to sleep between attempts.

    Returns:
        The image array, or ``None`` if the file does not exist or could not
        be identified within the retry limit.

    Raises:
        ValueError: grayscale was requested and the array has an unsupported
            number of channels.
    """
    if not osp.exists(imgpath):
        return None
    num_tries = 0
    while True:
        try:
            with Image.open(imgpath) as pil_img:
                if read_type != cv2.IMREAD_GRAYSCALE:
                    pil_img = pil_img.convert('RGB')
                img = np.array(pil_img)
            break
        except PIL.UnidentifiedImageError as e:
            # IMG I/O thread might not have finished yet
            num_tries += 1
            if max_retry_limit is not None and num_tries >= max_retry_limit:
                LOGGER.exception(e)
                return None
            LOGGER.warning(f'PIL.UnidentifiedImageError: failed to read {imgpath}, retries: {num_tries} / {max_retry_limit}')
            time.sleep(retry_interval)
    if read_type == cv2.IMREAD_GRAYSCALE and img.ndim == 3:
        n_channels = img.shape[-1]
        if n_channels == 3:
            img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        elif n_channels == 4:
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
        elif n_channels in (1, 2):
            # Keep the first channel (for two channels the second is dropped).
            img = img[..., 0]
        else:
            raise ValueError(f'cannot convert image with {n_channels} channels to grayscale: {imgpath}')
    return img
def imwrite(img_path, img, ext='.png', quality=100, jxl_encode_effort=3):
    """Encode an RGB(A)/grayscale array and write it with the given extension.

    The file suffix used to be swapped with ``str.replace``, which rewrote
    every occurrence of the suffix text in the path (so ``out.png/page.png``
    also had its directory renamed). Only the real suffix is replaced now,
    and ``img_path`` may be a path-like object.

    Args:
        img_path: destination path; its suffix is replaced by ``ext`` (or
            ``ext`` is appended when it has none).
        img: image array; 3- and 4-channel inputs are treated as RGB/RGBA.
        ext: target extension, must be in ``IMG_EXT`` (checked with ``assert``).
        quality: encoder quality for JPEG/WebP/JXL; for JXL a value above 99
            selects lossless mode.
        jxl_encode_effort: ``effort`` argument for the JXL encoder.
    """
    # cv2 writing is faster than PIL
    img_path = str(img_path)
    suffix = Path(img_path).suffix
    ext = ext.lower()
    assert ext in IMG_EXT
    if suffix != '':
        # The suffix is the last occurrence of that text in the path, so
        # rpartition touches it and nothing earlier.
        head, _, tail = img_path.rpartition(suffix)
        img_path = head + ext + tail
    else:
        img_path += ext

    encode_param = None
    if ext in {'.jpg', '.jpeg'}:
        encode_param = [cv2.IMWRITE_JPEG_QUALITY, quality]
    elif ext == '.webp':
        encode_param = [cv2.IMWRITE_WEBP_QUALITY, quality]

    if ext == '.jxl':
        # jxl_encode_effort: https://github.com/Isotr0py/pillow-jpegxl-plugin/issues/23
        # higher values theoretically produce smaller files at the expense of time, 3 seems to strike a balance
        lossless = quality > 99 # quality=100, lossless=False seems to result in larger file compared with lossless=True
        Image.fromarray(img).save(img_path, quality=quality, lossless=lossless, effort=jxl_encode_effort)
    else:
        if len(img.shape) == 3:
            # cv2 encoders expect BGR(A) channel order.
            if img.shape[-1] == 3:
                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            elif img.shape[-1] == 4:
                img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGRA)
        # imencode + tofile is used rather than cv2.imwrite.
        cv2.imencode(ext, img, encode_param)[1].tofile(img_path)
def show_img_by_dict(imgdicts):
    """Show every image of a ``{window_name: image}`` dict, then wait for a key."""
    for window_name, image in imgdicts.items():
        cv2.imshow(window_name, image)
    cv2.waitKey(0)
def text_is_empty(text) -> bool:
    """Tell whether ``text`` holds no visible characters.

    Previously a non-empty string (and any unsupported type) fell off the end
    of the function and returned ``None``; a real ``bool`` is returned now.

    Args:
        text: ``None``, a string, or a (possibly nested) list of these.

    Returns:
        ``True`` for ``None``, a whitespace-only string, or a list whose
        items are all empty (including an empty list); ``False`` otherwise.
    """
    if text is None:
        return True
    if isinstance(text, str):
        return text.strip() == ''
    if isinstance(text, list):
        return all(text_is_empty(item) for item in text)
    return False
def empty_func(*args, **kwargs):
    """Accept any arguments, do nothing and return ``None``."""
    return None
def get_obj_from_str(string, reload=False):
    """Resolve a dotted path such as ``"pkg.module.Name"`` to the named object.

    Args:
        string: dotted path; everything before the last dot is the module.
        reload: reload the module with ``importlib.reload`` before the lookup.
    """
    module_path, attr_name = string.rsplit(".", 1)
    if reload:
        importlib.reload(importlib.import_module(module_path))
    module = importlib.import_module(module_path, package=None)
    return getattr(module, attr_name)
def get_module_from_str(module_str: str):
    """Import and return the module named by the absolute path ``module_str``."""
    module = importlib.import_module(module_str, package=None)
    return module
def build_funcmap(module_str: str, params_names: List[str], func_prefix: str = '', func_suffix: str = '', fallback_func: Callable = None, verbose: bool = True) -> Dict:
    """Map each parameter name to the matching attribute of a module.

    For every name the attribute ``f"{func_prefix}{name}{func_suffix}"`` is
    looked up on the imported module.

    Args:
        module_str: module to import.
        params_names: names used as keys of the result.
        func_prefix, func_suffix: text placed around each name.
        fallback_func: value used when the lookup fails; defaults to
            ``empty_func``.
        verbose: print a message for each failed lookup.

    Returns:
        Dict from parameter name to the resolved attribute or the fallback.
    """
    fallback = empty_func if fallback_func is None else fallback_func
    module = get_module_from_str(module_str)
    resolved = {}
    for param in params_names:
        func_name = f'{func_prefix}{param}{func_suffix}'
        try:
            target = getattr(module, func_name)
        except Exception as e:
            if verbose:
                print(f'failed to import {func_name} from {module_str}: {e}')
            target = fallback
        resolved[param] = target
    return resolved
def _b64encode(x: bytes) -> str:
    """Return the Base64 encoding of ``x`` as a UTF-8 decoded string."""
    encoded = base64.b64encode(x)
    return encoded.decode("utf-8")
def img2b64(img):
    """
    Encode an image as a Base64 string of its PNG bytes.

    A NumPy array is first wrapped with ``Image.fromarray``; any other value
    is used as-is and must provide ``save(file, format=...)``.
    """
    pil_img = Image.fromarray(img) if isinstance(img, np.ndarray) else img
    png_buffer = io.BytesIO()
    pil_img.save(png_buffer, format='PNG')
    png_bytes = png_buffer.getvalue()
    return _b64encode(png_bytes)
def save_encoded_image(b64_image: str, output_path: str):
    """Decode a Base64 string and write the raw bytes to ``output_path``."""
    with open(output_path, "wb") as sink:
        payload = base64.b64decode(b64_image)
        sink.write(payload)
def submit_request(url, data, exist_on_exception=True, auth=None, wait_time = 5):
    """POST ``data`` to ``url``, retrying until the request succeeds.

    ``sys.exit()`` replaces the bare ``exit()`` call: ``exit`` is a helper
    injected by the ``site`` module and is not guaranteed to exist, whereas
    ``sys.exit`` always is. Both raise ``SystemExit``.

    Args:
        url: target URL.
        data: payload passed as ``requests.post(..., data=data)``.
        exist_on_exception: terminate the process when the request finally
            fails (parameter name kept as-is for compatibility).
        auth: forwarded to ``requests.post``.
        wait_time: seconds to sleep before each retry. With a positive value
            the request is retried indefinitely; with ``<= 0`` the first
            failure is final.

    Returns:
        The response on success. On final failure (when not exiting) the last
        response object if one was received, otherwise ``None``.
    """
    response = None
    try:
        while True:
            try:
                response = requests.post(url, data=data, auth=auth)
                response.raise_for_status()
                break
            except Exception:
                if wait_time <= 0:
                    raise
                print(traceback.format_exc(), file=sys.stderr)
                print(f'sleep {wait_time} sec...')
                time.sleep(wait_time)
    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        if response is not None:
            print('response content: ' + response.text)
        if exist_on_exception:
            sys.exit()
    return response

utils/logger.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import datetime
+import logging
+import os
+import os.path as osp
+from glob import glob
+import termcolor
if os.name == "nt":  # Windows
    # colorama is imported and initialised only on Windows.
    import colorama
    colorama.init()

# Log level name -> colour name handed to termcolor.
COLORS = dict(
    WARNING="yellow",
    INFO="white",
    DEBUG="blue",
    CRITICAL="red",
    ERROR="red",
)
class ColoredFormatter(logging.Formatter):
    """Formatter that attaches colourised copies of record fields.

    It sets ``levelname2``, ``message2``, ``asctime2``, ``module2``,
    ``funcName2`` and ``lineno2`` on the record for use in format strings.

    Previously these were set only when colouring applied; with
    ``use_color=False`` or a level name missing from ``COLORS`` the format
    string referenced attributes that did not exist. They are now always
    set, as plain values when no colour is applied.
    """

    def __init__(self, fmt, use_color=True):
        """Store the format string and whether colouring is enabled."""
        logging.Formatter.__init__(self, fmt)
        self.use_color = use_color

    def format(self, record):
        """Populate the ``*2`` attributes on ``record`` and format it."""
        levelname = record.levelname
        padded_level = "{:<7}".format(levelname)
        timestamp = datetime.datetime.fromtimestamp(record.created)
        if self.use_color and levelname in COLORS:
            def colored(text):
                return termcolor.colored(
                    text,
                    color=COLORS[levelname],
                    attrs=["bold"],
                )

            record.levelname2 = colored(padded_level)
            record.message2 = colored(record.getMessage())
            record.asctime2 = termcolor.colored(timestamp, color="green")
            record.module2 = termcolor.colored(record.module, color="cyan")
            record.funcName2 = termcolor.colored(record.funcName, color="cyan")
            record.lineno2 = termcolor.colored(record.lineno, color="cyan")
        else:
            # Plain fallbacks keep format strings that use the *2 fields working.
            record.levelname2 = padded_level
            record.message2 = record.getMessage()
            record.asctime2 = timestamp
            record.module2 = record.module
            record.funcName2 = record.funcName
            record.lineno2 = record.lineno
        return logging.Formatter.format(self, record)
# Console format string; the "*2" fields are set by ColoredFormatter.format().
FORMAT = "[%(levelname2)s] %(module2)s:%(funcName2)s:%(lineno2)s - %(message2)s"
class ColoredLogger(logging.Logger):
    """Logger created at INFO level with a colourised stream handler attached."""

    def __init__(self, name):
        """Initialise the logger and add a ``StreamHandler`` using ``FORMAT``."""
        super().__init__(name, logging.INFO)
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(ColoredFormatter(FORMAT))
        self.addHandler(stream_handler)
def setup_logging(logfile_dir: str, max_num_logs=14):
    """Attach a new timestamped log file to the module logger.

    Before the file is created, the oldest ``*.log`` files (by sorted name)
    are removed so that at most ``max_num_logs`` remain including the new
    one. Each removal is now guarded separately: previously the first failure
    aborted the loop and left the remaining stale logs in place.

    Args:
        logfile_dir: directory for log files; created if it does not exist.
        max_num_logs: number of log files to keep, counting the new one.
    """
    if not osp.exists(logfile_dir):
        os.makedirs(logfile_dir)
    else:
        old_logs = sorted(glob(osp.join(logfile_dir, '*.log')))
        # +1 leaves room for the file created below.
        n_stale = len(old_logs) - max_num_logs + 1
        for stale_log in old_logs[:max(n_stale, 0)]:
            try:
                os.remove(stale_log)
            except OSError as e:
                logger.error(e)

    logfilename = datetime.datetime.now().strftime('_%Y_%m_%d-%H_%M_%S.log')
    logfilep = osp.join(logfile_dir, logfilename)
    fh = logging.FileHandler(logfilep, mode='w', encoding='utf-8')
    fh.setFormatter(
        logging.Formatter(
            ("[%(levelname)s] %(module)s:%(funcName)s:%(lineno)s - %(message)s")
        )
    )
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
# The logger class is registered before getLogger() so that the shared
# logger below is created as a ColoredLogger.
logging.setLoggerClass(ColoredLogger)
logger = logging.getLogger('BallonTranslator')
# Records are not passed on to ancestor loggers' handlers.
logger.propagate = False
logger.setLevel(logging.DEBUG)

utils/message.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import traceback
+from typing import Callable, List, Dict
+from . import shared
+from .logger import logger as LOGGER
def create_error_dialog(exception: Exception, error_msg: str = None, exception_type: str = None):
    '''
        Log an error and, unless running headless, pop up an error dialog
        in the main thread.
    Args:
        exception: the exception being reported; ``str(exception)`` starts
            the message.
        error_msg: extra description appended after ``str(exception)`` on a
            new line.
        exception_type: key used to suppress repeats; when non-empty and
            already in ``shared.showed_exception``, nothing is logged or shown.
    '''
    detail_traceback = traceback.format_exc()
    if exception_type is None:
        exception_type = ''

    already_shown = exception_type != '' and exception_type in shared.showed_exception
    if already_shown:
        return

    summary = str(exception)
    error_msg = summary if error_msg is None else summary + '\n' + error_msg
    LOGGER.error(error_msg + '\n')
    LOGGER.error(detail_traceback)
    if not shared.HEADLESS:
        shared.create_errdialog_in_mainthread(error_msg, detail_traceback, exception_type)
def create_info_dialog(info_msg, btn_type=None, modal: bool = False, frame_less: bool = False, signal_slot_map_list: List[Dict] = None):
    '''
        Log an info message and, unless running headless, pop up an info
        dialog in the main thread. All arguments are forwarded to
        ``shared.create_infodialog_in_mainthread`` in a single dict.
    '''
    LOGGER.info(info_msg)
    if shared.HEADLESS:
        return
    dialog_options = {
        'info_msg': info_msg,
        'btn_type': btn_type,
        'modal': modal,
        'frame_less': frame_less,
        'signal_slot_map_list': signal_slot_map_list,
    }
    shared.create_infodialog_in_mainthread(dialog_options)
def connect_once(signal, exec_func: Callable):
    '''
    Connect ``exec_func`` to ``signal`` so that it runs for one emission only.

    The wrapper disconnects itself after ``exec_func`` returns or raises; an
    exception from ``exec_func`` still propagates to the emitter. A failure
    to disconnect is printed and otherwise ignored. ``except Exception``
    replaces the former bare ``except:`` so that ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.

    Args:
        signal: object providing ``connect(callable)`` and ``disconnect(callable)``.
        exec_func: callable invoked with the emitted arguments.
    '''
    def _disconnect_after_called(*func_args, **func_kwargs):
        try:
            exec_func(*func_args, **func_kwargs)
        finally:
            try:
                signal.disconnect(_disconnect_after_called)
            except Exception:
                print('Failed to disconnect')
                print(traceback.format_exc())

    signal.connect(_disconnect_after_called)

utils/package.py ADDED Viewed

	@@ -0,0 +1,289 @@

+# copied from https://github.com/HansBug/hbutils/blob/main/hbutils/system/python/package.py
+# to replace the deprecated pkg_resources
+import functools
+import itertools
+import os
+import pathlib
+import subprocess
+import sys
+from typing import List, Optional
+from packaging.requirements import Requirement
+from packaging.utils import canonicalize_name
+try:
+    import importlib.metadata as importlib_metadata
+except (ModuleNotFoundError, ImportError):
+    import importlib_metadata
+from packaging.version import Version
+def package_version(name: str) -> Optional[Version]:
+    """
+    Overview:
+        Get version of package with given ``name``.
+    :param name: Name of the package, case is not sensitive.
+    :return: A :class:`packing.version.Version` object. If the package is not installed, return ``None``.
+    Examples::
+        >>> from hbutils.system import package_version
+        >>>
+        >>> package_version('pip')
+        <Version('21.3.1')>
+        >>> package_version('setuptools')
+        <Version('59.6.0')>
+        >>> package_version('not_a_package')
+        None
+    """
+    try:
+        return Version(importlib_metadata.distribution(canonicalize_name(name)).version)
+    except importlib_metadata.PackageNotFoundError:
+        return None
+def _nonblank(text):
+    return text and not text.startswith('#')
+@functools.singledispatch
+def yield_lines(iterable):
+    r"""
+    Based on https://github.com/jaraco/jaraco.text/blob/main/jaraco/text/__init__.py#L537 .
+    Yield valid lines of a string or iterable.
+    >>> list(yield_lines(''))
+    []
+    >>> list(yield_lines(['foo', 'bar']))
+    ['foo', 'bar']
+    >>> list(yield_lines('foo\nbar'))
+    ['foo', 'bar']
+    >>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
+    ['foo', 'baz #comment']
+    >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
+    ['foo', 'bar', 'baz', 'bing']
+    """
+    return itertools.chain.from_iterable(map(yield_lines, iterable))
+@yield_lines.register(str)
+def _(text):
+    return filter(_nonblank, map(str.strip, text.splitlines()))
+def drop_comment(line):
+    """
+    Based on https://github.com/jaraco/jaraco.text/blob/main/jaraco/text/__init__.py#L560 .
+    Drop comments.
+    >>> drop_comment('foo # bar')
+    'foo'
+    A hash without a space may be in a URL.
+    >>> drop_comment('https://example.com/foo#bar')
+    'https://example.com/foo#bar'
+    """
+    return line.partition(' #')[0]
+def join_continuation(lines):
+    r"""
+    Based on https://github.com/jaraco/jaraco.text/blob/main/jaraco/text/__init__.py#L575 .
+    Join lines continued by a trailing backslash.
+    >>> list(join_continuation(['foo \\', 'bar', 'baz']))
+    ['foobar', 'baz']
+    >>> list(join_continuation(['foo \\', 'bar', 'baz']))
+    ['foobar', 'baz']
+    >>> list(join_continuation(['foo \\', 'bar \\', 'baz']))
+    ['foobarbaz']
+    Not sure why, but...
+    The character preceding the backslash is also elided.
+    >>> list(join_continuation(['goo\\', 'dly']))
+    ['godly']
+    A terrible idea, but...
+    If no line is available to continue, suppress the lines.
+    >>> list(join_continuation(['foo', 'bar\\', 'baz\\']))
+    ['foo']
+    """
+    lines = iter(lines)
+    for item in lines:
+        while item.endswith('\\'):
+            try:  # pragma: no cover
+                item = item[:-2].strip() + next(lines)
+            except StopIteration:
+                return
+        yield item
+def load_req_file(requirements_file: str) -> List[str]:
+    """
+    Overview:
+        Load requirements items from a ``requirements.txt`` file.
+    :param requirements_file: Requirements file.
+    :return requirements: List of requirements.
+    Examples::
+        >>> from hbutils.system import load_req_file
+        >>> load_req_file('requirements.txt')
+        ['packaging>=21.3', 'setuptools>=50.0']
+    """
+    with pathlib.Path(requirements_file).open() as reqfile:
+        return list(map(
+            lambda x: str(Requirement(x)),
+            join_continuation(map(drop_comment, yield_lines(reqfile)))
+        ))
+def pip(*args, silent: bool = False):
+    """
+    Overview:
+        Run pip command with code.
+    :param args: Command line arguments for ``pip`` command.
+    :param silent: Do not print anything. Default is false, which means print the output to ``sys.stdout`` \
+        and ``sys.stderr``.
+    Examples::
+        >>> from hbutils.system import pip
+        >>> pip('-V')
+        pip 22.3.1 from /home/user/myproject/venv/lib/python3.7/site-packages/pip (python 3.7)
+        >>> pip('-V', silent=True)  # nothing will be printed
+    """
+    process = subprocess.run(
+        [sys.executable, '-m', 'pip', *args],
+        stdin=sys.stdin if not silent else None,
+        stdout=sys.stdout if not silent else subprocess.PIPE,
+        stderr=sys.stderr if not silent else subprocess.PIPE,
+    )
+    assert not process.returncode, f'Error when calling {process.args!r}{os.linesep}' \
+                                   f'Error Code - {process.returncode}{os.linesep}' \
+                                   f'Stdout:{os.linesep}' \
+                                   f'{process.stdout.decode()}{os.linesep}' \
+                                   f'{os.linesep}' \
+                                   f'Stderr:{os.linesep}' \
+                                   f'{process.stderr.decode()}{os.linesep}'
+    process.check_returncode()
+def _yield_reqs_to_install(req: Requirement, current_extra: str = ''):
+    if req.marker and not req.marker.evaluate({'extra': current_extra}):
+        return
+    try:
+        version = importlib_metadata.distribution(req.name).version
+    except importlib_metadata.PackageNotFoundError:  # req not installed
+        yield req
+    else:
+        if req.specifier.contains(version, prereleases=True):
+            for child_req in (importlib_metadata.metadata(req.name).get_all('Requires-Dist') or []):
+                child_req_obj = Requirement(child_req)
+                need_check, ext = False, None
+                for extra in req.extras:
+                    if child_req_obj.marker and child_req_obj.marker.evaluate({'extra': extra}):
+                        need_check = True
+                        ext = extra
+                        break
+                if need_check:  # check for extra reqs
+                    yield from _yield_reqs_to_install(child_req_obj, ext)
+        else:  # main version not match
+            yield req
+def _check_req(req: Requirement):
+    return not bool(list(itertools.islice(_yield_reqs_to_install(req), 1)))
+def check_reqs(reqs: List[str]) -> bool:
+    """
+    Overview:
+        Check if the given requirements are all satisfied.
+    :param reqs: List of requirements.
+    :return satisfied: All the requirements in ``reqs`` satisfied or not.
+    Examples::
+        >>> from hbutils.system import check_reqs
+        >>> check_reqs(['pip>=20.0'])
+        True
+        >>> check_reqs(['pip~=19.2'])
+        False
+        >>> check_reqs(['pip>=20.0', 'setuptools>=50.0'])
+        True
+    .. note::
+        If a requirement's marker is not satisfied in this environment,
+        **it will be ignored** instead of return ``False``.
+    """
+    return all(map(lambda x: _check_req(Requirement(x)), reqs))
+def check_req_file(requirements_file: str) -> bool:
+    """
+    Overview:
+        Check if the requirements in the given ``requirements_file`` is satisfied.
+    :param requirements_file: Requirements file, such as ``requirements.txt``.
+    :return satisfied: All the requirements in ``requirements_file`` satisfied or not.
+    Examples::
+        >>> from hbutils.system import check_req_file
+        >>>
+        >>> check_req_file('requirements.txt')
+        True
+        >>> check_req_file('requirements-test.txt')
+        True
+    """
+    return check_reqs(load_req_file(requirements_file))
+def pip_install(reqs: List[str], silent: bool = False, force: bool = False, user: bool = False):
+    """
+    Overview:
+        Pip install requirements with code.
+        Similar to ``pip install req1 req2 ...``.
+    :param reqs: Requirement items to install.
+    :param silent: Do not print anything. Default is ``False``.
+    :param force: Force execute the ``pip install`` command. Default is ``False`` which means the requirements \
+        will be checked before installation, and the installation will be only executed when \
+        some requirements not installed.
+    :param user: User mode, represents ``--user`` option in ``pip``.
+    Examples::
+        >>> from hbutils.system import pip_install
+        >>> pip_install(['scikit-learn'])  # not installed
+        Looking in indexes: https://xxx/simple
+        Collecting scikit-learn
+          Using cached https://xxx/scikit_learn-1.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (24.8 MB)
+        Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
+        Successfully installed joblib-1.2.0 scikit-learn-1.0.2 scipy-1.7.3 threadpoolctl-3.1.0
+        >>> pip_install(['numpy>=1.10.0'])  # installed
+        >>> pip_install(['numpy>=1.10.0'], force=True)  # force execute
+        Looking in indexes: https://xxx/simple
+        Requirement already satisfied: numpy>=1.10.0 in ./venv/lib/python3.7/site-packages (1.21.6)
+    """
+    if force or not check_reqs(reqs):
+        pip('install', *(('--user',) if user else ()), *reqs, silent=silent)
+def pip_install_req_file(requirements_file: str, silent: bool = False, force: bool = False, user: bool = False):
+    """
+    Overview:
+        Pip install requirements from file with code.
+        Similar to ``pip install -r requirements.txt``.
+    :param requirements_file: Requirements file, such as ``requirements.txt``.
+    :param silent: Do not print anything. Default is ``False``.
+    :param force: Force execute the ``pip install`` command. Default is ``False`` which means the requirements \
+        will be checked before installation, and the installation will be only executed when \
+        some requirements not installed.
+    :param user: User mode, represents ``--user`` option in ``pip``.
+    Examples::
+        >>> from hbutils.system import pip_install_req_file
+        >>> pip_install_req_file('requirements.txt')  # pip install -r requirements.txt
+    """
+    if force or not check_req_file(requirements_file):
+        pip('install', *(('--user',) if user else ()), '-r', requirements_file, silent=silent)

utils/proj_imgtrans.py ADDED Viewed

	@@ -0,0 +1,720 @@

+import os, json, shutil, re, docx, docx2txt, piexif, cv2
+from docx.shared import Inches
+from docx import Document
+import piexif.helper
+import numpy as np
+import os.path as osp
+from typing import Tuple, Union, List, Dict
+from PIL import Image
+from utils.watermark_utils import apply_watermark_to_pil_image
+from .logger import logger as LOGGER
+from .io_utils import find_all_imgs, imread, imwrite, NumpyEncoder
+from .textblock import TextBlock, FontFormat
+from .config import pcfg
+from . import shared
+from .exceptions import ImgnameNotInProjectException, ProjectLoadFailureException, ProjectDirNotExistException, ProjectNotSupportedException
+class ImageLoadException(Exception):
+    def __init__(self, img_path):
+        super().__init__(f"Failed to load image: {img_path}")
+        self.img_path = img_path
+def get_last_modified_file(file_prefix, exts, ext_fallback=None):
+    '''
+    get last modified file from files sharing same prefix
+    '''
+    latest_time = -1
+    latest_f = None
+    for ext in exts:
+        tmp_p = file_prefix + ext
+        if osp.exists(tmp_p) and osp.getmtime(tmp_p) > latest_time:
+            latest_time = osp.getmtime(tmp_p)
+            latest_f = tmp_p
+    if latest_f is None:
+        if ext_fallback is not None:
+            latest_f = file_prefix + ext_fallback
+        else:
+            latest_f = file_prefix + exts[0]
+    return latest_f
+def write_jpg_metadata(imgpath: str, metadata="a metadata"):
+    exif_dict = {"Exif":{piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(metadata, encoding='unicode')}}
+    exif_bytes = piexif.dump(exif_dict)
+    piexif.insert(exif_bytes, imgpath)
+def read_jpg_metadata(imgpath: str):
+    exif_dict = piexif.load(imgpath)
+    user_comment = piexif.helper.UserComment.load(exif_dict["Exif"][piexif.ExifIFD.UserComment])
+    bubdict = json.loads(user_comment)
+    return bubdict
+page_start_pattern = re.compile(r'^###\s+', re.MULTILINE)
+text_blkid_start_pattern = re.compile(r'^\d+\.', re.MULTILINE)
+def parse_txt_translation(file_path: str):
+    with open(file_path, 'r', encoding='utf8') as f:
+        content = f.read()
+    page_start = None
+    page_list = []
+    for matched in page_start_pattern.finditer(content):
+        start, end = matched.span()
+        if page_start is not None:
+            page_list.append({'page_content': content[page_start: start]})
+        page_start = start
+    if page_start is not None:
+        page_list.append({'page_content': content[page_start:]})
+    for page_dict in page_list:
+        page_content = page_dict['page_content']
+        page_dict['page_name'] = page_start_pattern.sub('', page_content.split('\n')[0]).strip()
+        blkid_start = blkid_end = None
+        blk_list = []
+        for matched in text_blkid_start_pattern.finditer(page_content):
+            start, end = matched.span()
+            if blkid_start is not None:
+                blk_list.append(page_content[blkid_end: start].strip())
+            blkid_start = start
+            blkid_end = end
+        if blkid_start is not None:
+            blk_list.append(page_content[blkid_end:].strip())
+        page_dict['blk_list'] = blk_list
+    return page_list
+class TextBlkEncoder(NumpyEncoder):
+    def default(self, obj):
+        if isinstance(obj, TextBlock):
+            return obj.to_dict()
+        elif isinstance(obj, FontFormat):
+            return vars(obj)
+        return NumpyEncoder.default(self, obj)
+class ProjImgTrans:
+    def __init__(self, directory: str = None):
+        self.type = 'imgtrans'
+        self.directory: str = None
+        self.pages: Dict[str, List[TextBlock]] = {}
+        self._pagename2idx = {}
+        self._idx2pagename = {}
+        self._fuzzy_inpainted_list = None
+        self.not_found_pages: Dict[str, List[TextBlock]] = {}
+        self.new_pages: List[str] = []
+        self.proj_path: str = None
+        self.current_img: str = None
+        self.img_array: np.ndarray = None
+        self.mask_array: np.ndarray = None
+        self.inpainted_array: np.ndarray = None
+        # Watermark settings
+        self.enable_watermark = False
+        self.watermark_path = ""
+        self.watermark_opacity = 0.7
+        if directory is not None:
+            self.load(directory)
+    def idx2pagename(self, idx: int) -> str:
+        return self._idx2pagename[idx]
+    def pagename2idx(self, pagename: str) -> int:
+        if pagename in self.pages:
+            return self._pagename2idx[pagename]
+        return -1
+    def proj_name(self) -> str:
+        return self.type+'_'+osp.basename(self.directory)
+    def load(self, directory: str, json_path: str = None) -> bool:
+        self.directory = directory
+        if json_path is None:
+            self.proj_path = osp.join(self.directory, self.proj_name() + '.json')
+        else:
+            self.proj_path = json_path
+        new_proj = False
+        if not osp.exists(self.proj_path):
+            new_proj = True
+            self.new_project()
+        else:
+            try:
+                with open(self.proj_path, 'r', encoding='utf8') as f:
+                    proj_dict = json.loads(f.read())
+            except Exception as e:
+                raise ProjectLoadFailureException(e)
+            self.load_from_dict(proj_dict)
+        if not osp.exists(self.inpainted_dir()):
+            os.makedirs(self.inpainted_dir())
+        if not osp.exists(self.mask_dir()):
+            os.makedirs(self.mask_dir())
+        # Fix: use self instead of proj and check proj_dict existence
+        if 'enable_watermark' in locals() and 'proj_dict' in locals() and 'enable_watermark' in proj_dict:
+            self.enable_watermark = proj_dict['enable_watermark']
+        if 'proj_dict' in locals() and 'watermark_path' in proj_dict:
+            self.watermark_path = proj_dict['watermark_path']
+        if 'proj_dict' in locals() and 'watermark_opacity' in proj_dict:
+            self.watermark_opacity = proj_dict['watermark_opacity']
+        return new_proj
+    def mask_dir(self):
+        return osp.join(self.directory, 'mask')
+    def inpainted_dir(self):
+        return osp.join(self.directory, 'inpainted')
+    def result_dir(self):
+        return osp.join(self.directory, 'result')
+    def load_from_dict(self, proj_dict: dict):
+        self.set_current_img(None)
+        try:
+            self.pages = {}
+            self._pagename2idx = {}
+            self._idx2pagename = {}
+            self.not_found_pages = {}
+            page_dict = proj_dict['pages']
+            not_found_pages = list(page_dict.keys())
+            found_pages = find_all_imgs(img_dir=self.directory, abs_path=False, sort=True)
+            for ii, imname in enumerate(found_pages):
+                if imname in page_dict:
+                    self.pages[imname] = [TextBlock(**blk_dict) for blk_dict in page_dict[imname]]
+                    not_found_pages.remove(imname)
+                else:
+                    self.pages[imname] = []
+                    self.new_pages.append(imname)
+                self._pagename2idx[imname] = ii
+                self._idx2pagename[ii] = imname
+            for imname in not_found_pages:
+                self.not_found_pages[imname] = [TextBlock(**blk_dict) for blk_dict in page_dict[imname]]
+        except Exception as e:
+            raise ProjectNotSupportedException(e)
+        set_img_failed = False
+        if 'current_img' in proj_dict:
+            current_img = proj_dict['current_img']
+            try:
+                self.set_current_img(current_img)
+            except (ImgnameNotInProjectException, RuntimeError) as e:
+                LOGGER.error(f"Failed to set current image {current_img}: {e}")
+                set_img_failed = True
+        else:
+            set_img_failed = True
+            LOGGER.warning(f'{current_img} not found.')
+        if set_img_failed:
+            if len(self.pages) > 0:
+                try:
+                    self.set_current_img_byidx(0)
+                except RuntimeError as e:
+                    LOGGER.error(f"Failed to set current image by index 0: {e}")
+    def load_translation_from_txt(self, file_path: str):
+        page_list = parse_txt_translation(file_path)
+        missing_pages = []
+        unmatched_pages = []
+        unexpected_pages = []
+        matched_pages = []
+        for page_dict in page_list:
+            page_name = page_dict['page_name']
+            if page_name in self.pages:
+                matched_pages.append(page_name)
+            else:
+                unexpected_pages.append(page_name)
+                continue
+            blklist = self.pages[page_name]
+            n_blk = len(blklist)
+            src_blk_list = page_dict['blk_list']
+            n_src_blk = len(src_blk_list)
+            if n_src_blk != n_blk:
+                LOGGER.warning(f'Unmatched text blocks in {page_name}, number of text blocks in this page vs source file: {n_blk}-{n_src_blk}')
+                unmatched_pages.append(page_name)
+            for blkid in range(min(n_blk, n_src_blk)):
+                blk = blklist[blkid]
+                blk.rich_text = ''
+                blk.translation = src_blk_list[blkid]
+        matched_pages = set(matched_pages)
+        if len(matched_pages) != self.num_pages:
+            for page_name in self.pages:
+                if page_name not in matched_pages:
+                    missing_pages.append(page_name)
+        all_matched = len(missing_pages) == 0 and len(unmatched_pages) == 0 and len(unexpected_pages) == 0
+        return all_matched, {'missing_pages': missing_pages, 'unmatched_pages': unmatched_pages, 'unexpected_pages': unexpected_pages, 'matched_pages': matched_pages}
+    def load_from_json(self, json_path: str):
+        old_dir = self.directory
+        directory = osp.dirname(json_path)
+        try:
+            self.load(directory, json_path=json_path)
+        except Exception as e:
+            self.load(old_dir)
+            raise ProjectLoadFailureException(e)
+    def set_current_img(self, imgname: str):
+        if imgname is not None:
+            if imgname not in self.pages:
+                raise ImgnameNotInProjectException
+            self.current_img = imgname
+            img_path = self.current_img_path()
+            mask_path = self.get_mask_path(get_last_modified=True)
+            self.img_array = imread(img_path)
+            if self.img_array is None:
+                raise RuntimeError(f"Failed to load image: {img_path}")
+            im_h, im_w = self.img_array.shape[:2]
+            if osp.exists(mask_path):
+                self.mask_array = imread(mask_path, cv2.IMREAD_GRAYSCALE)
+            else:
+                self.mask_array = np.zeros((im_h, im_w), dtype=np.uint8)
+            self.inpainted_array = self.load_inpainted_by_imgname(imgname)
+            if self.inpainted_array is None:
+                self.inpainted_array = np.copy(self.img_array)
+        else:
+            self.current_img = None
+            self.img_array = None
+            self.mask_array = None
+            self.inpainted_array = None
+    def set_current_img_byidx(self, idx: int):
+        num_pages = self.num_pages
+        if idx < 0:
+            idx = idx + self.num_pages
+        if idx < 0 or idx > num_pages - 1:
+            self.set_current_img(None)
+        else:
+            self.set_current_img(self.idx2pagename(idx))
+    def get_blklist_byidx(self, idx: int) -> List[TextBlock]:
+        return self.pages[self.idx2pagename(idx)]
+    @property
+    def num_pages(self) -> int:
+        return len(self.pages)
+    @property
+    def current_idx(self) -> int:
+        return self.pagename2idx(self.current_img)
+    def new_project(self):
+        if not osp.exists(self.directory):
+            raise ProjectDirNotExistException
+        self.set_current_img(None)
+        imglist = find_all_imgs(self.directory, abs_path=False, sort=True)
+        self.pages = {}
+        self._pagename2idx = {}
+        self._idx2pagename = {}
+        for ii, imgname in enumerate(imglist):
+            self.pages[imgname] = []
+            self._pagename2idx[imgname] = ii
+            self._idx2pagename[ii] = imgname
+        self.set_current_img_byidx(0)
+        self.save()
+    def save(self):
+        if not osp.exists(self.directory):
+            raise ProjectDirNotExistException
+        with open(self.proj_path, "w", encoding="utf-8") as f:
+            f.write(json.dumps(self.to_dict(), ensure_ascii=False, cls=TextBlkEncoder))
+            LOGGER.debug(f'project saved to {self.proj_path}')
+    def to_dict(self) -> Dict:
+        pages = self.pages.copy()
+        pages.update(self.not_found_pages)
+        return {
+            'directory': self.directory,
+            'pages': pages,
+            'current_img': self.current_img,
+            'enable_watermark': self.enable_watermark,
+            'watermark_path': self.watermark_path,
+            'watermark_opacity': self.watermark_opacity
+        }
+    def read_img(self, imgname: str) -> np.ndarray:
+        if imgname not in self.pages:
+            raise ImgnameNotInProjectException
+        return imread(osp.join(self.directory, imgname))
+    def save_mask(self, img_name, mask: np.ndarray):
+        imwrite(self.get_mask_path(img_name), mask, ext=pcfg.intermediate_imgsave_ext)
+    def save_inpainted(self, img_name, inpainted: np.ndarray):
+        imwrite(self.get_inpainted_path(img_name), inpainted, ext=pcfg.intermediate_imgsave_ext)
+    def current_img_path(self) -> str:
+        if self.current_img is None:
+            return None
+        return osp.join(self.directory, self.current_img)
+    def get_mask_path(self, imgname: str = None, get_last_modified=False) -> str:
+        if imgname is None:
+            imgname = self.current_img
+        fileprefix = osp.join(self.mask_dir(), osp.splitext(imgname)[0])
+        if get_last_modified:
+            p = get_last_modified_file(fileprefix, ['.jxl', '.png'], ext_fallback=pcfg.intermediate_imgsave_ext)
+        else:
+            p = fileprefix+pcfg.intermediate_imgsave_ext
+        return p
+    def load_mask_by_imgname(self, imgname: str) -> np.ndarray:
+        mask = None
+        mp = self.get_mask_path(imgname, get_last_modified=True)
+        if osp.exists(mp):
+            mask = imread(mp, cv2.IMREAD_GRAYSCALE)
+        return mask
+    def get_inpainted_path(self, imgname: str = None, get_last_modified=False) -> str:
+        if imgname is None:
+            imgname = self.current_img
+        fileprefix = osp.join(self.inpainted_dir(), osp.splitext(imgname)[0])
+        if get_last_modified:
+            p = get_last_modified_file(fileprefix, ['.jxl', '.png'], ext_fallback=pcfg.intermediate_imgsave_ext)
+        else:
+            p = fileprefix+pcfg.intermediate_imgsave_ext
+        if not osp.exists(p) and shared.FUZZY_MATCH_IMAGE_NAME:
+            if self._fuzzy_inpainted_list is None:
+                if osp.exists(self.inpainted_dir()):
+                    self._fuzzy_inpainted_list = find_all_imgs(self.inpainted_dir(), sort=True)
+                else:
+                    self._fuzzy_inpainted_list = []
+            pidx = self.pagename2idx(imgname)
+            if pidx < len(self._fuzzy_inpainted_list):
+                return osp.join(self.inpainted_dir(), self._fuzzy_inpainted_list[pidx])
+        return p
+    def load_inpainted_by_imgname(self, imgname: str, scale_to_src: bool = True) -> np.ndarray:
+        inpainted = None
+        mp = self.get_inpainted_path(imgname, get_last_modified=True)
+        if mp is not None and osp.exists(mp):
+            inpainted = imread(mp)
+            if imgname == self.current_img and self.img_array is not None:
+                h, w = self.img_array.shape[:2]
+            else:
+                i = Image.open(osp.join(self.directory, imgname))
+                h, w = i.height, i.width
+            ih, iw = inpainted.shape[:2]
+            if ih != h or iw != w:
+                inpainted = Image.fromarray(inpainted).resize((w, h), resample=Image.Resampling.LANCZOS)
+                inpainted = np.array(inpainted)
+        return inpainted
+    def get_result_path(self, imgname: str) -> str:
+        ext = '.png'
+        if pcfg is not None:
+            if pcfg.imgsave_ext not in {'.jpg', '.png', '.webp', '.jxl'}:
+                LOGGER.warning('invalid image saving ext in config.json')
+            else:
+                ext = pcfg.imgsave_ext
+        return osp.join(self.result_dir(), osp.splitext(imgname)[0]+ext)
+    def backup(self):
+        raise NotImplementedError
+    @property
+    def is_empty(self):
+        return len(self.pages) == 0
+    @property
+    def is_all_pages_no_text(self):
+        return all([len(blklist) == 0 for blklist in self.pages.values()])
+    @property
+    def img_valid(self):
+        return self.img_array is not None
+    @property
+    def mask_valid(self):
+        return self.mask_array is not None
+    @property
+    def inpainted_valid(self):
+        return self.inpainted_array is not None
+    def set_next_img(self):
+        if self.current_img is not None:
+            next_idx = (self.current_idx + 1) % self.num_pages
+            self.set_current_img(self.idx2pagename(next_idx))
+    def set_prev_img(self):
+        if self.current_img is not None:
+            next_idx = (self.current_idx - 1 + self.num_pages) % self.num_pages
+            self.set_current_img(self.idx2pagename(next_idx))
+    def current_block_list(self) -> List[TextBlock]:
+        if self.current_img is not None:
+            assert self.current_img in self.pages
+            return self.pages[self.current_img]
+        else:
+            return None
+    def doc_path(self) -> str:
+        return os.path.join(self.directory, self.proj_name() + ".docx")
+    def doc_exist(self) -> bool:
+        return osp.exists(self.doc_path())
+    def dump_doc(self, delete_tmp_folder=True, fin_page_signal=None):
+        cuts_dir = os.path.join(self.directory, "bubcuts")
+        if os.path.exists(cuts_dir):
+            shutil.rmtree(cuts_dir)
+        os.mkdir(cuts_dir)
+        document = Document()
+        style = document.styles['Normal']
+        font = style.font
+        target_font = 'Arial'
+        font.name = target_font
+        for pagename, blklist in self.pages.items():
+            imgpath = os.path.join(self.directory, pagename)
+            cuts_path_list, cut_width_list = gen_ballon_cuts(cuts_dir, imgpath, blklist)
+            paragraph = document.add_paragraph(pagename)
+            paragraph.style = document.styles['Normal']
+            table = document.add_table(rows=len(cuts_path_list), cols=2, style='Table Grid')
+            for index, (cut_path, width) in enumerate(zip(cuts_path_list, cut_width_list)):
+                run = table.cell(index, 0).paragraphs[0].add_run()
+                run.style.font.name = target_font
+                blk: TextBlock = blklist[index]
+                bubdict = vars(blk).copy()
+                bubdict["imgkey"] = pagename
+                bubdict["rich_text"] = ''
+                bubdict["text"] = blk.get_text()
+                write_jpg_metadata(cut_path, metadata=json.dumps(bubdict, ensure_ascii=False, cls=TextBlkEncoder))
+                run.add_picture(cut_path, width=Inches(width/96 * 0.85))
+                table.cell(index, 1).text = bubdict["translation"]
+            document.add_page_break()
+            if fin_page_signal is not None:
+                fin_page_signal.emit()
+                # time.sleep(1)
+        doc_path = self.doc_path()
+        document.save(doc_path)
+        if delete_tmp_folder:
+            shutil.rmtree(cuts_dir)
+    def dump_txt_path(self, dump_target, suffix):
+        save_path = osp.join(self.directory, self.proj_name() + f'_{dump_target}{suffix}')
+        return save_path
+    def dump_txt(self, dump_target: str, suffix='.txt'):
+        save_path = self.dump_txt_path(dump_target, suffix=suffix)
+        text_all = []
+        assert dump_target in {'source', 'translation'}
+        assert suffix in {'.txt', '.md'}
+        for page_name, blk_list in self.pages.items():
+            text_in_page = ['### ' + page_name]
+            for ii, blk in enumerate(blk_list):
+                if dump_target == 'translation':
+                    text = blk.translation.strip()
+                elif dump_target == 'source':
+                    text = blk.get_text().strip()
+                text_in_page.append(f'{ii + 1}. {text}')
+            text_all.append('\n\n'.join(text_in_page))
+        with open(save_path, 'w', encoding='utf8') as f:
+            f.write('\n\n\n'.join(text_all))
+    def load_doc(self, doc_path, delete_tmp_folder=True, fin_page_signal=None):
+        '''
+        Import translations from a .docx file and merge them into the project.
+        Every table row of the document that contains a picture is treated as one text block:
+        the text of the row is taken as its translation and the block's remaining data is read
+        back from the JSON stored in the EXIF metadata of the matching extracted image.
+        Args:
+            doc_path: Path of the .docx file to read.
+            delete_tmp_folder: Remove the temporary "img_folder" directory afterwards.
+            fin_page_signal: Optional object whose emit() is called after each imported block.
+        '''
+        tmp_bubble_folder = osp.join(self.directory, 'img_folder')
+        os.makedirs(tmp_bubble_folder, exist_ok=True)
+        # Extracts the document's images into the temporary folder
+        # (presumably as image1, image2, ... in document order -- verify against docx2txt).
+        docx2txt.process(doc_path, tmp_bubble_folder)
+        doc = docx.Document(doc_path)
+        body_xml_str = doc._body._element.xml
+        pages = {}
+        # 1-based counter of picture rows; it selects the extracted image file of the row.
+        bub_index = 0
+        for tbl in re.findall(r'<w:tbl>(.*?)</w:tbl>', body_xml_str, re.DOTALL):
+            for tr in re.findall(r'<w:tr(.*?)>(.*?)</w:tr>', tbl, re.DOTALL):
+                # tr[1] is the inner XML of the row; rows without a picture are skipped.
+                if re.findall(r'<pic:cNvPr id=\"(.*?)\" name=\"(.*?)\"(.*?)>', tr[1]):
+                    bub_index += 1
+                    translation = ""
+                    # Concatenate the text runs of each paragraph, one line per paragraph.
+                    for paragraph in re.findall(r'<w:p(.*?)>(.*?)</w:p>', tr[1], re.DOTALL):
+                        for wt in re.findall(r'<w:t>(.*?)</w:t>', paragraph[1], re.DOTALL):
+                            translation += wt
+                        translation += "\n"
+                    # Drop the trailing newline, and a leading one left by a text-less first paragraph.
+                    translation = translation[:-1]
+                    if len(translation) != 0 and translation[0] == "\n":
+                        translation = translation[1:]
+                    bubpath = os.path.join(tmp_bubble_folder, "image"+str(bub_index))
+                    # The extracted image may carry either JPEG extension.
+                    if osp.exists(bubpath+'.jpg'):
+                        bubpath = bubpath + '.jpg'
+                    else:
+                        bubpath = bubpath + '.jpeg'
+                    meta_dict = read_jpg_metadata(bubpath)
+                    meta_dict["translation"] = translation
+                    # "imgkey" names the page the block belongs to; it is not a TextBlock argument.
+                    imgkey = meta_dict.pop("imgkey")
+                    if not imgkey in pages:
+                        pages[imgkey] = []
+                    pages[imgkey].append(TextBlock(**meta_dict))
+                    if fin_page_signal is not None:
+                        fin_page_signal.emit()
+        self.merge_from_proj_dict(pages)
+        if delete_tmp_folder:
+            shutil.rmtree(tmp_bubble_folder)
+    def merge_from_proj_dict(self, tgt_dict: Dict) -> Dict:
+        if self.pages is None:
+            self.pages = {}
+        src_dict = self.pages if self.pages is not None else {}
+        key_lst = list(dict.fromkeys(list(src_dict.keys()) + list(tgt_dict.keys())))
+        key_lst.sort()
+        rst_dict = {}
+        pagename2idx = {}
+        idx2pagename = {}
+        page_counter = 0
+        for key in key_lst:
+            if key in src_dict and not key in tgt_dict:
+                rst_dict[key] = src_dict[key]
+            else:
+                rst_dict[key] = tgt_dict[key]
+            pagename2idx[key] = page_counter
+            idx2pagename[page_counter] = key
+            page_counter += 1
+        self.pages.clear()
+        self.pages.update(rst_dict)
+        self._pagename2idx = pagename2idx
+        self._idx2pagename = idx2pagename
+def gen_ballon_cuts(cuts_dir: str, imgpath: str, blk_list: List[TextBlock], resize=True) -> Tuple[List[str], List[int]]:
+    img = imread(imgpath)
+    imgname = os.path.basename(imgpath)
+    cuts_path_list = []
+    cut_width_list = []
+    for ii, blk in enumerate(blk_list):
+        x, y, w, h = blk.bounding_rect()
+        x, y = max(x, 0), max(y, 0)
+        w = max(w, 1)
+        h = max(h, 1)
+        x1, y1, x2, y2 = int(x), int(y), int(x+w), int(y+h)
+        cut_path = os.path.join(cuts_dir, f'{imgname}-{ii}.jpg')
+        bub = img[y1:y2, x1:x2]
+        max_width = 448
+        if bub.shape[0] < 1 or bub.shape[1] < 1:
+            emptyw = 60
+            resized = np.full((emptyw, emptyw, 3), fill_value=0, dtype=np.uint8)
+            width = emptyw
+        else:
+            # scale_percent = 60 # percent of original size
+            scale_percent = min(1920 / img.shape[0], max_width / w)
+            if scale_percent < 1:
+                width = max(1, int(bub.shape[1] * scale_percent))
+                height = max(1, int(bub.shape[0] * scale_percent))
+                dim = (width, height)
+                resized = cv2.resize(bub, dim, interpolation = cv2.INTER_AREA) if resize else bub
+            else:
+                width = w
+                resized = bub
+        imwrite(cut_path, resized, '.jpg')
+        cuts_path_list.append(cut_path)
+        cut_width_list.append(width)
+    return cuts_path_list, cut_width_list
+def save_image_with_watermark(
+    img: 'Union[np.ndarray, Image.Image]',
+    output_path: str,
+    watermark_path: str = None,
+    watermark_opacity: float = 0.7,
+    quality: int = 95
+) -> bool:
+    """Save image with optional watermark applied."""
+    try:
+        import cv2
+        from PIL import Image
+        if isinstance(img, np.ndarray):
+            if img.ndim == 2:
+                img_pil = Image.fromarray(img)
+            elif img.shape[2] == 4:
+                img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA))
+            else:
+                img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+        else:
+            img_pil = img
+        import os.path as osp
+        from utils.watermark_utils import apply_watermark_to_pil_image
+        if watermark_path and osp.exists(watermark_path):
+            img_pil = apply_watermark_to_pil_image(img_pil, watermark_path, watermark_opacity)
+        ext = osp.splitext(output_path)[1].lower()
+        img_format = "PNG"
+        if ext in ['.jpg', '.jpeg']:
+            img_format = "JPEG"
+            if img_pil.mode == 'RGBA':
+                img_pil = img_pil.convert('RGB')
+        elif ext == '.webp':
+            img_format = "WEBP"
+        elif ext == '.jxl':
+            img_format = "JPEG2000"
+        save_kwargs = {'format': img_format}
+        if img_format in ['JPEG', 'WEBP', 'JPEG2000']:
+            save_kwargs['quality'] = quality
+        elif img_format == 'PNG':
+            save_kwargs['compress_level'] = 3
+        img_pil.save(output_path, **save_kwargs)
+        return True
+    except Exception as e:
+        LOGGER.error(f"Error saving image with watermark: {str(e)}")
+        return False
+def save_result(self, imgname: str, img: np.ndarray) -> bool:
+    output_path = self.get_result_path(imgname)
+    if self.watermark_enabled and self.watermark_path:
+        # تحويل إلى صورة PIL
+        if img.ndim == 2:
+            img_pil = Image.fromarray(img)
+        elif img.shape[2] == 3:
+            img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+        elif img.shape[2] == 4:
+            img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA))
+        # تطبيق العلامة المائية
+        img_pil = apply_watermark_to_pil_image(
+            img_pil,
+            self.watermark_path,
+            self.watermark_opacity
+        )
+        # التحويل مرة أخرى إلى numpy array
+        if img_pil.mode == 'RGB':
+            img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
+        elif img_pil.mode == 'RGBA':
+            img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGBA2BGRA)
+        else:
+            img = np.array(img_pil)
+    return imwrite(output_path, img)

utils/registry.py ADDED Viewed

	@@ -0,0 +1,272 @@

+# modified from https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/registry.py
+import inspect
+import warnings
+from functools import partial
+class Registry:
+    """A registry to map strings to classes.
+    Registered object could be built from registry.
+    Example:
+        >>> MODELS = Registry('models')
+        >>> @MODELS.register_module()
+        >>> class ResNet:
+        >>>     pass
+        >>> resnet = MODELS.build(dict(type='ResNet'))
+    Please refer to
+    https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for
+    advanced usage.
+    Args:
+        name (str): Registry name.
+        build_func(func, optional): Build function to construct instance from
+            Registry, func:`build_from_cfg` is used if neither ``parent`` or
+            ``build_func`` is specified. If ``parent`` is specified and
+            ``build_func`` is not given,  ``build_func`` will be inherited
+            from ``parent``. Default: None.
+        parent (Registry, optional): Parent registry. The class registered in
+            children registry could be built from parent. Default: None.
+        scope (str, optional): The scope of registry. It is the key to search
+            for children registry. If not specified, scope will be the name of
+            the package where class is defined, e.g. mmdet, mmcls, mmseg.
+            Default: None.
+    """
+    def __init__(self, name, build_func=None, parent=None, scope=None):
+        self._name = name
+        self._module_dict = dict()
+        self._children = dict()
+        # self._scope = self.infer_scope() if scope is None else scope
+        # self.build_func will be set with the following priority:
+        # 1. build_func
+        # 2. parent.build_func
+        # 3. build_from_cfg
+        # if build_func is None:
+        #     if parent is not None:
+        #         self.build_func = parent.build_func
+        #     else:
+        #         self.build_func = build_from_cfg
+        # else:
+        #     self.build_func = build_func
+        if parent is not None:
+            assert isinstance(parent, Registry)
+            parent._add_children(self)
+            self.parent = parent
+        else:
+            self.parent = None
+    def __len__(self):
+        return len(self._module_dict)
+    def __contains__(self, key):
+        return self.get(key) is not None
+    def __repr__(self):
+        format_str = self.__class__.__name__ + \
+                     f'(name={self._name}, ' \
+                     f'items={self._module_dict})'
+        return format_str
+    @staticmethod
+    def infer_scope():
+        """Infer the scope of registry.
+        The name of the package where registry is defined will be returned.
+        Example:
+            >>> # in mmdet/models/backbone/resnet.py
+            >>> MODELS = Registry('models')
+            >>> @MODELS.register_module()
+            >>> class ResNet:
+            >>>     pass
+            The scope of ``ResNet`` will be ``mmdet``.
+        Returns:
+            str: The inferred scope name.
+        """
+        # inspect.stack() trace where this function is called, the index-2
+        # indicates the frame where `infer_scope()` is called
+        filename = inspect.getmodule(inspect.stack()[2][0]).__name__
+        split_filename = filename.split('.')
+        return split_filename[0]
+    @staticmethod
+    def split_scope_key(key):
+        """Split scope and key.
+        The first scope will be split from key.
+        Examples:
+            >>> Registry.split_scope_key('mmdet.ResNet')
+            'mmdet', 'ResNet'
+            >>> Registry.split_scope_key('ResNet')
+            None, 'ResNet'
+        Return:
+            tuple[str | None, str]: The former element is the first scope of
+            the key, which can be ``None``. The latter is the remaining key.
+        """
+        split_index = key.find('.')
+        if split_index != -1:
+            return key[:split_index], key[split_index + 1:]
+        else:
+            return None, key
+    @property
+    def name(self):
+        return self._name
+    # @property
+    # def scope(self):
+    #     return self._scope
+    @property
+    def module_dict(self):
+        return self._module_dict
+    @property
+    def children(self):
+        return self._children
+    def get(self, key):
+        """Get the registry record.
+        Args:
+            key (str): The class name in string format.
+        Returns:
+            class: The corresponding class.
+        """
+        scope, real_key = self.split_scope_key(key)
+        if scope is None or scope == self._scope:
+            # get from self
+            if real_key in self._module_dict:
+                return self._module_dict[real_key]
+        else:
+            # get from self._children
+            if scope in self._children:
+                return self._children[scope].get(real_key)
+            else:
+                # goto root
+                parent = self.parent
+                while parent.parent is not None:
+                    parent = parent.parent
+                return parent.get(key)
+    # def build(self, *args, **kwargs):
+    #     return self.build_func(*args, **kwargs, registry=self)
+    def _add_children(self, registry):
+        """Add children for a registry.
+        The ``registry`` will be added as children based on its scope.
+        The parent registry could build objects from children registry.
+        Example:
+            >>> models = Registry('models')
+            >>> mmdet_models = Registry('models', parent=models)
+            >>> @mmdet_models.register_module()
+            >>> class ResNet:
+            >>>     pass
+            >>> resnet = models.build(dict(type='mmdet.ResNet'))
+        """
+        assert isinstance(registry, Registry)
+        assert registry.scope is not None
+        assert registry.scope not in self.children, \
+            f'scope {registry.scope} exists in {self.name} registry'
+        self.children[registry.scope] = registry
+    def _register_module(self, module_class, module_name=None, force=False):
+        if not inspect.isclass(module_class):
+            raise TypeError('module must be a class, '
+                            f'but got {type(module_class)}')
+        if module_name is None:
+            module_name = module_class.__name__
+        if isinstance(module_name, str):
+            module_name = [module_name]
+        for name in module_name:
+            if not force and name in self._module_dict:
+                raise KeyError(f'{name} is already registered '
+                               f'in {self.name}')
+            self._module_dict[name] = module_class
+    def deprecated_register_module(self, cls=None, force=False):
+        warnings.warn(
+            'The old API of register_module(module, force=False) '
+            'is deprecated and will be removed, please use the new API '
+            'register_module(name=None, force=False, module=None) instead.',
+            DeprecationWarning)
+        if cls is None:
+            return partial(self.deprecated_register_module, force=force)
+        self._register_module(cls, force=force)
+        return cls
+    def register_module(self, name=None, force=False, module=None):
+        """Register a module.
+        A record will be added to `self._module_dict`, whose key is the class
+        name or the specified name, and value is the class itself.
+        It can be used as a decorator or a normal function.
+        Example:
+            >>> backbones = Registry('backbone')
+            >>> @backbones.register_module()
+            >>> class ResNet:
+            >>>     pass
+            >>> backbones = Registry('backbone')
+            >>> @backbones.register_module(name='mnet')
+            >>> class MobileNet:
+            >>>     pass
+            >>> backbones = Registry('backbone')
+            >>> class ResNet:
+            >>>     pass
+            >>> backbones.register_module(ResNet)
+        Args:
+            name (str | None): The module name to be registered. If not
+                specified, the class name will be used.
+            force (bool, optional): Whether to override an existing class with
+                the same name. Default: False.
+            module (type): Module class to be registered.
+        """
+        if not isinstance(force, bool):
+            raise TypeError(f'force must be a boolean, but got {type(force)}')
+        # NOTE: This is a walkaround to be compatible with the old api,
+        # while it may introduce unexpected bugs.
+        if isinstance(name, type):
+            return self.deprecated_register_module(name, force=force)
+        # raise the error ahead of time
+        if not (name is None or isinstance(name, str)):
+            raise TypeError(
+                'name must be either of None, an instance of str or a sequence'
+                f'  of str, but got {type(name)}')
+        # use it as a normal method: x.register_module(module=SomeClass)
+        if module is not None:
+            self._register_module(
+                module_class=module, module_name=name, force=force)
+            return module
+        # use it as a decorator: @x.register_module()
+        def _register(cls):
+            self._register_module(
+                module_class=cls, module_name=name, force=force)
+            return cls
+        return _register
+    def __getitem__(self, key: str):
+        return self.get(key)

utils/shared.py ADDED Viewed

	@@ -0,0 +1,160 @@

+from typing import Dict
+import os
+import os.path as osp
+import json
+import sys
+# Filesystem locations. Everything except ICON_PATH is resolved against
+# PROGRAM_PATH, the parent of the directory that contains this file.
+ICON_PATH = 'icons/icon.icns'
+PROGRAM_PATH = osp.abspath(osp.dirname(osp.dirname(__file__)))
+LOGGING_PATH = osp.join(PROGRAM_PATH, 'logs')
+LIBS_PATH = osp.join(PROGRAM_PATH, 'data/libs')
+STYLESHEET_PATH = osp.join(PROGRAM_PATH, 'config/stylesheet.css')
+THEME_PATH = osp.join(PROGRAM_PATH, 'config/themes.json')
+CONFIG_PATH = osp.join(PROGRAM_PATH, 'config/config.json')
+DEFAULT_TEXTSTYLE_DIR = osp.join(PROGRAM_PATH, 'config/textstyles')
+# Import-time side effect: make sure the text-style directory exists.
+if not osp.exists(DEFAULT_TEXTSTYLE_DIR):
+    os.makedirs(DEFAULT_TEXTSTYLE_DIR)
+# Placeholder; not assigned anywhere else in the visible part of this module.
+st_manager = None
+# Font sizes and combobox dimensions of the configuration UI
+# (units are presumably pixels/points - confirm against the UI code).
+CONFIG_FONTSIZE_HEADER = 18
+CONFIG_FONTSIZE_TABLE = 16
+CONFIG_FONTSIZE_CONTENT = 16
+CONFIG_COMBOBOX_HEIGHT = 30
+CONFIG_COMBOBOX_SHORT = 200
+CONFIG_COMBOBOX_MIDEAN = 332
+CONFIG_COMBOBOX_LONG = 468
+# Lookup table behind size2width(); note that the 'median' key maps to the
+# constant spelled CONFIG_COMBOBOX_MIDEAN.
+_size2width = {
+    'short': CONFIG_COMBOBOX_SHORT,
+    'median': CONFIG_COMBOBOX_MIDEAN,
+    'long':CONFIG_COMBOBOX_LONG
+}
+def size2width(size: str):
+    global _size2width
+    return _size2width[size]
+# Fixed widget dimensions and spacings (presumably pixels - confirm).
+HORSLIDER_FIXHEIGHT = 36
+WIDGET_SPACING_CLOSE = 8
+TEXTEDIT_FIXWIDTH = 350
+TEXTEFFECT_FIXWIDTH = 400
+TEXTEFFECT_MAXHEIGHT = 500
+LEFTBAR_WIDTH = 48
+LEFTBTN_WIDTH = 28
+# Display metrics defaults; presumably overwritten with the real screen
+# values at startup - verify against the UI initialisation code.
+LDPI = 96.
+DPI = 188.75
+SCREEN_H = 2160
+SCREEN_W = 3840
+DEFAULT_FONT_FAMILY = 'Microsoft YaHei UI'
+APP_DEFAULT_FONT = 'Microsoft YaHei UI'
+WINDOW_BORDER_WIDTH = 4
+BOTTOMBAR_HEIGHT = 32
+TITLEBAR_HEIGHT = 30
+PAGELIST_THUMBNAIL_MAXNUM = 100
+PAGELIST_THUMBNAIL_SIZE = 48
+FLAG_QT6 = True
+# Colours as 3-tuples (presumably RGB - confirm).
+SLIDERHANDLE_COLOR = (85,85,96)
+FOREGROUND_FONTCOLOR = (93,93,95)
+MAX_NUM_LOG = 7
+TRANSLATE_DIR = osp.join(PROGRAM_PATH, 'translate')
+DISPLAY_LANGUAGE_MAP = {
+    "English": "English",
+    "简体中文": "zh_CN",
+    "Русский": "ru_RU",
+    "Português (Brasil)": "pt_BR",
+    "한국어": "ko_KR",
+    "Español": "es_MX",
+    "Hungarian": "hu_HU"
+}
+VALID_LANG_SET = set(list(DISPLAY_LANGUAGE_MAP.values()))
+for p in os.listdir(TRANSLATE_DIR):
+    if p.endswith('.qm'):
+        lang = p.replace('.qm', '')
+        if lang not in VALID_LANG_SET:
+            DISPLAY_LANGUAGE_MAP[lang] = lang
+DEFAULT_DISPLAY_LANG = 'English'
+# Runtime flags and shared state; the values below are only defaults and are
+# presumably overwritten by the application at startup - verify in callers.
+USE_PYSIDE6 = False
+ON_MACOS = sys.platform == 'darwin'
+ON_WINDOWS = sys.platform == 'win32'
+HEADLESS = False
+DEBUG = False
+args = None
+FUZZY_MATCH_IMAGE_NAME = False
+# Cache state used by load_cache() / dump_cache(); cache_data stays None
+# until load_cache() has run.
+cache_data: Dict = None
+cache_dir: str = osp.join(PROGRAM_PATH, '.btrans_cache')
+cache_path: str = osp.join(PROGRAM_PATH, '.btrans_cache/cache.json')
+# dump_cache() resets this flag to False after writing.
+CACHE_UPDATED = False
+check_local_file_hash = True
+FONT_FAMILIES: set = None
+CUSTOM_FONTS = []
+pbar = {}
+runtime_widget_set = set()
+def add_to_runtime_widget_set(widget):
+    runtime_widget_set.add(widget)
+def remove_from_runtime_widget_set(widget):
+    if widget in runtime_widget_set:
+        runtime_widget_set.remove(widget)
+# presumably remembers which exceptions were already reported - verify in callers
+showed_exception = set()
+# it will be set to ui.mainwindow.create_errdialog.emit after UI initialized
+# Until then both hooks are no-ops that accept any arguments and return None.
+create_errdialog_in_mainthread = lambda *args, **kwargs: None
+create_infodialog_in_mainthread = lambda *args, **kwargs: None
+def load_cache():
+    global cache_data
+    if cache_data is None:
+        if osp.exists(cache_path):
+            try:
+                with open(cache_path, "r", encoding="utf8") as file:
+                    cache_data = json.load(file)
+            except:
+                print(f'cached file {cache_path} is invalid')
+                cache_data = {}
+        else:
+            cache_data = {}
+def dump_cache():
+    global cache_data
+    if cache_data is None:
+        return
+    cache_dir = osp.dirname(cache_path)
+    if not osp.exists(cache_dir):
+        os.makedirs(cache_dir)
+    with open(cache_path, "w", encoding="utf8") as file:
+        json.dump(cache_data, file, indent=4)
+    global CACHE_UPDATED
+    CACHE_UPDATED = False
+config_name_to_view_widget = {}
+action_to_view_config_name = {}
+register_view_widget: lambda *args, **kwargs: None

utils/split_text_region.py ADDED Viewed

	@@ -0,0 +1,386 @@

+import cv2, os, re, random
+import numpy as np
+# import tesserocr
+# from tesserocr import PyTessBaseAPI, PSM, OEM
+class TextSpan(object):
+    def __init__(self, top_bnd=None, bottom_bnd=None, left_bnd=None, right_bnd=None):
+        self.top = top_bnd
+        self.bottom = bottom_bnd
+        self.height = self.bottom - self.top if bottom_bnd is not None else None
+        self.left = left_bnd
+        self.right = right_bnd
+        self.width = self.right - self.left if right_bnd is not None else None
+    def set_top(self, top_bnd):
+        self.top = top_bnd
+        return True
+    def set_bottom(self, bottom_bnd):
+        if self.top is None or bottom_bnd <= self.top:
+            return False
+        self.bottom = bottom_bnd
+        self.height = self.bottom - self.top
+        return True
+    def set_left(self, left_bnd):
+        self.left = left_bnd
+        return True
+    def set_right(self, right_bnd):
+        if self.left is None or right_bnd <= self.left:
+            return False
+        self.right = right_bnd
+        self.width = right_bnd - self.left
+        return True
+    def __getitem__(self, index):
+        if isinstance(index, int) and index >=0 and index < 4:
+            return [self.left, self.top, self.right, self.bottom][index]
+        else:
+            raise AttributeError(f'Invalid key: {index}')
+# Split ``span`` into runs of consecutive positions of ``sumby_yaxis`` whose
+# value exceeds ``thresh``. Every run found is passed to split_step1, which
+# appends it to the result or, when ``thresh2`` is given, tries to split it
+# once more with that threshold.
+# Returns the list of spans, or None when no position (or only a single
+# position) inside ``span`` exceeds ``thresh``.
+def split_step0(span, thresh, sumby_yaxis, thresh2=None) -> list[TextSpan]:
+    # absolute indices inside [span.top, span.bottom) that exceed the threshold
+    candidate_pnts = (np.where(sumby_yaxis[span.top: span.bottom] > thresh)[0] + span.top).tolist()
+    span_list = []
+    if len(candidate_pnts) == 0:
+        return None
+    # candidates further apart than this start a new run
+    stride_tol = 1
+    span0, span1 = TextSpan(candidate_pnts[0]), TextSpan()
+    for pnt_ind in range(len(candidate_pnts)-1):
+        if candidate_pnts[pnt_ind+1] - candidate_pnts[pnt_ind] > stride_tol:
+            # set_bottom refuses a bottom that is not below the top (a
+            # one-point run); the run is then skipped and span0 keeps its top
+            if not span0.set_bottom(candidate_pnts[pnt_ind]):
+                continue
+            span_list = split_step1(span0, span_list, thresh=thresh2, sumby_yaxis=sumby_yaxis)
+            span1.set_top(candidate_pnts[pnt_ind+1])
+            span0 = span1
+            span1 = TextSpan()
+    if len(candidate_pnts)-1 == 0:
+        # single candidate: first and last element are the same point, so
+        # this always takes the first branch and yields None
+        if candidate_pnts[0] == candidate_pnts[-1]:
+            span_list = None
+        else:
+            span0 = TextSpan(candidate_pnts[0], candidate_pnts[-1])
+            span_list = split_step1(span0, span_list, thresh=thresh2, sumby_yaxis=sumby_yaxis)
+    elif span0.top != candidate_pnts[-1]:
+        # close the run that is still open at the last candidate
+        span0.set_bottom(candidate_pnts[-1])
+        span_list = split_step1(span0, span_list, thresh=thresh2, sumby_yaxis=sumby_yaxis)
+    return span_list
+def split_step1(span, span_list, thresh=None, sumby_yaxis=None):
+    if thresh is None:
+        span_list.append(span)
+        return span_list
+    else:
+        subspan_list = split_step0(span, thresh, sumby_yaxis)
+        # print(np.var(sumby_yaxis[span.top:span.bottom]))
+        if subspan_list is not None:
+            _, maxspan = find_span(subspan_list, max)
+            _, minspan = find_span(subspan_list, min)
+            sum_height = sum(c.height for c in subspan_list)
+            if maxspan.height / minspan.height > 2.5 or sum_height / span.height < 0.3 or len(subspan_list) == 1:
+                subspan_list = None
+        if subspan_list is not None and len(subspan_list) > 1:
+            span_list += subspan_list
+        else:
+            span_list.append(span)
+        return span_list
+# Adjust the spans in ``span_list`` in place and return them together with
+# ``(left_var, middle_var)``.
+# Vertical step: the gap between neighbouring spans is split between them so
+# that they touch, and the first span is extended upwards.
+# Horizontal step: left/right of every span are set to the first/last column
+# with content, and the variances of the left edges and of the horizontal
+# centres are computed; both stay -1 when ``shrink_hor_space`` is false.
+# Assumes src_img is a 2-D (single channel) array - TODO confirm.
+def shrink_span_list(src_img, span_list, shrink_vert_space=True, shrink_hor_space=True):
+    height, width = src_img.shape[0], src_img.shape[1]
+    sum_spacing = 0
+    if shrink_vert_space:
+        for ii in range(len(span_list)-1):
+            line_spacing = span_list[ii+1].top - span_list[ii].bottom
+            sum_spacing += line_spacing
+            # give half of the gap to each neighbour
+            line_spacing = int(round(line_spacing / 2))
+            span_list[ii+1].top -= line_spacing
+            # set_bottom also recomputes the height from the (possibly moved) top
+            span_list[ii].set_bottom(span_list[ii].bottom + line_spacing)
+        if len(span_list) >= 2:
+            # half of the rounded mean gap, used to extend the first span upwards
+            mean_spacing = int(0.5 * round(sum_spacing / (len(span_list)-1)))
+            span_list[0].top = max(0, span_list[0].top-mean_spacing)
+            # re-setting the unchanged bottom refreshes the height
+            span_list[0].set_bottom(span_list[0].bottom)
+            span_list[-1].set_bottom(min(src_img.shape[0], span_list[-1].bottom))
+    left_var, middle_var = -1, -1
+    if shrink_hor_space:
+        left_pnts, middle_pnts = [], []
+        for ii in range(len(span_list)):
+            s = span_list[ii]
+            im = src_img[s.top: s.bottom, 0: width]
+            # despite the name this is the mean over axis 0, one value per column
+            sumby_yaxis = np.mean(im, axis=0)
+            # columns whose mean exceeds 10 count as content
+            content_array = np.where(sumby_yaxis > 10)[0].tolist()
+            left, right = 0, width
+            if len(content_array) != 0:
+                left, right = content_array[0], content_array[-1]
+            span_list[ii].set_left(left)
+            # set_right silently refuses a right bound that is not beyond left
+            span_list[ii].set_right(right)
+            s = span_list[ii]
+            left_pnts.append(left)
+            middle_pnts.append((left+right)/2)
+        left_var, middle_var = np.var(np.array(left_pnts)), np.var(np.array(middle_pnts))
+    return span_list, (left_var, middle_var)
+def find_span(span_list, max_or_min=max, key="height"):
+    if key=="height":
+        return max_or_min(enumerate(span_list), key=(lambda x: span_list[x[0]].height), default = -1)
+    else:
+        return max_or_min(enumerate(span_list), key=(lambda x: span_list[x[0]].width), default = -1)
+def discard_spans(span_list, thresh_ratio=0.3):
+    index, max_span = find_span(span_list, max)
+    max_height = max_span.height
+    height_thresh = max_height * thresh_ratio
+    new_spanlist = []
+    for sp in span_list:
+        if sp.height < height_thresh:
+            continue
+        new_spanlist.append(sp)
+    return new_spanlist
+# Debug helper that plots the profile ``sumbyvector`` with matplotlib.
+# When ``thresh`` is given, every ratio in it is drawn as a dashed horizontal
+# line at ``ratio * sumbyvector.mean()`` and the top/bottom bounds of the
+# spans in ``span_list`` are drawn as vertical lines.
+# Any failure (including matplotlib not being installed) is silently ignored.
+def plot_mapresult(sumbyvector, xlength, span_list=None, thresh=None):
+    '''for experiment'''
+    try:
+        import matplotlib.pyplot as plt
+        plt.plot(sumbyvector)
+        plt.ylabel('div pnt value')
+        plt.xlabel('div pnt coord')
+        # only the maximum (255) is used, as the height of the vertical lines
+        s = [0, 255]
+        x_cords = []
+        if span_list is not None:
+            for sp in span_list:
+                x_cords.append(sp.top)
+                x_cords.append(sp.bottom)
+        if thresh is not None:
+            for tr in thresh:
+                # the span bounds are only drawn inside this loop, i.e. once
+                # per threshold and not at all when thresh is None
+                plt.vlines(x = x_cords, ymin = 0, ymax = max(s),
+                        colors = 'purple',
+                        label = 'vline_multiple - full height')
+                plt.hlines(y = tr * sumbyvector.mean(), xmin = 0, xmax = xlength, linestyles='--')
+        plt.show()
+    except:
+        pass
+def box(width, height):
+    return np.ones((height, width), dtype=np.uint8)
+def crop_img(img, crop_ratio=0.2, clip_width=True, dilate=False):
+    h, w = img.shape[:2]
+    moments = cv2.moments(img)
+    area = moments['m00']
+    if area != 0:
+        mean_x = int(round(moments['m10'] / area))
+        mean_y = int(round(moments['m01'] / area))
+        crop_r = int(round(crop_ratio * w))
+        if clip_width:
+            crop_x0 = np.clip(mean_x - crop_r, 0, w)
+            crop_x1 = np.clip(mean_x + crop_r, 0, w)
+            if crop_x1 > crop_x0:
+                img = img[:, crop_x0: crop_x1]
+        else:
+            crop_r = np.clip(crop_r * 2, 0, w - 1)
+            img = img[:, crop_r:]
+    img = np.copy(img)
+    if clip_width and dilate:
+        w = int(round(w/7))
+        if w > 1:
+            img = cv2.dilate(img, box(w, 1), 1)
+    return img, img.shape[0], img.shape[1]
+def split_textblock(src_img, crop_ratio=0.2, blur=False, show_process=False, discard=True, shrink=True, recheck=False, clip_width=True, dilate=True):
+    if blur:
+        src_img = cv2.GaussianBlur(src_img,(3,3),cv2.BORDER_DEFAULT)
+    if crop_ratio > 0:
+        img, height, width = crop_img(src_img, crop_ratio=crop_ratio, clip_width=clip_width, dilate=dilate)
+    else:
+        img, height, width = src_img, src_img.shape[0], src_img.shape[1]
+    sumby_yaxis = img.mean(axis=1)
+    bound0 = np.where(sumby_yaxis > sumby_yaxis.mean() * 0.1)[0].tolist()
+    vars = (-1, -1)
+    if len(bound0) < 2:
+        return [TextSpan(0, height-1, 0, width - 1)], vars
+    base_span = TextSpan(bound0[0], bound0[-1])
+    meanby_yaxis = sumby_yaxis.mean()
+    thresh_ratio = [0.4, 0.8]
+    thresh0 = meanby_yaxis * thresh_ratio[0]
+    thresh2 = meanby_yaxis * thresh_ratio[1]
+    span_list = split_step0(base_span, thresh0, sumby_yaxis, thresh2=thresh2)
+    if span_list is None:
+        return None, None
+    if discard:
+        span_list = discard_spans(span_list)
+    if shrink:
+        span_list, vars = shrink_span_list(src_img, span_list)
+    '''for experiment'''
+    if show_process:
+        plot_mapresult(sumby_yaxis, height, span_list=span_list, thresh=thresh_ratio)
+    if recheck and len(span_list) == 1 and crop_ratio > 0:
+        return split_textblock(src_img, crop_ratio==-1, show_process=show_process, discard=discard, shrink=shrink, recheck=False)
+    valid_span_list = []
+    for span in span_list:
+        if span.top is None:
+            span.set_top(0)
+        if span.left is None:
+            span.set_left(0)
+        if span.right is None:
+            span.set_right(width)
+        if span.bottom is None:
+            span.set_bottom(height)
+        valid_span_list.append(span)
+    return valid_span_list, vars
+# def tessocr_img2text(img, lang):
+#     img = Image.fromarray(img)
+#     if re.findall("vert", lang):
+#         psm = PSM.SINGLE_BLOCK_VERT_TEXT
+#     else:
+#         psm = PSM.SINGLE_LINE
+#     return tesserocr.image_to_text(img, psm=psm, lang=lang, path=TESSDATA_PATH)
+# def tessocr_img2text(img, lang):
+#     psm = "5" if re.findall("vert", lang) else "7"
+#     config = r'--tessdata-dir "models\tessdata" --psm ' + psm
+#     return pytesseract.image_to_string(img, lang=lang, config=config)
+def textspan2list(span_list):
+    converted_list = []
+    for ii, s in enumerate(span_list):
+        converted_list.append([])
+        converted_list[ii].append(s.top)
+        converted_list[ii].append(s.left)
+        converted_list[ii].append(s.bottom)
+        converted_list[ii].append(s.right)
+    return converted_list
+# Split ``img`` into column spans: the image is rotated 90 degrees clockwise,
+# split with split_textblock, and the resulting spans are mapped back to the
+# coordinates of the unrotated image (modified in place and returned).
+# ``show_process`` is forwarded to split_textblock; ``bbox`` and
+# ``clip_width`` do not influence the result.
+def manga_split(img, bbox=None, show_process=False, clip_width=False) -> list[TextSpan]:
+    im = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
+    imh, imw = im.shape[:2]
+    if bbox is None:
+        bbox = [0, 0, im.shape[1], im.shape[0]]
+    bboxes = [bbox]
+    # with crop_ratio=0 nothing is cropped and ``recheck`` has no effect;
+    # since shrink=False, split_textblock fills left/right with 0 / image width
+    span_list, _ = split_textblock(im, show_process=show_process, shrink=False, recheck=True, discard=False, crop_ratio=0)
+    if span_list is None:
+        # NOTE(review): TextSpan takes (top, bottom, left, right), so this is
+        # top=bottom=0, left=im.shape[1], right=im.shape[0] - confirm intended
+        return [TextSpan(0, 0, im.shape[1], im.shape[0])]
+    # span_list, _ = shrink_span_list(im, span_list, shrink_vert_space=False)
+    for ii, span in enumerate(span_list):
+        # remember the bounds in the rotated image before overwriting them
+        left = span.left
+        right = span.right
+        # top/bottom of the rotated image become left/right; the first and
+        # last span are stretched to the image borders
+        if ii == 0:
+            span.left = 0
+        else:
+            span.left = span.top
+        if ii == len(span_list) - 1:
+            span.right = im.shape[0]
+        else:
+            span.right = span.bottom
+        # left/right of the rotated image become bottom/top, mirrored at imw
+        span.top =  imw - right
+        span.bottom = imw - left
+        span.height = span.bottom - span.top
+        span.width = span.right - span.left
+    return span_list
+def tessocr_img2text_linemode(img, span_list=None, combine_lines=True, show_process=False, gen_data=False, lang="comic6k", jpn_vert=False):
+    if jpn_vert:
+        lang = "jpn_vert"
+        img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
+    hig = img.shape[0]
+    wid = img.shape[1]
+    if hig * wid < 5:
+        return '', -1, -1
+    bw = 3
+    text = ''
+    alignment, vars = 0, (-1, -1)
+    if span_list is None:
+        span_list, vars = split_textblock(img, show_process=show_process)
+        _, maxspan = find_span(span_list, max)
+        maxh = bw*2 + maxspan.height
+    else:
+        maxh = max([s[2]-s[0] for s in span_list])
+        maxh = bw*2 + maxh
+    long_line = []
+    word_space = int(round(maxh / 8))
+    img = 255 - img
+    for ind, s in enumerate(span_list):
+        if isinstance(s, list):
+            im = img[s[0]: s[2], s[1]: s[3]]
+        else:
+            im = img[s.top: s.bottom, s.left: s.right]
+        hw1 = int(round((maxh - im.shape[0])/2))
+        hw2 = maxh - hw1 - im.shape[0]
+        dst = cv2.copyMakeBorder(im, hw1, hw2, word_space, word_space, cv2.BORDER_CONSTANT, None, value=[255, 255, 255])
+        if not combine_lines:
+            text += tessocr_img2text(dst, lang=lang) +'\n'
+        else:
+            long_line.append(dst)
+        if show_process:
+            cv2.imshow(str(ind), dst)
+    if combine_lines:
+        long_line = cv2.hconcat(long_line)
+        if jpn_vert:
+            long_line = cv2.rotate(long_line, cv2.ROTATE_90_CLOCKWISE)
+        if show_process:
+            cv2.namedWindow("long line:", cv2.WINDOW_NORMAL)
+            cv2.imshow("long line:", long_line)
+        if gen_data:
+            return long_line
+        res = tessocr_img2text(long_line, lang=lang)
+    mean_height = -1
+    if len(span_list) != 0:
+        if isinstance(span_list[0], list):
+            mean_height = np.mean(np.array([s[2]-s[0] for s in span_list]))
+        else:
+            mean_height = np.mean(np.array([s.height for s in span_list]))
+        alignment = 1 if vars[1] < vars[0] else 0
+    return res, mean_height, alignment

utils/stroke_width_calculator.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import cv2, os, time
+import numpy as np
+def calculate_derivatives(gx, gy):
+    mag = np.sqrt(gx*gx + gy*gy)
+    if mag==0:
+        return False, -1, -1
+    else:
+        return True, gx / mag, gy / mag
+def sw_calculator(mask, canny_img, gradient_x, gradient_y, show_process=False):
+    height, width = canny_img.shape[0], canny_img.shape[1]
+    if show_process:
+        drawborder = np.zeros((canny_img.shape[0], canny_img.shape[1], 3), dtype=np.uint8)
+    pnts = np.where(np.logical_and(canny_img != 0, mask!=0))
+    total_pnt_num = pnts[0].shape[0]
+    sample_pnt_num = 150
+    sample_step = total_pnt_num / sample_pnt_num if total_pnt_num > sample_pnt_num else 1
+    cur_pnt_ind = 0
+    ray_list = []
+    while cur_pnt_ind < total_pnt_num:
+        start_x, start_y = pnts[1][cur_pnt_ind], pnts[0][cur_pnt_ind]
+        ray_arr = [start_x, start_y, -1, -1, -1]
+        valid, dx, dy = calculate_derivatives(gradient_x[start_y][start_x], gradient_y[start_y][start_x])
+        if valid:
+            inc = 0.2
+            cur_x, cur_y = start_x + inc * dx, start_y + inc * dy
+            while (True):
+                tmp_curx, tmp_cury = int(cur_x), int(cur_y)
+                if tmp_curx < 0 or tmp_curx >= width or tmp_cury <= 0 or tmp_cury >= height:
+                    break
+                if canny_img[tmp_cury][tmp_curx] == 0:
+                    valid, dx_t, dy_t = calculate_derivatives(gradient_x[tmp_cury][tmp_curx], gradient_y[tmp_cury][tmp_curx])
+                    if not valid:
+                        break
+                    if np.arccos(-dx * dx_t + -dy * dy_t) < np.pi / 2.0:
+                        ray_arr[2] = tmp_curx
+                        ray_arr[3] = tmp_cury
+                        ray_arr[4] = np.sqrt((start_x - tmp_curx)**2 + (start_y - tmp_cury)**2)
+                    break
+                cur_x += dx
+                cur_y += dy
+            if ray_arr[2] != -1:
+                ray_list.append(ray_arr)
+                if show_process:
+                    drawborder = cv2.arrowedLine(drawborder, (ray_arr[0], ray_arr[1]), (ray_arr[2], ray_arr[3]),
+                                                    (0, 255, 0), 1)
+        cur_pnt_ind += sample_step
+        cur_pnt_ind = int(round(cur_pnt_ind))
+    if show_process and len(ray_list) != 0:
+        ray_list.sort(key=lambda x: x[4])
+        cv2.imshow("border", drawborder)
+        cv2.imshow("cannyimg", canny_img)
+        cv2.waitKey(0)
+    return ray_list
+def strokewidth_check(text_mask, labels, num_labels, stats, debug_type=0):
+    rays_width = []
+    height, width = text_mask.shape[0], text_mask.shape[1]
+    blur_img = cv2.dilate(text_mask ,(3,3),cv2.BORDER_DEFAULT)
+    # canny_img = cv2.Canny(cv2.dilate(text_mask, (3,3), 1), 170, 320, L2gradient=True, apertureSize=3)
+    _, canny_img = cv2.threshold(text_mask, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
+    blur2 = blur_img.astype(float) / 255
+    gradient_x = cv2.Scharr(blur2, ddepth=-1, dx=1, dy=0)
+    gradient_x = cv2.GaussianBlur(gradient_x ,(3, 3),cv2.BORDER_DEFAULT)
+    gradient_y = cv2.Scharr(blur2, ddepth=-1, dx=0, dy=1)
+    gradient_y = cv2.GaussianBlur(gradient_y ,(3, 3),cv2.BORDER_DEFAULT)
+    img_area = text_mask.shape[0] * text_mask.shape[1]
+    show_process = True if debug_type > 0 else False
+    for lab in range(num_labels):
+        stat = stats[lab]
+        if lab != 0 and stat[4] > img_area * 0.002:
+            x1, y1, x2, y2 = stat[0] - 2, stat[1] - 2, stat[0] + stat[2] + 2, stat[1] + stat[3] + 2
+            x1, x2 = max(x1, 0), min(x2, width)
+            y1, y2 = max(y1, 0), min(y2, height)
+            labcord = np.where(labels==lab)
+            labcord2 = (labcord[0] - y1, labcord[1] - x1)
+            text_roi = np.zeros((y2-y1, x2-x1), dtype=np.uint8)
+            text_roi[labcord2] = 255
+            text_roi = cv2.GaussianBlur(text_roi ,(3,3), cv2.BORDER_DEFAULT)
+            ray_list = sw_calculator(text_roi,
+                                    canny_img[y1: y2, x1: x2],
+                                    gradient_x[y1: y2, x1: x2],
+                                    gradient_y[y1: y2, x1: x2],
+                                    show_process=show_process)
+            if len(ray_list) != 0:
+                ray_list.sort(key=lambda x: x[4])
+                rays_width.append([int(lab), ray_list[int(len(ray_list)/2)][4]])
+    if len(rays_width) != 0:
+        rays_width = np.array(rays_width)
+        mean_width = np.mean(rays_width[:, 1])
+        ma = np.int0(rays_width[:, 0])
+        mean_area = np.mean(stats[ma][:, 4])
+        false_labels = np.where(rays_width[:, 1] > 2*mean_width)[0]
+        false_labels = rays_width[false_labels, 0].astype(np.int32)
+        for fl in false_labels:
+            if stats[fl][4] > 2 * mean_area:
+                text_mask[np.where(labels==fl)] = 0
+    return text_mask

utils/structures.py ADDED Viewed

	@@ -0,0 +1,84 @@

+from typing import Tuple, List, ClassVar, Union, Any, Dict, Set
+from dataclasses import dataclass, field, is_dataclass
+import copy
+import os
+import numpy as np
+# decorator to wrap original __init__
+# https://www.geeksforgeeks.org/creating-nested-dataclass-objects-in-python/
+def nested_dataclass(*args, **dataclass_kwargs):
+    '''
+    nested dataclass support \n
+    also ignore extra arguments
+    '''
+    def wrapper(check_class):
+        # passing class to investigate
+        check_class = dataclass(check_class, **dataclass_kwargs)
+        o_init = check_class.__init__
+        def __init__(self, *args, **kwargs):
+            store_deprecated = 'deprecated_attributes' in self.__annotations__
+            deprecated = {}
+            for name in list(kwargs.keys()):
+                if name not in self.__annotations__:
+                    # print(f'warning: type object \'{self.__class__.__name__}\' has no attribute {name}, might be loading from an older config')
+                    val = kwargs.pop(name)
+                    if store_deprecated:
+                        deprecated[name] = val
+                    continue
+                value = kwargs[name]
+                # getting field type
+                ft = check_class.__annotations__.get(name, None)
+                if is_dataclass(ft) and isinstance(value, dict):
+                    obj = ft(**value)
+                    kwargs[name]= obj
+            if len(deprecated) > 0:
+                kwargs['deprecated_attributes'] = deprecated
+            o_init(self, *args, **kwargs)
+        check_class.__init__=__init__
+        return check_class
+    return wrapper(args[0]) if args else wrapper
+@dataclass
+class Config:
+    def update(self, key: str, value):
+        assert key in self.__annotations__, f'type object \'{self.__class__.__name__}\' has no attribute {key}'
+        self.__setattr__(key, value)
+    @classmethod
+    def annotations_set(cls):
+        return set(list(cls.__annotations__))
+    def __getitem__(self, key: str):
+        assert key in self.__annotations__, f'type object \'{self.__class__.__name__}\' has no attribute {key}'
+        return self.__getattribute__(key)
+    def __setitem__(self, key: str, value):
+        self.__setattr__(key, value)
+    @classmethod
+    def params(cls):
+        return cls.__annotations__
+    def merge(self, target):
+        tgt_keys = target.annotations_set()
+        for key in tgt_keys:
+            self.update(key, target[key])
+    def copy(self):
+        return copy.deepcopy(self)
+# Parent of the directory that contains this file (symlinks resolved).
+MODULE_PATH = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+# Parent directory of MODULE_PATH.
+BASE_PATH = os.path.dirname(MODULE_PATH)

utils/text_layout.py ADDED Viewed

	@@ -0,0 +1,477 @@

+from typing import List, Tuple
+import numpy as np
+from .imgproc_utils import rotate_image
+from .textblock import TextBlock, TextAlignment
+class Line:
+    def __init__(self, text: str = '', pos_x: int = 0, pos_y: int = 0, length: float = 0, spacing: int = 0) -> None:
+        self.text = text
+        self.pos_x = pos_x
+        self.pos_y = pos_y
+        self.length = int(length)
+        self.num_words = 0
+        if text:
+            self.num_words += 1
+        self.spacing = 0
+        self.add_spacing(spacing)
+    def append_right(self, word: str, w_len: int, delimiter: str = ''):
+        self.text = self.text + delimiter + word
+        if word:
+            self.num_words += 1
+        self.length += w_len
+    def append_left(self, word: str, w_len: int, delimiter: str = ''):
+        self.text = word + delimiter + self.text
+        if word:
+            self.num_words += 1
+        self.length += w_len
+    def add_spacing(self, spacing: int):
+        self.spacing = spacing
+        self.pos_x -= spacing
+        self.length += 2 * spacing
+    def strip_spacing(self):
+        self.length -= self.spacing * 2
+        self.pos_x += self.spacing
+        self.spacing = 0
+def line_is_valid(line: Line, new_len: int, delimiter_len, max_width, words_length, srcline_wlist, line_no: int, line_height, ref_src_lines: bool = False):
+    if ref_src_lines:
+        # if line_no >= 0 and line_no < len(srcline_wlist):
+        #     _max_width = min(srcline_wlist[line_no], max_width)
+        # else:
+        #     _max_width = max_width
+        if line_no >= 0 and line_no < len(srcline_wlist):
+            _max_width = srcline_wlist[line_no] * words_length
+        else:
+            _max_width = np.inf
+            _max_width = max(srcline_wlist) * words_length
+        _max_width = _max_width + delimiter_len * line.num_words
+        max_width = min(max_width, _max_width)
+    if new_len < max_width:
+        return True
+    else:
+        if line.length / max_width < max_width / new_len:
+            return True
+        else:
+            return False
+def layout_lines_aligncenter(
+    blk: TextBlock,
+    mask: np.ndarray,
+    words: List[str],
+    centroid: List[int],
+    wl_list: List[int],
+    delimiter_len: int,
+    line_height: int,
+    spacing: int = 0,
+    delimiter: str = ' ',
+    max_central_width: float = np.inf,
+    word_break: bool = False,
+    ref_src_lines = False,
+    srcline_wlist=None,
+    start_from_top=False
+)->Tuple[List[Line], Tuple[int, int]]:
+    """Break ``words`` into centre-aligned lines around ``centroid``.
+
+    Unless ``start_from_top`` is set, a central line is built first from the
+    word nearest the middle of the total length and grown with neighbouring
+    words; the words after it then fill lines below and the words before it
+    fill lines above. With ``start_from_top`` every word goes through the
+    "bottom half" pass, starting half the block height above the centroid.
+
+    A line may grow while the mask columns at its prospective left and right
+    edges have a mean above ``border_thr`` over the line's rows, and while
+    ``line_is_valid`` accepts the new length. Presumably bright mask pixels
+    mark the area text may occupy -- confirm against the caller.
+
+    Args:
+        blk: text block; ``line_spacing`` and ``bounding_rect()`` are read.
+        mask: 2-D array sampled at the prospective line edges.
+        words: words to lay out.
+        centroid: ``(x, y)`` position the layout is centred on.
+        wl_list: length of each word, parallel to ``words``.
+        delimiter_len: length added for each delimiter between words.
+        line_height: vertical step between consecutive lines.
+        spacing: padding handed to every ``Line`` and stripped on completion.
+        delimiter: string inserted between words of a line.
+        max_central_width: width limit forwarded to ``line_is_valid``.
+        word_break: not used.
+        ref_src_lines: also constrain line widths with ``srcline_wlist``.
+        srcline_wlist: per-source-line widths, required with ``ref_src_lines``.
+        start_from_top: lay out from the top instead of from the centre.
+
+    Returns:
+        ``(lines, (adjust_x, adjust_y))``. ``lines`` is ordered top to bottom,
+        ``adjust_x`` is always 0 and ``adjust_y`` is the downward shift
+        applied to the first line of the bottom pass.
+
+    Note:
+        With ``start_from_top`` the caller's ``words`` and ``wl_list`` are
+        consumed in place (they are popped directly rather than copied).
+    """
+    # Rows trimmed from the bottom of each line's sampling window when the
+    # block uses a line spacing above 1.
+    lh_pad = 0
+    if blk.line_spacing > 1:
+        lh_pad = int(np.ceil(line_height - line_height / blk.line_spacing))
+    centroid_x, centroid_y = centroid
+    adjust_x = adjust_y = 0
+    # Minimum mean mask value at a line edge for the edge to be accepted.
+    border_thr = 220
+    # layout the central line, the center word is approximately aligned with the centroid of the mask
+    num_words = len(words)
+    len_left, len_right = [], []
+    wlst_left, wlst_right = [], []
+    sum_left, sum_right = 0, 0
+    words_length = sum(wl_list)
+    if num_words > 1:
+        # Pick the word whose midpoint lies closest to half the total length.
+        wl_array = np.array(wl_list, dtype=np.float64)
+        wl_cumsums = np.cumsum(wl_array)
+        wl_cumsums = wl_cumsums - wl_cumsums[-1] / 2 - wl_array / 2
+        central_index = np.argmin(np.abs(wl_cumsums))
+        if central_index > 0:
+            wlst_left = words[:central_index]
+            len_left = wl_list[:central_index]
+            sum_left = np.sum(len_left)
+        if central_index < num_words - 1:
+            wlst_right = words[central_index + 1:]
+            len_right = wl_list[central_index + 1:]
+            sum_right = np.sum(len_right)
+    else:
+        central_index = 0
+    pos_y = centroid_y - line_height // 2
+    pos_x = centroid_x - wl_list[central_index] // 2
+    bh, bw = mask.shape[:2]
+    central_line = Line(words[central_index], pos_x, pos_y, wl_list[central_index], spacing)
+    line_bottom = pos_y + line_height
+    # Grow the central line one word at a time, taking the nearest word from
+    # the left or the right side, until neither side fits.
+    while (sum_left > 0 or sum_right > 0) and not start_from_top:
+        left_valid, right_valid = False, False
+        if sum_left > 0:
+            new_len_l = central_line.length + len_left[-1] + delimiter_len
+            new_x_l = centroid_x - new_len_l // 2
+            new_r_l = new_x_l + new_len_l
+            if (new_x_l > 0 and new_r_l < bw):
+                if mask[pos_y: line_bottom - lh_pad, new_x_l].mean() > border_thr and \
+                    mask[pos_y: line_bottom - lh_pad, new_r_l].mean() > border_thr:
+                    left_valid = True
+        if sum_right > 0:
+            # Unlike the left-hand probe, this one adds half a line height of
+            # margin on both sides.
+            new_len_r = central_line.length + len_right[0] + delimiter_len
+            new_x_r = centroid_x - new_len_r // 2 - line_height // 2
+            new_r_r = centroid_x + new_len_r // 2 + line_height // 2
+            if (new_x_r > 0 and new_r_r < bw):
+                if mask[pos_y: line_bottom - lh_pad, new_x_r].mean() > border_thr and \
+                    mask[pos_y: line_bottom - lh_pad, new_r_r].mean() > border_thr:
+                    right_valid = True
+        # When both sides fit, take from the side with more text remaining.
+        insert_left = False
+        if left_valid and right_valid:
+            if sum_left > sum_right:
+                insert_left = True
+        elif left_valid:
+            insert_left = True
+        elif not right_valid:
+            break
+        if insert_left:
+            new_len = central_line.length + len_left[-1] + delimiter_len
+        else:
+            new_len = central_line.length + len_right[0] + delimiter_len
+        line_valid = line_is_valid(central_line, new_len, delimiter_len, max_central_width, words_length, srcline_wlist, -1, line_height, ref_src_lines)
+        # A single source line only has to respect max_central_width.
+        if ref_src_lines and not line_valid and len(srcline_wlist) == 1:
+            if new_len < max_central_width:
+                line_valid = True
+        if not line_valid:
+            break
+        if insert_left:
+            central_line.append_left(wlst_left.pop(-1), len_left[-1] + delimiter_len, delimiter)
+            sum_left -= len_left.pop(-1)
+            central_line.pos_x = new_x_l
+        else:
+            central_line.append_right(wlst_right.pop(0), len_right[0] + delimiter_len, delimiter)
+            sum_right -= len_right.pop(0)
+            central_line.pos_x = new_x_r
+    # Indices into srcline_wlist for the first line below / above the centre.
+    line_right_no = line_left_no = 0
+    if ref_src_lines:
+        nl = len(srcline_wlist)
+        if nl % 2 == 0:
+            line_right_no = nl // 2
+            line_left_no = nl // 2 - 1
+        else:
+            line_right_no = nl // 2 + 1
+            line_left_no = nl // 2 - 1
+    if not start_from_top:
+        central_line.strip_spacing()
+        lines = [central_line]
+    else:
+        # Discard the central line: every word is laid out by the pass below.
+        # These are the caller's lists, so the pops below mutate them.
+        lines = []
+        sum_right = sum(wl_list)
+        sum_left = 0
+        wlst_right = words
+        len_right = wl_list
+        line_right_no = 0
+    # layout bottom half
+    if sum_right > 0:
+        w, wl = wlst_right.pop(0), len_right.pop(0)
+        pos_x = centroid_x - wl // 2
+        if start_from_top:
+            pos_y = centroid_y - int(blk.bounding_rect()[3] / 2)
+        else:
+            pos_y = centroid_y + line_height // 2
+        pos_y = max(0, min(pos_y, mask.shape[0] - 1))
+        # If the starting row is darker than half the brightest row mean,
+        # move down to the first sufficiently bright row (at most one line).
+        top_mean = mask[pos_y, :].mean()
+        x_mean = mask.mean(axis=1)
+        base_mean = x_mean.max() / 2
+        if top_mean < base_mean:
+            available_y = np.where(
+                x_mean[pos_y:] > base_mean
+            )[0]
+            if len(available_y) > 0:
+                adjust_y = min(available_y[0], line_height)
+                pos_y = pos_y + adjust_y
+        line_bottom = pos_y + line_height
+        line = Line(w, pos_x, pos_y, wl, spacing)
+        lines.append(line)
+        sum_right -= wl
+        while sum_right > 0:
+            w, wl = wlst_right.pop(0), len_right.pop(0)
+            sum_right -= wl
+            new_len = line.length + wl + delimiter_len
+            new_x = centroid_x - new_len // 2 - line_height // 2
+            right_x = new_x + new_len + line_height // 2
+            if new_x < 0 or right_x >= bw:
+                line_valid = False
+            elif mask[pos_y: line_bottom - lh_pad, new_x].mean() < border_thr or\
+                mask[pos_y: line_bottom - lh_pad, right_x].mean() < border_thr:
+                line_valid = False
+                # On the last referenced source line the mask test may be
+                # overridden by the width check alone.
+                if ref_src_lines and (len(wl_list) == 1 or line_right_no + 1 >= len(srcline_wlist)) and \
+                    line_is_valid(line, new_len, delimiter_len, max_central_width, words_length, srcline_wlist, line_right_no, line_height, ref_src_lines):
+                    line_valid = True
+            else:
+                line_valid = True
+            if line_valid:
+                line.append_right(w, wl+delimiter_len, delimiter)
+                line.pos_x = new_x
+                # The word is already appended; a failed check here only
+                # decides that the next word starts a new line.
+                line_valid = line_is_valid(line, new_len, delimiter_len, max_central_width, words_length, srcline_wlist, line_right_no, line_height, ref_src_lines)
+                if not line_valid:
+                    if sum_right > 0:
+                        w, wl = wlst_right.pop(0), len_right.pop(0)
+                        sum_right -= wl
+                    else:
+                        line.strip_spacing()
+                        break
+            if not line_valid:
+                # Start a new line one line height further down with word w.
+                pos_x = centroid_x - wl // 2
+                pos_y = line_bottom
+                line_bottom += line_height
+                line.strip_spacing()
+                line = Line(w, pos_x, pos_y, wl, spacing)
+                lines.append(line)
+                line_right_no += 1
+    # layout top half
+    if sum_left > 0:
+        w, wl = wlst_left.pop(-1), len_left.pop(-1)
+        pos_x = centroid_x - wl // 2
+        pos_y = centroid_y - line_height // 2 - line_height
+        pos_y = max(0, min(pos_y, mask.shape[0] - 1))
+        line_bottom = pos_y + line_height
+        line = Line(w, pos_x, pos_y, wl, spacing)
+        lines.insert(0, line)
+        sum_left -= wl
+        # Mirror of the bottom pass: words are taken from the end of the left
+        # list and prepended, new lines are inserted above.
+        while sum_left > 0:
+            w, wl = wlst_left.pop(-1), len_left.pop(-1)
+            sum_left -= wl
+            new_len = line.length + wl + delimiter_len
+            new_x = centroid_x - new_len // 2 - line_height // 2
+            right_x = new_x + new_len + line_height // 2
+            if new_x <= 0 or right_x >= bw:
+                line_valid = False
+            elif mask[pos_y: line_bottom - lh_pad, new_x].mean() < border_thr or\
+                mask[pos_y: line_bottom - lh_pad, right_x].mean() < border_thr:
+                line_valid = False
+                if ref_src_lines and line_left_no - 1 < 0 and \
+                    line_is_valid(line, new_len, delimiter_len, max_central_width, words_length, srcline_wlist, line_left_no, line_height, ref_src_lines):
+                    line_valid = True
+            else:
+                line_valid = True
+            if line_valid:
+                line.append_left(w, wl+delimiter_len, delimiter)
+                line.pos_x = new_x
+                line_valid = line_is_valid(line, new_len, delimiter_len, max_central_width, words_length, srcline_wlist, line_left_no, line_height, ref_src_lines)
+                if not line_valid:
+                    if sum_left > 0:
+                        w, wl = wlst_left.pop(-1), len_left.pop(-1)
+                        sum_left -= wl
+                    else:
+                        line.strip_spacing()
+                        break
+            if not line_valid :
+                pos_x = centroid_x - wl // 2
+                pos_y -= line_height
+                line_bottom = pos_y + line_height
+                line.strip_spacing()
+                line = Line(w, pos_x, pos_y, wl, spacing)
+                lines.insert(0, line)
+                line_left_no -= 1
+    return lines, (adjust_x, adjust_y)
+def layout_lines_alignside(
+    blk: TextBlock,
+    mask: np.ndarray,
+    words: List[str],
+    origin: List[int],
+    wl_list: List[int],
+    delimiter_len: int,
+    line_height: int,
+    spacing: int = 0,
+    delimiter: str = ' ',
+    word_break: bool = False,
+    max_width: int = np.inf,
+    ref_src_lines = False,
+    srcline_wlist=None,
+)->List[Line]:
+    align_right = blk.fontformat.alignment == TextAlignment.Right
+    ox, oy = origin
+    bh, bw = mask.shape[:2]
+    num_words = len(words)
+    blk_rect = blk.bounding_rect()
+    blk_width = blk_rect[2]
+    lines = []
+    words_length = sum(wl_list)
+    lh_pad = 0
+    if blk.line_spacing > 1:
+        lh_pad = int(np.ceil(line_height - line_height / blk.line_spacing))
+    if num_words > 0:
+        sum_right = np.array(wl_list).sum()
+        w, wl = words.pop(0), wl_list.pop(0)
+        line = Line(w, ox, oy, wl)
+        lines.append(line)
+        sum_right -= wl
+        line_bottom = oy + line_height
+        pos_y = oy
+        line_id = 0
+        while sum_right > 0:
+            w, wl = words.pop(0), wl_list.pop(0)
+            sum_right -= wl
+            new_len = line.length + wl + delimiter_len
+            if align_right:
+                new_x = ox + blk_width - new_len - line_height // 2
+            else:
+                new_x = ox + new_len + line_height // 2
+            line_valid = False
+            if new_x < bw and new_x > 0:
+                if mask[np.clip(pos_y, 0, bh - 1): np.clip(line_bottom - lh_pad, 0, bh), new_x].mean() > 240:
+                    line_valid = True
+                else:
+                    if ref_src_lines and line_id + 1 >= len(srcline_wlist) and line_is_valid(line, new_len, delimiter_len, max_width, words_length, srcline_wlist, line_id, line_height, ref_src_lines):
+                        line_valid = True
+            if line_valid:
+                line_valid = line_is_valid(line, new_len, delimiter_len, max_width, words_length, srcline_wlist, line_id, line_height, ref_src_lines)
+            if line_valid:
+                line.append_right(w, wl+delimiter_len, delimiter)
+            else:
+                pos_y = line_bottom
+                line_bottom += line_height
+                line = Line(w, ox, pos_y, wl)
+                line_id += 1
+                lines.append(line)
+    return lines, (0, 0)
+def layout_text(
+    blk: TextBlock,
+    mask: np.ndarray,
+    mask_xyxy: List,
+    centroid: List,
+    words: List[str],
+    wl_list: List[int],
+    delimiter: str,
+    delimiter_len: int,
+    line_height: int,
+    spacing: int = 0,
+    max_central_width=np.inf,
+    src_is_cjk=False,
+    tgt_is_cjk=False,
+    ref_src_lines = False
+) -> Tuple[str, List, bool, Tuple[int, int]]:
+    """Lay out ``words`` inside ``mask`` and return the text with line breaks.
+
+    Dispatches to ``layout_lines_aligncenter`` when ``blk.alignment`` is
+    ``TextAlignment.Center`` and to ``layout_lines_alignside`` otherwise.
+    For a block with a non-zero ``angle`` the mask is rotated first and the
+    centroid is moved to its position in the rotated mask.
+
+    Args:
+        blk: text block; ``angle``, ``alignment`` and (with
+            ``ref_src_lines``) ``normalizd_width_list`` are read.
+        mask: 2-D array handed to the line layout functions.
+        mask_xyxy: bounds of ``mask``; only the first two entries are read,
+            as the x and y offset added to ``centroid``.
+        centroid: ``(x, y)`` layout centre in mask coordinates.
+        words, wl_list: words and their lengths.
+        delimiter, delimiter_len: separator between words and its length.
+        line_height: vertical step between lines.
+        spacing: padding forwarded to the line layout functions.
+        max_central_width: width limit; ``np.inf`` means the mask width.
+        src_is_cjk, tgt_is_cjk: not used.
+        ref_src_lines: use the block's source line widths as a reference.
+
+    Returns:
+        ``(text, [x, y, w, h], start_from_top, adjust_xy)`` where ``text`` is
+        the lines joined with newlines and ``w``/``h`` span all lines.
+    """
+    angle = blk.angle
+    alignment = blk.alignment
+    start_from_top = False
+    srcline_wlist = None
+    if ref_src_lines:
+        srcline_wlist, srcline_width = blk.normalizd_width_list(normalize=False)
+        # tgtline_width = sum(wl_list) + delimiter_len * max(len(wl_list) - 1, 0)
+        # if tgtline_width < srcline_width:
+        #     min_bbox = blk.min_rect(rotate_back=True)[0]
+        #     x1, y1 = min_bbox[0]
+        #     x2, y2 = min_bbox[2]
+        #     w = x2 - x1
+        #     max_central_width = min(max_central_width, w)
+        #     pass
+        # Centred multi-line sources: lay out from the top when there are
+        # exactly two lines, or when the first line is clearly wider than the
+        # last and close to the widest line.
+        if alignment == TextAlignment.Center and \
+        len(srcline_wlist) > 1:
+            if len(srcline_wlist) == 2:
+                start_from_top = True
+            else:
+                nw = len(srcline_wlist)
+                # nl = min(nw // 2, 2)
+                nl = 1
+                sum_top = sum(srcline_wlist[:nl])
+                sum_btn = sum(srcline_wlist[-nl:])
+                start_from_top = sum_top / sum_btn > 1.2 and srcline_wlist[0] / max(srcline_wlist) > 0.9
+        # Express each source line width relative to srcline_width.
+        srcline_wlist = np.array(srcline_wlist) / srcline_width
+        srcline_wlist = srcline_wlist.tolist()
+        # line_height = min((blk.detected_font_size), line_height)
+    # if ref_src_lines:
+    #     mask = np.ones_like(mask) * 255
+    if max_central_width == np.inf:
+        max_central_width = mask.shape[1]
+    centroid_x, centroid_y = centroid
+    center_x = mask_xyxy[0] + centroid_x
+    center_y = mask_xyxy[1] + centroid_y
+    shifted_x, shifted_y = 0, 0
+    if abs(angle) > 0:
+        # Rotate the mask, then rotate the centroid about the image centre by
+        # the same angle and re-express it relative to the new image centre.
+        old_h, old_w = mask.shape[:2]
+        old_origin = (old_w // 2, old_h // 2)
+        rel_cx, rel_cy = centroid[0] - old_origin[0], centroid[1] - old_origin[1]
+        mask = rotate_image(mask, angle)
+        rad = np.deg2rad(angle)
+        r_sin, r_cos = np.sin(rad), np.cos(rad)
+        new_rel_cy =  -rel_cx * r_sin + rel_cy * r_cos
+        new_rel_cx =  rel_cy * r_sin + rel_cx * r_cos
+        shifted_x, shifted_y = new_rel_cx - rel_cx, new_rel_cy - rel_cy
+        new_h, new_w = mask.shape[:2]
+        new_origin = (new_w // 2, new_h // 2)
+        new_cx, new_cy = new_origin[0] + new_rel_cx, new_origin[1] + new_rel_cy
+        centroid = [int(new_cx), int(new_cy)]
+    if alignment == TextAlignment.Center:
+        lines, adjust_xy = layout_lines_aligncenter(blk, mask, words, centroid, wl_list, delimiter_len, line_height, spacing, delimiter,
+                                         max_central_width, ref_src_lines=ref_src_lines, srcline_wlist=srcline_wlist,
+                                         start_from_top=start_from_top)
+    else:
+        lines, adjust_xy = layout_lines_alignside(blk, mask, words, centroid, wl_list, delimiter_len, line_height, spacing, delimiter, False, max_central_width,
+                                       ref_src_lines=ref_src_lines, srcline_wlist=srcline_wlist)
+    concated_text = []
+    pos_x_lst, pos_right_lst = [], []
+    for line in lines:
+        pos_x_lst.append(line.pos_x)
+        pos_right_lst.append(max(line.pos_x, 0) + line.length)
+        concated_text.append(line.text)
+    concated_text = '\n'.join(concated_text)
+    pos_x_lst = np.array(pos_x_lst)
+    pos_right_lst = np.array(pos_right_lst)
+    # Canvas spanning all lines. NOTE(review): assumes at least one line was
+    # produced; an empty `lines` would fail on min()/lines[0] -- confirm.
+    canvas_l, canvas_r = pos_x_lst.min(), pos_right_lst.max()
+    canvas_t, canvas_b = lines[0].pos_y, lines[-1].pos_y + line_height
+    canvas_h = int(canvas_b - canvas_t)
+    canvas_w = int(canvas_r - canvas_l)
+    # NOTE(review): compares against the literal 1 whereas the dispatch above
+    # uses TextAlignment.Center; presumably these are the same value -- confirm.
+    if alignment == 1:
+        abs_x = int(round(center_x - canvas_w / 2))
+        abs_y = int(round(center_y - canvas_h / 2))
+    else:
+        abs_x = shifted_x
+        abs_y = shifted_y
+    return concated_text, [abs_x, abs_y, canvas_w, canvas_h], start_from_top, adjust_xy

utils/text_processing.py ADDED Viewed

	@@ -0,0 +1,237 @@

+from typing import List, Tuple
+import json
+import os.path as osp
+import os
+HALF2FULL = {i: i + 0xFEE0 for i in range(0x21, 0x7F)}
+HALF2FULL[0x20] = 0x3000
+FULL2HALF = dict((i + 0xFEE0, i) for i in range(0x21, 0x7F))
+FULL2HALF[0x3000] = 0x20
+FULL2HALF[0x3002] = 0x2E
+LANGSET_CJK = {'简体中文', '繁體中文', '日本語'}
+LANGSET_CH = {'简体中文', '繁體中文'}
+PUNSET_RIGHT_ENG = {'.', '?', '!', ':', ';', ')', '}', "\""}
+PUNCTUATION_L = {'「', '『', '【', '《', '〈', '〔', '［', '｛', '（', '(', '[', '{', '“', '‘'}
+PKUSEG_PUNCSET = {' ', '.', '　'}
+PKUSEGPATH = r'data/pkusegscores.json'
+PKUSEGSCORES = None
+CHSEG = None
+def full_len(s: str):
+    """
+    Convert all ASCII characters to their full-width counterpart.
+    https://stackoverflow.com/questions/2422177/python-how-can-i-replace-full-width-characters-with-half-width-characters
+    """
+    return s.translate(HALF2FULL)
+def half_len(s):
+    '''
+    Convert full-width characters to ASCII counterpart
+    '''
+    return s.translate(FULL2HALF)
+def seg_to_chars(text: str) -> List[str]:
+    text = text.replace('\n', '')
+    return [c for c in text]
+def seg_eng(text: str) -> List[str]:
+    text = text.replace('  ', ' ').replace(' .', '.').replace('\n', ' ')
+    processed_text = ''
+    # dumb way to insure spaces between words
+    text_len = len(text)
+    for ii, c in enumerate(text):
+        if c in PUNSET_RIGHT_ENG and ii < text_len - 1:
+            next_c = text[ii + 1]
+            if next_c.isalpha() or next_c.isnumeric():
+                processed_text += c + ' '
+            else:
+                processed_text += c
+        else:
+            processed_text += c
+    word_list = processed_text.split(' ')
+    word_num = len(word_list)
+    if word_num <= 1:
+        return word_list
+    words = []
+    skip_next = False
+    for ii, word in enumerate(word_list):
+        if skip_next:
+            skip_next = False
+            continue
+        if len(word) < 3:
+            append_left, append_right = False, False
+            len_word, len_next, len_prev = len(word), -1, -1
+            if ii < word_num - 1:
+                len_next = len(word_list[ii + 1])
+            if ii > 0:
+                len_prev = len(words[-1])
+            cond_next = (len_word == 2 and len_next <= 4) or len_word == 1
+            cond_prev = (len_word == 2 and len_prev <= 4) or len_word == 1
+            if len_next > 0 and len_prev > 0:
+                if len_next < len_prev:
+                    append_right = cond_next
+                else:
+                    append_left = cond_prev
+            elif len_next > 0:
+                append_right = cond_next
+            elif len_prev > 0:
+                append_left = cond_prev
+            if append_left:
+                words[-1] = words[-1] + ' ' + word
+            elif append_right:
+                words.append(word + ' ' + word_list[ii + 1])
+                skip_next = True
+            else:
+                words.append(word)
+            continue
+        words.append(word)
+    return words
+def _seg_ch_pkg(text: str) -> List[str]:
+    """Segment one space-free chunk of text with pkuseg and regroup the pieces.
+    The chunk is cut by the module-level ``CHSEG`` segmenter into ``(word, tag)``
+    pairs; neighbouring pairs are then glued together according to the
+    ``PKUSEGSCORES[tag_a][tag_b]`` table and to the punctuation rules below.
+    ``CHSEG`` and ``PKUSEGSCORES`` are used without a ``None`` check, so they
+    must already be initialised when this function is called.
+    Args:
+        text: chunk to segment.
+    Returns:
+        The regrouped words in reading order; ``[' ']`` for a single space and
+        ``[]`` for empty input or an empty segmentation.
+    Raises:
+        Exception: anything raised while regrouping is re-raised after the
+            offending text has been printed.
+    """
+    if text == ' ':
+        return [' ']
+    elif text == '':
+        return []
+    segments = CHSEG.cut(text)
+    num_segments = len(segments)
+    if num_segments == 0:
+        return []
+    if num_segments == 1:
+        return [segments[0][0]]
+    words = []
+    # tags[i] lists the tags of the segments merged into words[i]; it is kept
+    # in step with ``words`` but is not returned.
+    tags = []
+    # words/neighbours of this length or longer are not concatenated (unless score == 1)
+    max_concat_len = 4
+    # set when the current segment has already swallowed the next one
+    skip_next = False
+    try:
+        for ii, (word, tag) in enumerate(segments):
+            if skip_next:
+                skip_next = False
+                continue
+            len_word, len_next, len_prev = len(word), -1, -1
+            next_valid, prev_valid = False, False
+            word_next, tag_next = '', ''
+            word_prev, tag_prev = '', ''
+            score_next, score_prev = 0, 0
+            if ii < num_segments - 1:
+                word_next, tag_next = segments[ii + 1]
+                len_next = len(word_next)
+                next_valid = True
+                # punctuation neighbours keep a score of 0
+                if tag_next != 'w' and not word_next in PKUSEG_PUNCSET:
+                    score_next = PKUSEGSCORES[tag][tag_next]
+            if ii > 0:
+                # the "previous word" is the last *output* word (possibly already
+                # a concatenation), while its tag is that of the previous segment
+                word_prev, tag_prev = words[-1], segments[ii - 1][1]
+                len_prev = len(word_prev)
+                prev_valid = True
+                if tag_prev != 'w' and not word_prev[-1] in PKUSEG_PUNCSET:
+                    score_prev = PKUSEGSCORES[tag_prev][tag]
+            append_prev, append_next = False, False
+            if tag == 'w' or word in PKUSEG_PUNCSET:  # punctuation
+                # punctuation listed in PUNCTUATION_L sticks to the following word,
+                # any other single-character punctuation sticks to the preceding one
+                if word in PUNCTUATION_L:
+                    append_next = next_valid
+                elif len_word  <= 1:
+                    append_prev = prev_valid
+            else:
+                # from here on *_valid means "neighbour may be concatenated":
+                # positive score and neighbour shorter than max_concat_len
+                next_valid = score_next > 0 and len_next < max_concat_len
+                prev_valid = score_prev > 0 and len_prev < max_concat_len
+                need_concat = len_word < max_concat_len
+                # a score of exactly 1 always forces the concatenation
+                append_prev = score_prev == 1
+                append_next = score_next == 1
+                if score_prev != 1 and score_next != 1 and need_concat:
+                    append_prev = prev_valid
+                    append_next = next_valid
+                    # both sides possible: keep only one, preferring the shorter
+                    # neighbour and, on equal length, the higher score (prev wins ties)
+                    if append_next and append_prev:
+                        if len_prev == len_next:
+                            if score_prev >= score_next:
+                                append_next = False
+                            else:
+                                append_prev = False
+                        elif len_prev < len_next:
+                            append_next = False
+                        else:
+                            append_prev = False
+            if append_next and append_prev:
+                # only reachable when both scores equal 1: merge prev + word + next
+                words[-1] = word_prev + word + word_next
+                tags[-1] = tags[-1] + [tag, tag_next]
+                skip_next = True
+            elif append_prev:
+                words[-1] = words[-1] + word
+                tags[-1].append(tag)
+            elif append_next:
+                words.append(word + word_next)
+                tags.append([tag, tag_next])
+                skip_next = True
+            else:
+                words.append(word)
+                tags.append([tag])
+    except Exception as e:
+        # report which text triggered the failure, then propagate it unchanged
+        print('exp at line: ', text)
+        raise e
+    return words
+def seg_ch_pkg(text: str):
+    global CHSEG
+    if CHSEG is None:
+        try:
+            import pkuseg
+        except:
+            import spacy_pkuseg as pkuseg
+        CHSEG = pkuseg.pkuseg(postag=True)
+    # pkuseg won't work with half-width punctuations
+    fullen_text = full_len(text).replace('　', ' ')
+    cvt_back = False
+    if fullen_text != text:
+        cvt_back = True
+        text = fullen_text
+    global PKUSEGSCORES
+    if PKUSEGSCORES is None:
+        with open(PKUSEGPATH, 'r', encoding='utf8') as f:
+            PKUSEGSCORES = json.loads(f.read())
+    text_list = text.replace('\n', '').replace('　', ' ').split(' ')
+    result_list = []
+    for ii, text in enumerate(text_list):
+        words = None
+        if text:
+            words = _seg_ch_pkg(text)
+        if words is not None:
+            if ii > 0:
+                words[0] = ' ' + words[0]
+            result_list.extend(words)
+    if cvt_back:
+        # pkuseg w
+        result_list = [half_len(word) for word in result_list]
+    return result_list
+def seg_text(text: str, lang: str) -> Tuple[List, str]:
+    delimiter = ''
+    if lang in LANGSET_CH:
+        words = seg_ch_pkg(text)
+    elif lang in LANGSET_CJK:
+        words = seg_to_chars(text)
+    else:
+        words = seg_eng(text)
+        delimiter = ' '
+    return words, delimiter
+def is_cjk(lang: str) -> bool:
+    return lang in LANGSET_CJK

utils/textblock.py ADDED Viewed

	@@ -0,0 +1,908 @@

+from typing import List, Tuple, Callable
+import numpy as np
+from shapely.geometry import Polygon
+import math
+import copy
+import cv2
+import re
+from .imgproc_utils import union_area, xywh2xyxypoly, rotate_polygons, color_difference
+from .structures import Union, List, Dict, field, nested_dataclass
+from .split_text_region import split_textblock as split_text_region
+from .fontformat import FontFormat, LineSpacingType, TextAlignment, fix_fontweight_qt
+from .textblock_mask import canny_flood
+from .textlines_merge import sort_pnts, Quadrilateral, merge_bboxes_text_region
+# Language labels; group_output() indexes this list with the detector's class id.
+LANG_LIST = ['eng', 'ja', 'unknown']
+# Reverse mapping of LANG_LIST: language label -> index.
+LANGCLS2IDX = {'eng': 0, 'ja': 1, 'unknown': 2}
+# https://ayaka.shn.hk/hanregex/
+# https://medium.com/the-artificial-impostor/detecting-chinese-characters-in-unicode-strings-4ac839ba313a
+# Matches one character in U+AC00-U+D7A3 (Hangul syllables), U+3040-U+30FF
+# (Hiragana and Katakana) or U+4E00-U+9FFF (CJK unified ideographs).
+CJKPATTERN = re.compile(r'[\uac00-\ud7a3\u3040-\u30ff\u4e00-\u9FFF]')
+@nested_dataclass
+class TextBlock:
+    xyxy: List = field(default_factory = lambda: [0, 0, 0, 0])
+    lines: List = field(default_factory = lambda: [])
+    language: str = 'unknown'
+    # font_size: float = -1.
+    distance: np.ndarray = None
+    angle: int = 0
+    vec: List = None
+    norm: float = -1
+    merged: bool = False
+    text: List = field(default_factory = lambda : [])
+    translation: str = ""
+    rich_text: str = ""
+    _bounding_rect: List = None
+    src_is_vertical: bool = None
+    _detected_font_size: float = -1
+    det_model: str = None
+    region_mask: np.ndarray = None
+    region_inpaint_dict: Dict = None
+    fontformat: FontFormat = field(default_factory=lambda: FontFormat())
+    deprecated_attributes: dict = field(default_factory = lambda: dict())
+    @property
+    def vertical(self):
+        return self.fontformat.vertical
+    @vertical.setter
+    def vertical(self, value: bool):
+        self.fontformat.vertical = value
+    @property
+    def font_size(self):
+        return self.fontformat.font_size
+    @font_size.setter
+    def font_size(self, value: float):
+        self.fontformat.font_size = value
+    @property
+    def line_spacing(self):
+        return self.fontformat.line_spacing
+    @line_spacing.setter
+    def line_spacing(self, value: float):
+        self.fontformat.line_spacing = value
+    @property
+    def letter_spacing(self):
+        return self.fontformat.letter_spacing
+    @letter_spacing.setter
+    def letter_spacing(self, value: float):
+        self.fontformat.letter_spacing = value
+    @property
+    def font_family(self):
+        return self.fontformat.font_family
+    @font_family.setter
+    def font_family(self, value: str):
+        self.fontformat.font_family = value
+    @property
+    def font_weight(self):
+        return self.fontformat.font_weight
+    @font_weight.setter
+    def font_weight(self, value: int):
+        self.fontformat.font_weight = value
+    @property
+    def bold(self):
+        return self.fontformat.bold
+    @bold.setter
+    def bold(self, value: bool):
+        self.fontformat.bold = value
+    @property
+    def italic(self):
+        return self.fontformat.italic
+    @italic.setter
+    def italic(self, value: bool):
+        self.fontformat.italic = value
+    @property
+    def underline(self):
+        return self.fontformat.underline
+    @underline.setter
+    def underline(self, value: bool):
+        self.fontformat.underline = value
+    @property
+    def stroke_width(self):
+        return self.fontformat.stroke_width
+    @stroke_width.setter
+    def stroke_width(self, value: float):
+        self.fontformat.stroke_width = value
+    @property
+    def opacity(self):
+        return self.fontformat.opacity
+    @opacity.setter
+    def opacity(self, value: float):
+        self.fontformat.opacity = value
+    @property
+    def shadow_radius(self):
+        return self.fontformat.shadow_radius
+    @shadow_radius.setter
+    def shadow_radius(self, value: float):
+        self.fontformat.shadow_radius = value
+    @property
+    def shadow_strength(self):
+        return self.fontformat.shadow_strength
+    @shadow_strength.setter
+    def shadow_strength(self, value: float):
+        self.fontformat.shadow_strength = value
+    @property
+    def shadow_color(self):
+        return self.fontformat.shadow_color
+    @shadow_color.setter
+    def shadow_color(self, value: float):
+        self.fontformat.shadow_color = value
+    @property
+    def shadow_offset(self):
+        return self.fontformat.shadow_offset
+    @shadow_offset.setter
+    def shadow_offset(self, value: float):
+        self.fontformat.shadow_offset = value
+    @property
+    def fg_colors(self):
+        return self.fontformat.frgb
+    @fg_colors.setter
+    def fg_colors(self, value: Union[np.ndarray, List]):
+        self.fontformat.frgb = value
+    @property
+    def bg_colors(self):
+       return self.fontformat.srgb
+    @bg_colors.setter
+    def bg_colors(self, value: np.ndarray):
+        self.fontformat.srgb = value
+    @property
+    def alignment(self):
+       return self.fontformat.alignment
+    @alignment.setter
+    def alignment(self, value: int):
+        self.fontformat.alignment = value
+    def __post_init__(self):
+        if self.xyxy is not None:
+            self.xyxy = [int(num) for num in self.xyxy]
+        if self.distance is not None:
+            self.distance = np.array(self.distance, np.float32)
+        if self.vec is not None:
+            self.vec = np.array(self.vec, np.float32)
+        if self.src_is_vertical is None:
+            self.src_is_vertical = self.vertical
+        if self.rich_text:
+            self.rich_text = fix_fontweight_qt(self.rich_text)
+        da = self.deprecated_attributes
+        if len(da) > 0:
+            if 'accumulate_color' in da:
+                self.fg_colors = np.array([da['fg_r'], da['fg_g'], da['fg_b']], dtype=np.float32)
+                self.bg_colors = np.array([da['bg_r'], da['bg_g'], da['bg_b']], dtype=np.float32)
+                nlines = len(self)
+                if da['accumulate_color'] and len(self) > 0:
+                    self.fg_colors /= nlines
+                    self.bg_colors /= nlines
+            deprecated_blk_fmt_keys = {'vertical': None, 'line_spacing': None, 'letter_spacing': None, 'bold': None, 'underline': None, 'italic': None,
+                'opacity': None, 'shadow_radius': None, 'shadow_strength': None, 'shadow_color': None, 'shadow_offset': None,
+                 'font_size': 'size', 'font_family': None, '_alignment': 'alignment', 'default_stroke_width': 'stroke_width', 'font_weight': None,
+                 'fg_colors': 'frgb', 'bg_colors': 'srgb'
+            }
+            for src_k, v in da.items():
+                if src_k in deprecated_blk_fmt_keys:
+                    if deprecated_blk_fmt_keys[src_k] is None:
+                        tgt_k = src_k
+                    else:
+                        tgt_k = deprecated_blk_fmt_keys[src_k]
+                    setattr(self.fontformat, tgt_k, v)
+            self.font_weight = fix_fontweight_qt(self.font_weight)
+        del self.deprecated_attributes
+    @property
+    def detected_font_size(self):
+        if self._detected_font_size > 0:
+            return self._detected_font_size
+        return self.font_size
+    def adjust_bbox(self, with_bbox=False, x_range=None, y_range=None):
+        lines = self.lines_array().astype(np.int32)
+        if with_bbox:
+            self.xyxy[0] = min(lines[..., 0].min(), self.xyxy[0])
+            self.xyxy[1] = min(lines[..., 1].min(), self.xyxy[1])
+            self.xyxy[2] = max(lines[..., 0].max(), self.xyxy[2])
+            self.xyxy[3] = max(lines[..., 1].max(), self.xyxy[3])
+        else:
+            self.xyxy[0] = lines[..., 0].min()
+            self.xyxy[1] = lines[..., 1].min()
+            self.xyxy[2] = lines[..., 0].max()
+            self.xyxy[3] = lines[..., 1].max()
+        if x_range is not None:
+            self.xyxy[0] = np.clip(self.xyxy[0], x_range[0], x_range[1])
+            self.xyxy[2] = np.clip(self.xyxy[2], x_range[0], x_range[1])
+        if y_range is not None:
+            self.xyxy[1] = np.clip(self.xyxy[1], y_range[0], y_range[1])
+            self.xyxy[3] = np.clip(self.xyxy[3], y_range[0], y_range[1])
+    def sort_lines(self):
+        if self.distance is not None:
+            idx = np.argsort(self.distance)
+            self.distance = self.distance[idx]
+            lines = np.array(self.lines, dtype=np.int32)
+            self.lines = lines[idx].tolist()
+    def lines_array(self, dtype=np.float64):
+        return np.array(self.lines, dtype=dtype)
+    def set_lines_by_xywh(self, xywh: np.ndarray, angle=0, x_range=None, y_range=None, adjust_bbox=False):
+        if isinstance(xywh, List):
+            xywh = np.array(xywh)
+        lines = xywh2xyxypoly(np.array([xywh]))
+        if angle != 0:
+            cx, cy = xywh[0], xywh[1]
+            cx += xywh[2] / 2.
+            cy += xywh[3] / 2.
+            lines = rotate_polygons([cx, cy], lines, angle)
+        lines = lines.reshape(-1, 4, 2)
+        if x_range is not None:
+            lines[..., 0] = np.clip(lines[..., 0], x_range[0], x_range[1])
+        if y_range is not None:
+            lines[..., 1] = np.clip(lines[..., 1], y_range[0], y_range[1])
+        self.lines = lines.tolist()
+        if adjust_bbox:
+            self.adjust_bbox()
+    def aspect_ratio(self) -> float:
+        min_rect = self.min_rect()
+        middle_pnts = (min_rect[:, [1, 2, 3, 0]] + min_rect) / 2
+        norm_v = np.linalg.norm(middle_pnts[:, 2] - middle_pnts[:, 0])
+        norm_h = np.linalg.norm(middle_pnts[:, 1] - middle_pnts[:, 3])
+        return norm_v / norm_h
+    def center(self) -> np.ndarray:
+        xyxy = np.array(self.xyxy)
+        return (xyxy[:2] + xyxy[2:]) / 2
+    def unrotated_polygons(self, ids=None) -> np.ndarray:
+        angled = self.angle != 0
+        center = self.center()
+        polygons = self.lines_array().reshape(-1, 8)
+        if ids is not None:
+            polygons = polygons[ids]
+        if angled:
+            polygons = rotate_polygons(center, polygons, self.angle)
+        return angled, center, polygons
+    def min_rect(self, rotate_back=True, ids=None) -> List[int]:
+        angled, center, polygons = self.unrotated_polygons(ids=ids)
+        min_x = polygons[:, ::2].min()
+        min_y = polygons[:, 1::2].min()
+        max_x = polygons[:, ::2].max()
+        max_y = polygons[:, 1::2].max()
+        min_bbox = np.array([[min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]])
+        if angled and rotate_back:
+            min_bbox = rotate_polygons(center, min_bbox, -self.angle)
+        return min_bbox.reshape(-1, 4, 2).astype(np.int64)
+    def normalizd_width_list(self, normalize=True):
+        angled, center, polygons = self.unrotated_polygons()
+        width_list = []
+        for polygon in polygons:
+            width_list.append((polygon[[2, 4]] - polygon[[0, 6]]).mean())
+        sum_width = sum(width_list)
+        if normalize:
+            width_list = np.array(width_list)
+            width_list = width_list / sum_width
+            width_list = width_list.tolist()
+        return width_list, sum_width
+    # equivalent to qt's boundingRect, ignore angle
+    def bounding_rect(self) -> List[int]:
+        if self._bounding_rect is None:
+        # if True:
+            min_bbox = self.min_rect(rotate_back=False)[0]
+            x, y = min_bbox[0]
+            w, h = min_bbox[2] - min_bbox[0]
+            return [int(x), int(y), int(w), int(h)]
+        return self._bounding_rect
+    def __getattribute__(self, name: str):
+        if name == 'pts':
+            return self.lines_array()
+        # else:
+        return object.__getattribute__(self, name)
+    def __len__(self):
+        return len(self.lines)
+    def __getitem__(self, idx):
+        return self.lines[idx]
+    def to_dict(self, deep_copy=False):
+        blk_dict = vars(self)
+        if deep_copy:
+            blk_dict = copy.deepcopy(blk_dict)
+        return blk_dict
+    def get_transformed_region(self, img: np.ndarray, idx: int, textheight: int, maxwidth: int = None) -> np.ndarray :
+        im_h, im_w = img.shape[:2]
+        line = np.round(np.array(self.lines[idx])).astype(np.int64)
+        if not self.src_is_vertical and self.det_model == 'ctd':
+            # ctd detected horizontal bbox is smaller than GT
+            expand_size = max(int(self._detected_font_size * 0.1), 3)
+            rad = np.deg2rad(self.angle)
+            shifted_vec = np.array([[[-1, -1],[1, -1],[1, 1],[-1, 1]]])
+            shifted_vec = shifted_vec * np.array([[[np.sin(rad), np.cos(rad)]]]) * expand_size
+            line = line + shifted_vec
+            line[..., 0] = np.clip(line[..., 0], 0, im_w)
+            line[..., 1] = np.clip(line[..., 1], 0, im_h)
+            line = np.round(line[0]).astype(np.int64)
+        x1, y1, x2, y2 = line[:, 0].min(), line[:, 1].min(), line[:, 0].max(), line[:, 1].max()
+        x1 = np.clip(x1, 0, im_w)
+        y1 = np.clip(y1, 0, im_h)
+        x2 = np.clip(x2, 0, im_w)
+        y2 = np.clip(y2, 0, im_h)
+        img_croped = img[y1: y2, x1: x2]
+        direction = 'v' if self.src_is_vertical else 'h'
+        src_pts = line.copy()
+        src_pts[:, 0] -= x1
+        src_pts[:, 1] -= y1
+        middle_pnt = (src_pts[[1, 2, 3, 0]] + src_pts) / 2
+        vec_v = middle_pnt[2] - middle_pnt[0]   # vertical vectors of textlines
+        vec_h = middle_pnt[1] - middle_pnt[3]   # horizontal vectors of textlines
+        norm_v = np.linalg.norm(vec_v)
+        norm_h = np.linalg.norm(vec_h)
+        if textheight is None:
+            if direction == 'h' :
+                textheight = int(norm_v)
+            else:
+                textheight = int(norm_h)
+        if norm_v <= 0 or norm_h <= 0:
+            print('invalid textpolygon to target img')
+            return np.zeros((textheight, textheight, 3), dtype=np.uint8)
+        ratio = norm_v / norm_h
+        if direction == 'h' :
+            h = int(textheight)
+            w = int(round(textheight / ratio))
+            dst_pts = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]]).astype(np.float32)
+            M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
+            if M is None:
+                print('invalid textpolygon to target img')
+                return np.zeros((textheight, textheight, 3), dtype=np.uint8)
+            region = cv2.warpPerspective(img_croped, M, (w, h))
+        elif direction == 'v' :
+            w = int(textheight)
+            h = int(round(textheight * ratio))
+            dst_pts = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]]).astype(np.float32)
+            M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
+            if M is None:
+                print('invalid textpolygon to target img')
+                return np.zeros((textheight, textheight, 3), dtype=np.uint8)
+            region = cv2.warpPerspective(img_croped, M, (w, h))
+            region = cv2.rotate(region, cv2.ROTATE_90_COUNTERCLOCKWISE)
+        if maxwidth is not None:
+            h, w = region.shape[: 2]
+            if w > maxwidth:
+                region = cv2.resize(region, (maxwidth, h))
+        return region
+    def get_text(self) -> str:
+        if isinstance(self.text, str):
+            return self.text
+        text = ''
+        for t in self.text:
+            if text and t:
+                if text[-1].isalpha() and t[0].isalpha() \
+                    and CJKPATTERN.search(text[-1]) is None \
+                    and CJKPATTERN.search(t[0]) is None:
+                    text += ' '
+            text += t
+        return text.strip()
+    def set_font_colors(self, fg_colors = None, bg_colors = None):
+        if fg_colors is not None:
+            self.fg_colors = fg_colors
+        if bg_colors is not None:
+            self.bg_colors = bg_colors
+    def update_font_colors(self, fg_colors: np.ndarray, bg_colors: np.ndarray):
+        nlines = len(self)
+        if nlines > 0:
+            if not isinstance(fg_colors, np.ndarray):
+                fg_colors = np.array(fg_colors, dtype=np.float32)
+            if not isinstance(bg_colors, np.ndarray):
+                bg_colors = np.array(bg_colors, dtype=np.float32)
+            if not isinstance(self.fg_colors, np.ndarray):
+                self.fg_colors = np.array(self.fg_colors, dtype=np.float32)
+            if not isinstance(self.bg_colors, np.ndarray):
+                self.bg_colors = np.array(self.bg_colors, dtype=np.float32)
+            self.fg_colors += fg_colors / nlines
+            self.bg_colors += bg_colors / nlines
+    def get_font_colors(self, bgr=False):
+        frgb = np.array(self.fg_colors).astype(np.int32)
+        brgb = np.array(self.bg_colors).astype(np.int32)
+        if bgr:
+            frgb = frgb[::-1]
+            brgb = brgb[::-1]
+        return frgb, brgb
+    def xywh(self):
+        x, y, w, h = self.xyxy
+        return [x, y, w-x, h-y]
+    def recalulate_alignment(self):
+        angled, center, polygons = self.unrotated_polygons()
+        polygons = polygons.reshape(-1, 4, 2)
+        left_std = np.std(polygons[:, 0, 0])
+        right_std = np.std(polygons[:, 1, 0])
+        center_std = np.std((polygons[:, 0, 0] + polygons[:, 1, 0]) / 2) * 0.7
+        if left_std < right_std and left_std < center_std:
+            self.alignment = TextAlignment.Left
+        elif right_std < left_std and right_std < center_std:
+            self.alignment = TextAlignment.Right
+        else:
+            self.alignment = TextAlignment.Center
+    def recalulate_stroke_width(self, color_diff_tol = 15, stroke_width: float = 0.2):
+        if color_difference(*self.get_font_colors()) < color_diff_tol:
+            self.stroke_width = 0.
+        else:
+            self.stroke_width = stroke_width
+    def adjust_pos(self, dx: int, dy: int):
+        self.xyxy[0] += dx
+        self.xyxy[1] += dy
+        self.xyxy[2] += dx
+        self.xyxy[3] += dy
+        if self._bounding_rect is not None:
+            self._bounding_rect[0] += dx
+            self._bounding_rect[1] += dy
+    def line_coord_valid(self, rect):
+        if self.det_model is None:
+            return False
+        if rect is None:
+            rect = self.bounding_rect()
+        min_bbox = self.min_rect(rotate_back=True)[0]
+        x1, y1 = min_bbox[0]
+        x2, y2 = min_bbox[2]
+        w = x2 - x1
+        h = y2 - y1
+        if w < 1 or h < 1:
+            return False
+        rx1, ry1, rx2, ry2 = rect
+        rx2 += rx1
+        ry2 += ry1
+        intersect = max(min(x2, rx2) - max(x1, rx1), 0) * max(min(y2, ry2) - max(y1, ry1), 0)
+        if intersect == 0:
+            return False
+        if intersect / (w * h) < 0.6:
+            return False
+        return True
+def sort_regions(regions: List[TextBlock], right_to_left=None) -> List[TextBlock]:
+    # from manga image translator
+    # Sort regions from right to left, top to bottom
+    nr = len(regions)
+    if right_to_left is None and nr > 0:
+        nv = 0
+        for r in regions:
+            if r.vertical:
+                nv += 1
+        right_to_left = nv / nr > 0
+    sorted_regions = []
+    for region in sorted(regions, key=lambda region: region.center()[1]):
+        for i, sorted_region in enumerate(sorted_regions):
+            if region.center()[1] > sorted_region.xyxy[3]:
+                continue
+            if region.center()[1] < sorted_region.xyxy[1]:
+                sorted_regions.insert(i + 1, region)
+                break
+            # y center of region inside sorted_region so sort by x instead
+            if right_to_left and region.center()[0] > sorted_region.center()[0]:
+                sorted_regions.insert(i, region)
+                break
+            if not right_to_left and region.center()[0] < sorted_region.center()[0]:
+                sorted_regions.insert(i, region)
+                break
+        else:
+            sorted_regions.append(region)
+    return sorted_regions
+def examine_textblk(blk: TextBlock, im_w: int, im_h: int, sort: bool = False) -> None:
+    lines = blk.lines_array()
+    middle_pnts = (lines[:, [1, 2, 3, 0]] + lines) / 2
+    vec_v = middle_pnts[:, 2] - middle_pnts[:, 0]   # vertical vectors of textlines
+    vec_h = middle_pnts[:, 1] - middle_pnts[:, 3]   # horizontal vectors of textlines
+    # if sum of vertical vectors is longer, then text orientation is vertical, and vice versa.
+    center_pnts = (lines[:, 0] + lines[:, 2]) / 2
+    v = np.sum(vec_v, axis=0)
+    h = np.sum(vec_h, axis=0)
+    norm_v, norm_h = np.linalg.norm(v), np.linalg.norm(h)
+    vertical = blk.src_is_vertical
+    # calcuate distance between textlines and origin
+    if vertical:
+        primary_vec, primary_norm = v, norm_v
+        distance_vectors = center_pnts - np.array([[im_w, 0]], dtype=np.float64)   # vertical manga text is read from right to left, so origin is (imw, 0)
+        font_size = int(round(norm_h / len(lines)))
+    else:
+        primary_vec, primary_norm = h, norm_h
+        distance_vectors = center_pnts - np.array([[0, 0]], dtype=np.float64)
+        font_size = int(round(norm_v / len(lines)))
+    rotation_angle = int(math.atan2(primary_vec[1], primary_vec[0]) / math.pi * 180)     # rotation angle of textlines
+    distance = np.linalg.norm(distance_vectors, axis=1)     # distance between textlinecenters and origin
+    rad_matrix = np.arccos(np.einsum('ij, j->i', distance_vectors, primary_vec) / (distance * primary_norm))
+    distance = np.abs(np.sin(rad_matrix) * distance)
+    blk.lines = lines.astype(np.int32).tolist()
+    blk.distance = distance
+    blk.angle = rotation_angle
+    if vertical:
+        blk.angle -= 90
+    if abs(blk.angle) < 3:
+        blk.angle = 0
+    blk.font_size = font_size
+    blk.vec = primary_vec
+    blk.norm = primary_norm
+    if sort:
+        blk.sort_lines()
+def try_merge_textline(blk: TextBlock, blk2: TextBlock, fntsize_tol=1.7, distance_tol=2) -> bool:
+    if blk2.merged:
+        return False
+    fntsize_div = blk.font_size / blk2.font_size
+    num_l1, num_l2 = len(blk), len(blk2)
+    fntsz_avg = (blk.font_size * num_l1 + blk2.font_size * num_l2) / (num_l1 + num_l2)
+    vec_prod = blk.vec @ blk2.vec
+    vec_sum = blk.vec + blk2.vec
+    cos_vec = vec_prod / blk.norm / blk2.norm
+    # distance = blk2.distance[-1] - blk.distance[-1]
+    # distance_p1 = np.linalg.norm(np.array(blk2.lines[-1][0]) - np.array(blk.lines[-1][0]))
+    minrect1 = blk.min_rect(ids=[-1])[0]
+    xyxy1 = [*minrect1[0], *minrect1[2]]
+    minrect2 = blk2.min_rect(ids=[-1])[0]
+    xyxy2 = [*minrect2[0], *minrect2[2]]
+    distance_x = max(xyxy1[0], xyxy2[0]) - min(xyxy1[2], xyxy2[2])
+    distance_y = max(xyxy1[1], xyxy2[1]) - min(xyxy1[3], xyxy2[3])
+    l1, l2 = Polygon(blk.lines[-1]), Polygon(blk2.lines[-1])
+    if not l1.intersects(l2):
+        if blk.vertical:
+            if distance_y > 0:
+                return False
+        else:
+            if distance_x > 0:
+                return False
+        if fntsize_div > fntsize_tol or 1 / fntsize_div > fntsize_tol:
+            return False
+        if abs(cos_vec) < 0.866:   # cos30
+            return False
+        # if distance > distance_tol * fntsz_avg:
+        #     return False
+        if blk.vertical and blk2.vertical and distance_x > fntsz_avg * 0.8:
+            return False
+        if not blk.vertical and distance_y > fntsz_avg * 0.5:
+            return False
+    # merge
+    for line in blk2.lines:
+        blk.lines.append(line)
+    blk.vec = vec_sum
+    blk.angle = int(round(np.rad2deg(math.atan2(vec_sum[1], vec_sum[0]))))
+    if blk.vertical:
+        blk.angle -= 90
+    blk.norm = np.linalg.norm(vec_sum)
+    blk.distance = np.append(blk.distance, blk2.distance[-1])
+    blk.font_size = fntsz_avg
+    blk2.merged = True
+    return True
+def merge_textlines(blk_list: List[TextBlock]) -> List[TextBlock]:
+    if len(blk_list) < 2:
+        return blk_list
+    blk_list.sort(key=lambda blk: blk.distance[0])
+    merged_list = []
+    for ii, current_blk in enumerate(blk_list):
+        if current_blk.merged:
+            continue
+        for jj, blk in enumerate(blk_list[ii+1:]):
+            try_merge_textline(current_blk, blk)
+        merged_list.append(current_blk)
+    for blk in merged_list:
+        blk.adjust_bbox(with_bbox=False)
+    return merged_list
+def split_textblk(blk: TextBlock):
+    font_size, distance, lines = blk.font_size, blk.distance, blk.lines
+    l0 = np.array(blk.lines[0])
+    lines.sort(key=lambda line: np.linalg.norm(np.array(line[0]) - l0[0]))
+    distance_tol = font_size * 2
+    current_blk = copy.deepcopy(blk)
+    current_blk.lines = [l0]
+    sub_blk_list = [current_blk]
+    textblock_splitted = False
+    for jj, line in enumerate(lines[1:]):
+        l1, l2 = Polygon(lines[jj]), Polygon(line)
+        split = False
+        if not l1.intersects(l2):
+            line_disance = abs(distance[jj+1] - distance[jj])
+            if line_disance > distance_tol:
+                split = True
+            elif blk.vertical and abs(blk.angle) < 15:
+                if len(current_blk.lines) > 1 or line_disance > font_size:
+                    split = abs(lines[jj][0][1] - line[0][1]) > font_size
+        if split:
+            current_blk = copy.deepcopy(current_blk)
+            current_blk.lines = [line]
+            sub_blk_list.append(current_blk)
+        else:
+            current_blk.lines.append(line)
+    if len(sub_blk_list) > 1:
+        textblock_splitted = True
+        for current_blk in sub_blk_list:
+            current_blk.adjust_bbox(with_bbox=False)
+    return textblock_splitted, sub_blk_list
+def group_output(blks, lines, im_w, im_h, mask=None, sort_blklist=True, canvas=None) -> List[TextBlock]:
+    blk_list: List[TextBlock] = []
+    scattered_lines = {'ver': [], 'hor': []}
+    for bbox, cls, conf in zip(*blks):
+        # cls could give wrong result
+        blk_list.append(TextBlock(bbox, language=LANG_LIST[cls]))
+    # step1: filter & assign lines to textblocks
+    bbox_score_thresh = 0.4
+    mask_score_thresh = 0.1
+    for ii, line in enumerate(lines):
+        line, is_vertical = sort_pnts(line)
+        bx1, bx2 = line[:, 0].min(), line[:, 0].max()
+        by1, by2 = line[:, 1].min(), line[:, 1].max()
+        bbox_score, bbox_idx = -1, -1
+        line_area = (by2-by1) * (bx2-bx1)
+        for jj, blk in enumerate(blk_list):
+            score = union_area(blk.xyxy, [bx1, by1, bx2, by2]) / line_area
+            if bbox_score < score:
+                bbox_score = score
+                bbox_idx = jj
+        if bbox_score > bbox_score_thresh:
+            blk_list[bbox_idx].lines.append(line)
+            blk_list[bbox_idx].adjust_bbox(with_bbox=True)
+        else:   # if no textblock was assigned, check whether there is "enough" textmask
+            if mask is not None:
+                mask_score = mask[by1: by2, bx1: bx2].mean() / 255
+                if mask_score < mask_score_thresh:
+                    continue
+            blk = TextBlock([bx1, by1, bx2, by2], [line])
+            blk.vertical = blk.src_is_vertical = is_vertical
+            examine_textblk(blk, im_w, im_h, sort=False)
+            if blk.vertical:
+                scattered_lines['ver'].append(blk)
+            else:
+                scattered_lines['hor'].append(blk)
+    # step2: filter textblocks, sort & split textlines
+    final_blk_list = []
+    for blk in blk_list:
+        # filter textblocks
+        if len(blk.lines) == 0:
+            bx1, by1, bx2, by2 = blk.xyxy
+            if mask is not None:
+                mask_score = mask[by1: by2, bx1: bx2].mean() / 255
+                if mask_score < mask_score_thresh:
+                    continue
+            xywh = np.array([[bx1, by1, bx2-bx1, by2-by1]])
+            blk.lines = xywh2xyxypoly(xywh).reshape(-1, 4, 2).tolist()
+        else:
+            blk.adjust_bbox(with_bbox=False)
+        examine_textblk(blk, im_w, im_h, sort=True)
+        # split manga text if there is a distance gap
+        textblock_splitted = False
+        if len(blk.lines) > 1:
+            if blk.language == 'ja':
+                textblock_splitted = True
+            elif blk.vertical:
+                textblock_splitted = True
+        # if textblock_splitted:
+        #     textblock_splitted, sub_blk_list = split_textblk(blk)
+        # else:
+        sub_blk_list = [blk]
+        # modify textblock to fit its textlines
+        if not textblock_splitted:
+            for blk in sub_blk_list:
+                blk.adjust_bbox(with_bbox=True)
+        final_blk_list += sub_blk_list
+    _final_blk_list = []
+    for blk in final_blk_list:
+        if blk.vertical:
+            scattered_lines['ver'].append(blk)
+        else:
+            _final_blk_list.append(blk)
+    final_blk_list = _final_blk_list
+    # step3: merge scattered lines, sort textblocks by "grid"
+    final_blk_list += merge_textlines(scattered_lines['hor'])
+    final_blk_list += merge_textlines(scattered_lines['ver'])
+    if sort_blklist:
+        final_blk_list = sort_regions(final_blk_list, )
+    for blk in final_blk_list:
+        blk.distance = None
+    if len(final_blk_list) > 1:
+        _final_blks = [final_blk_list[0]]
+        for blk in final_blk_list[1:]:
+            ax1, ay1, ax2, ay2 = blk.xyxy
+            keep_blk = True
+            aarea = (ax2 - ax1) * (ay2 - ay1) + 1e-6
+            for eb in _final_blks:
+                bx1, by1, bx2, by2 = eb.xyxy
+                x1 = max(ax1, bx1)
+                y1 = max(ay1, by1)
+                x2 = min(ax2, bx2)
+                y2 = min(ay2, by2)
+                if y2 < y1 or x2 < x1:
+                    continue
+                inter_area = (y2 - y1) * (x2 - x1)
+                if inter_area / aarea > 0.9:
+                    keep_blk = False
+                    break
+            if keep_blk:
+                _final_blks.append(blk)
+        final_blk_list = _final_blks
+    for blk in final_blk_list:
+        if blk.language != 'ja' and not blk.vertical:
+            num_lines = len(blk.lines)
+            if num_lines == 0:
+                continue
+        blk._detected_font_size = blk.font_size
+    return final_blk_list
+def visualize_textblocks(canvas, blk_list:  List[TextBlock]):
+    lw = max(round(sum(canvas.shape) / 2 * 0.003), 2)  # line width
+    for ii, blk in enumerate(blk_list):
+        bx1, by1, bx2, by2 = blk.xyxy
+        cv2.rectangle(canvas, (bx1, by1), (bx2, by2), (127, 255, 127), lw)
+        lines = blk.lines_array(dtype=np.int32)
+        for jj, line in enumerate(lines):
+            cv2.putText(canvas, str(jj), line[0], cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,127,0), 1)
+            cv2.polylines(canvas, [line], True, (0,127,255), 2)
+        cv2.polylines(canvas, [blk.min_rect()], True, (127,127,0), 2)
+        center = [int((bx1 + bx2)/2), int((by1 + by2)/2)]
+        cv2.putText(canvas, str(blk.angle), center, cv2.FONT_HERSHEY_SIMPLEX, 1, (127,127,255), 2)
+        cv2.putText(canvas, str(ii), (bx1, by1 + lw + 2), 0, lw / 3, (255,127,127), max(lw-1, 1), cv2.LINE_AA)
+    return canvas
+def collect_textblock_regions(img: np.ndarray, textblk_lst: List[TextBlock], text_height=48, maxwidth=8100, split_textblk = False, seg_func: Callable = None):
+    regions = []
+    textblk_lst_indices = []
+    for blk_idx, textblk in enumerate(textblk_lst):
+        for ii in range(len(textblk)):
+            if split_textblk and len(textblk) == 1:
+                seg_func = canny_flood
+                region = textblk.get_transformed_region(img, ii, None, maxwidth=None)
+                mask  = seg_func(region)[0]
+                split_lines = split_text_region(mask)[0]
+                for jj, line in enumerate(split_lines):
+                    bottom = line[3]
+                    if len(split_lines) == 1:
+                        bottom = region.shape[0]
+                    r = region[line[1]: bottom]
+                    h, w = r.shape[:2]
+                    tgt_h, tgt_w = text_height, min(maxwidth, int(text_height / h * w))
+                    if tgt_h != h or tgt_w != w:
+                        r = cv2.resize(r, (tgt_w, tgt_h), interpolation=cv2.INTER_LINEAR)
+                    regions.append(r)
+                    textblk_lst_indices.append(blk_idx)
+                #     cv2.imwrite(f'local_region{jj}.jpg', r)
+                # cv2.imwrite('local_mask.jpg', mask)
+                # cv2.imwrite('local_region.jpg',region)
+            else:
+                textblk_lst_indices.append(blk_idx)
+                region = textblk.get_transformed_region(img, ii, text_height, maxwidth=maxwidth)
+                regions.append(region)
+    return regions, textblk_lst_indices
+def mit_merge_textlines(textlines: List[Quadrilateral], width: int, height: int, verbose: bool = False) -> List[TextBlock]:
+    # from https://github.com/zyddnys/manga-image-translator
+    quadrilateral_lst = []
+    for line in textlines:
+        if not isinstance(line, Quadrilateral):
+            line = Quadrilateral(np.array(line), '',  1.)
+        quadrilateral_lst.append(line)
+    textlines = quadrilateral_lst
+    text_regions: List[TextBlock] = []
+    textlines_total_area = sum([txtln.area for txtln in textlines])
+    for (txtlns, fg_color, bg_color) in merge_bboxes_text_region(textlines, width, height):
+        total_logprobs = 0
+        for txtln in txtlns:
+            total_logprobs += np.log(txtln.prob) * txtln.area
+        total_logprobs /= textlines_total_area
+        font_size = int(min([txtln.font_size for txtln in txtlns]))
+        angle = np.rad2deg(np.mean([txtln.angle for txtln in txtlns])) - 90
+        if abs(angle) < 3:
+            angle = 0
+        lines = [txtln.pts for txtln in txtlns]
+        texts = [txtln.text for txtln in txtlns]
+        ffmt = FontFormat(font_size=font_size, frgb=fg_color, srgb=bg_color)
+        nv = 0
+        for txtln in txtlns:
+            if txtln.direction == 'v':
+                nv += 1
+        is_vertical = nv >= len(txtlns) // 2
+        region = TextBlock(
+            lines=lines, text=texts, angle=angle, fontformat=ffmt,
+            _detected_font_size=font_size, src_is_vertical=is_vertical, vertical=is_vertical)
+        region.adjust_bbox()
+        if region.src_is_vertical:
+            region.alignment = 1
+        else:
+            region.recalulate_alignment()
+        text_regions.append(region)
+    return text_regions

utils/textblock_mask.py ADDED Viewed

	@@ -0,0 +1,394 @@

+import cv2
+import numpy as np
+from typing import Tuple
+from .imgproc_utils import draw_connected_labels
+from .stroke_width_calculator import strokewidth_check
+opencv_inpaint = lambda img, mask: cv2.inpaint(img, mask, 3, cv2.INPAINT_NS)
+def show_img_by_dict(imgdicts):
+    for keyname in imgdicts.keys():
+        cv2.imshow(keyname, imgdicts[keyname])
+    cv2.waitKey(0)
+# Compute the mean RGB of the text
+def letter_calculator(img, mask, bground_rgb, show_process=False):
+    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+    # rgb to grey
+    aver_bground_rgb = 0.299 * bground_rgb[0] + 0.587 * bground_rgb[1] + 0.114 * bground_rgb[2]
+    thresh_low = 127
+    retval, threshed = cv2.threshold(gray, 127, 255, cv2.THRESH_OTSU)
+    if aver_bground_rgb < thresh_low:
+        threshed = 255 - threshed
+    threshed = 255 - threshed
+    threshed = cv2.bitwise_and(threshed, mask)
+    le_region = np.where(threshed==255)
+    mat_region = img[le_region]
+    if mat_region.shape[0] == 0:
+        # retval, threshed = cv2.threshold(gray, 20, 255, cv2.THRESH_BINARY)
+        # cv2.imshow("xxx", threshed)
+        # cv2.imshow("2xxx", img)
+        # cv2.waitKey(0)
+        return [-1, -1, -1], threshed
+    letter_rgb = np.mean(mat_region, axis=0).astype(int).tolist()
+    if show_process:
+        cv2.imshow("thresh", threshed)
+        # ocr_protest(threshed)
+        imgcp = np.copy(img)
+        imgcp *= 0
+        imgcp += 127
+        imgcp[le_region] = letter_rgb
+        cv2.imshow("letter_img", imgcp)
+        # cv2.waitKey(0)
+    return letter_rgb, threshed
+# Preprocessing step that makes the text color extraction a bit more accurate
+def usm(src):
+    blur_img = cv2.GaussianBlur(src, (0, 0), 5)
+    usm = cv2.addWeighted(src, 1.5, blur_img, -0.5, 0)
+    h, w = src.shape[:2]
+    result = np.zeros([h, w*2, 3], dtype=src.dtype)
+    result[0:h,0:w,:] = src
+    result[0:h,w:2*w,:] = usm
+    return usm
+# Compute the mean RGB of the text, method 2; using the median instead of the mean might work better
+def textrgb_calculator(img, text_mask, show_process=False):
+    text_mask = cv2.erode(text_mask, (3, 3), iterations=1)
+    usm_img = usm(img)
+    overall_meanrgb = np.mean(usm_img[np.where(text_mask==255)], axis=0)
+    if show_process:
+        colored_text_board = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8) + 127
+        colored_text_board[np.where(text_mask==255)] = overall_meanrgb
+        cv2.imshow("usm", usm_img)
+        cv2.imshow("textcolor", colored_text_board)
+    return overall_meanrgb.astype(np.uint8)
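+# Compute the mean RGB and the standard deviation of the background (the code returns the mean squared deviation; the square root is commented out)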
+def bground_calculator(buble_img, back_ground_mask, dilate=True):
+    kernel = np.ones((3,3),np.uint8)
+    if dilate:
+        back_ground_mask = cv2.dilate(back_ground_mask, kernel, iterations = 1)
+    bground_region = np.where(back_ground_mask==0)
+    sd = -1
+    if len(bground_region[0]) != 0:
+        pix_array = buble_img[bground_region]
+        bground_aver = np.mean(pix_array, axis=0).astype(int)
+        pix_array - bground_aver
+        gray = cv2.cvtColor(buble_img, cv2.COLOR_RGB2GRAY)
+        gray_pixarray = gray[bground_region]
+        gray_aver = np.mean(gray_pixarray)
+        gray_pixarray = gray_pixarray - gray_aver
+        gray_pixarray = np.power(gray_pixarray, 2)
+        # gray_pixarray = np.sqrt(gray_pixarray)
+        sd = np.mean(gray_pixarray)
+    else: bground_aver = np.array([-1, -1, -1])
+    return bground_aver, bground_region, sd
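+# Input: text block ROI. Segment the text mask, compute the text BGR mean and standard deviation from the mask, and decide between solid-color fill and inpainting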
+def canny_flood(img, show_process=False, inpaint_sdthresh=10, **kwargs):
+    """Segment a text mask and a balloon mask from a text-block crop with Canny edges and flood fills.
+    Args:
+        img: image crop of the text block (assumed to be a 3-channel uint8 image - TODO confirm).
+        show_process: when truthy, print diagnostics and show intermediate masks in OpenCV windows.
+        inpaint_sdthresh: ``need_inpaint`` is False only when the background spread ``sd``
+            returned by ``bground_calculator`` is not -1 and is below this value.
+        **kwargs: accepted and not used.
+    Returns:
+        ``(mask, ballon_mask, bub_dict)``; ``bub_dict`` has the keys ``"rgb"``,
+        ``"bground_rgb"``, ``"inner_rect"`` and ``"need_inpaint"``.
+    """
+    # cv2.setNumThreads(4)
+    WHITE = (255, 255, 255)
+    BLACK = (0, 0, 0)
+    kernel = np.ones((3,3),np.uint8)
+    orih, oriw = img.shape[0], img.shape[1]
+    # Work on a rescaled copy: shrink when both sides exceed 300, enlarge when either is below 120.
+    scaleR = 1
+    if orih > 300 and oriw > 300:
+        scaleR = 0.6
+    elif orih < 120 or oriw < 120:
+        scaleR = 1.4
+    if scaleR != 1:
+        h, w = img.shape[0], img.shape[1]
+        orimg = np.copy(img)
+        img = cv2.resize(img, (int(w*scaleR), int(h*scaleR)), interpolation=cv2.INTER_AREA)
+    h, w = img.shape[0], img.shape[1]
+    img_area = h * w
+    # NOTE(review): cv2.BORDER_DEFAULT is passed in the third positional slot of GaussianBlur - confirm intent.
+    cpimg = cv2.GaussianBlur(img,(3,3),cv2.BORDER_DEFAULT)
+    detected_edges = cv2.Canny(cpimg, 70, 140, L2gradient=True, apertureSize=3)
+    # Draw a white frame so findContours sees the image border, then erase it again.
+    cv2.rectangle(detected_edges, (0, 0), (w-1, h-1), WHITE, 1, cv2.LINE_8)
+    cons, hiers = cv2.findContours(detected_edges, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
+    cv2.rectangle(detected_edges, (0, 0), (w-1, h-1), BLACK, 1, cv2.LINE_8)
+    ballon_mask, outer_index = np.zeros((h, w), np.uint8), -1
+    min_retval = np.inf
+    mask = np.zeros((h, w), np.uint8)
+    difres = 10
+    seedpnt = (int(w/2), int(h/2))
+    # Among contours whose bounding box covers at least 40% of the image, keep the
+    # one whose flood fill from the image center is smallest while above 30% of the image.
+    for ii in range(len(cons)):
+        rect = cv2.boundingRect(cons[ii])
+        if rect[2]*rect[3] < img_area*0.4:
+            continue
+        mask = cv2.drawContours(mask, cons, ii, (255), 2)
+        cpmask = np.copy(mask)
+        cv2.rectangle(mask, (0, 0), (w-1, h-1), WHITE, 1, cv2.LINE_8)
+        retval, _, _, rect = cv2.floodFill(cpmask, mask=None, seedPoint=seedpnt,  flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
+        # A fill of at most 30% of the image: erase this contour from the accumulated mask again.
+        if retval <= img_area * 0.3:
+            mask = cv2.drawContours(mask, cons, ii, (0), 2)
+        if retval < min_retval and retval > img_area * 0.3:
+            min_retval = retval
+            ballon_mask = cpmask
+    # Turn the 127-filled area into the balloon mask via a second fill from the center and a threshold.
+    ballon_mask = 127 - ballon_mask
+    ballon_mask = cv2.dilate(ballon_mask, kernel,iterations = 1)
+    outer_area, _, _, rect = cv2.floodFill(ballon_mask, mask=None, seedPoint=seedpnt,  flags=4, newVal=(30), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
+    ballon_mask = 30 - ballon_mask
+    retval, ballon_mask = cv2.threshold(ballon_mask, 1, 255, cv2.THRESH_BINARY)
+    ballon_mask = cv2.bitwise_not(ballon_mask, ballon_mask)
+    detected_edges = cv2.dilate(detected_edges, kernel, iterations = 1)
+    # Up to two passes: restrict the edges to the balloon, fill the background from two
+    # opposite corners, and stop once the remaining (text) area is below 85% of outer_area.
+    for ii in range(2):
+        detected_edges = cv2.bitwise_and(detected_edges, ballon_mask)
+        mask = np.copy(detected_edges)
+        bgarea1, _, _, rect = cv2.floodFill(mask, mask=None, seedPoint=(0, 0),  flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
+        bgarea2, _, _, rect = cv2.floodFill(mask, mask=None, seedPoint=(detected_edges.shape[1]-1, detected_edges.shape[0]-1),  flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
+        txt_area = min(img_area - bgarea1, img_area - bgarea2)
+        # NOTE(review): outer_area comes from the flood fill above; a zero value would divide by zero here.
+        ratio_ob = txt_area / outer_area
+        ballon_mask = cv2.erode(ballon_mask, kernel,iterations = 1)
+        if ratio_ob < 0.85:
+            break
+    mask = 127 - mask
+    retval, mask = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
+    # Go back to the original resolution when the image was rescaled above.
+    if scaleR != 1:
+        img = orimg
+        ballon_mask = cv2.resize(ballon_mask, (oriw, orih))
+        mask = cv2.resize(mask, (oriw, orih))
+    # bground_calculator treats zeros of bg_mask as background: inside the balloon and outside the text mask.
+    bg_mask = cv2.bitwise_or(mask, 255-ballon_mask)
+    mask = cv2.bitwise_and(mask, ballon_mask)
+    bground_aver, bground_region, sd = bground_calculator(img, bg_mask)
+    inner_rect = None
+    threshed = np.zeros((img.shape[0], img.shape[1]), np.uint8)
+    # bground_aver[0] == -1 is the "no background pixel" marker of bground_calculator.
+    if bground_aver[0] != -1:
+        letter_aver, threshed = letter_calculator(img, mask, bground_aver, show_process=show_process)
+        if letter_aver[0] != -1:
+            mask = cv2.dilate(threshed, kernel, iterations=1)
+            inner_rect = cv2.boundingRect(cv2.findNonZero(mask))
+    else: letter_aver = [0, 0, 0]
+    if sd != -1 and sd < inpaint_sdthresh:
+        need_inpaint = False
+    else:
+        need_inpaint = True
+    if show_process:
+        print(f"\nneed_inpaint: {need_inpaint}, sd: {sd}, {type(inner_rect)}")
+        show_img_by_dict({"outermask": ballon_mask, "detect": detected_edges, "mask": mask})
+    # inner_rect becomes a 5-item list (x, y, w, h, -1), or four -1 values when no text was found.
+    if isinstance(inner_rect, tuple):
+        inner_rect = [ii for ii in inner_rect]
+    if inner_rect is None:
+        inner_rect = [-1, -1, -1, -1]
+    else:
+        inner_rect.append(-1)
+    bground_aver = bground_aver.astype(np.uint8)
+    bub_dict = {"rgb": letter_aver,
+                "bground_rgb": bground_aver,
+                "inner_rect": inner_rect,
+                "need_inpaint": need_inpaint}
+    return mask, ballon_mask, bub_dict
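+# Input: text block ROI. Segment the text mask, compute the text BGR mean and standard deviation from the mask, and decide between solid-color fill and inpainting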
+def connected_canny_flood(img, show_process=False, inpaint_sdthresh=10, apply_strokewidth_check=0, **kwargs):
+    """Segment a text mask and a balloon mask from a text-block crop with connected components.
+    Args:
+        img: image crop of the text block (indexed as ``img[:, :, c]``, so it needs a channel axis).
+        show_process: when truthy, print diagnostics and show intermediate images;
+            ``show_process-1`` is also forwarded as ``debug_type`` to ``strokewidth_check``.
+        inpaint_sdthresh: ``need_inpaint`` is False only when the background spread ``sd``
+            returned by ``bground_calculator`` is not -1 and is below this value.
+        apply_strokewidth_check: when > 0, filter the text mask with ``strokewidth_check``.
+        **kwargs: accepted and not used.
+    Returns:
+        ``(mask, ballon_mask, bub_dict)``; ``bub_dict`` has the keys ``"rgb"``,
+        ``"bground_rgb"``, ``"inner_rect"`` and ``"need_inpaint"``.
+    """
+    # Find the outer-contour mask that is most likely the balloon
+    def find_outermask(img):
+        connectivity = 4
+        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(img, connectivity, cv2.CV_16U)
+        drawtext = np.zeros((img.shape[0], img.shape[1]), np.uint8)
+        # Column 4 of stats is the component area; columns 2 and 3 are its bounding-box width and height.
+        max_ind = np.argmax(stats[:, 4])
+        # Component with the largest bounding box besides max_ind; sec_ind is not used further in this helper.
+        maxbbox_area, sec_ind = -1, -1
+        for ind, stat in enumerate(stats):
+            if ind != max_ind:
+                bbarea = stat[2] * stat[3]
+                if bbarea > maxbbox_area:
+                    maxbbox_area = bbarea
+                    sec_ind = ind
+        drawtext[np.where(labels==max_ind)] = 255
+        cv2.rectangle(drawtext, (0, 0), (img.shape[1]-1, img.shape[0]-1), (0, 0, 0), 1, cv2.LINE_8)
+        cons, hiers = cv2.findContours(drawtext, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
+        img_area = img.shape[0] * img.shape[1]
+        rects = np.array([cv2.boundingRect(cnt) for cnt in cons])
+        rect_area = np.array([rect[2] * rect[3] for rect in rects])
+        # Keep only contours whose bounding box exceeds 30% of the image.
+        quali_ind = np.where(rect_area > img_area * 0.3)[0]
+        ballon_mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
+        for ind in quali_ind:
+            ballon_mask = cv2.drawContours(ballon_mask, cons, ind, (255), 2)
+        # Flood fill from the image center with 127, then binarize the result into the returned mask.
+        seedpnt = (int(ballon_mask.shape[1]/2), int(ballon_mask.shape[0]/2))
+        difres = 10
+        retval, _, _, rect = cv2.floodFill(ballon_mask, mask=None, seedPoint=seedpnt,  flags=4, newVal=(127), loDiff=(difres, difres, difres), upDiff=(difres, difres, difres))
+        ballon_mask = 255 - cv2.threshold(ballon_mask - 127, 1, 255, cv2.THRESH_BINARY)[1]
+        return num_labels, labels, stats, centroids, ballon_mask
+    # Converting BGR straight to grayscale can make text and background hard to tell apart,
+    # e.g. the red-text-on-black test sample.
+    # But there is always one channel in which text and background are easy to separate.
+    # Return the channel in which they are easiest to separate.
+    def ccctest(img, crop_r=0.1):
+        # img = usm(img)
+        # Work on a copy at most 100 px high.
+        maxh = 100
+        if img.shape[0] > maxh:
+            scaleR = maxh / img.shape[0]
+            im = cv2.resize(img, (int(img.shape[1]*scaleR), int(img.shape[0]*scaleR)), interpolation=cv2.INTER_AREA)
+        else:
+            im = img
+        textlabel_counter = 0
+        reverse = False
+        c_ind = 0
+        num_labels, labels, stats, centroids, pseduo_outermask = find_outermask(cv2.threshold(cv2.cvtColor(im, cv2.COLOR_RGB2GRAY), 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)[1])
+        # Append the gray image as a fourth channel so index 3 means "grayscale".
+        grayim = np.expand_dims(np.array(cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)), axis=2)
+        im = np.append(im, grayim, axis=2)
+        outer_cords = np.where(pseduo_outermask==255)
+        for bgr_ind in range(4):
+            channel = im[:, :, bgr_ind]
+            ret, thresh = cv2.threshold(channel, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
+            tmp_reverse = False
+            # Invert when the area selected by the outer mask is mostly white (mean above 160).
+            if np.mean(thresh[outer_cords]) > 160:
+                thresh = 255 - thresh
+                tmp_reverse = True
+            num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, 4, cv2.CV_16U)
+            # draw_connected_labels(num_labels, labels, stats, centroids)
+            # cv2.waitKey(0)
+            max_ind = np.argmax(stats[:, 4])
+            maxr, minr = 0.5, 0.001
+            maxw, maxh = stats[max_ind][2] * maxr, stats[max_ind][3] * maxr
+            minarea = im.shape[0] * im.shape[1] * minr
+            # Count components narrower and shorter than half the largest component's box
+            # and with a box area above 0.1% of the image.
+            tmp_counter = 0
+            for stat in stats:
+                bboxarea = stat[2] * stat[3]
+                if stat[2] < maxw and stat[3] < maxh and bboxarea > minarea:
+                    tmp_counter += 1
+            # The channel with the most such components wins.
+            if tmp_counter > textlabel_counter:
+                textlabel_counter = tmp_counter
+                c_ind = bgr_ind
+                reverse = tmp_reverse
+        return c_ind, reverse
+    channel_index, reverse = ccctest(img)
+    chanel = img[:, :, channel_index] if channel_index < 3 else cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+    ret, thresh = cv2.threshold(chanel, 1, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
+    # reverse to get white text on black bg
+    if reverse:
+        thresh = 255 - thresh
+    num_labels, labels, stats, centroids, ballon_mask = find_outermask(thresh)
+    img_area = img.shape[0] * img.shape[1]
+    text_mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
+    max_ind = np.argmax(stats[:, 4])
+    # Text = every component except the largest one whose area is below 40% of the image.
+    for lab in (range(num_labels)):
+        stat = stats[lab]
+        if lab != max_ind and stat[4] < img_area * 0.4:
+            labcord = np.where(labels==lab)
+            text_mask[labcord] = 255
+    text_mask = cv2.bitwise_and(text_mask, ballon_mask)
+    if apply_strokewidth_check > 0:
+        text_mask = strokewidth_check(text_mask, labels, num_labels, stats, debug_type=show_process-1)
+    text_color = textrgb_calculator(img, text_mask, show_process=show_process)
+    # inner_rect is [x, y, w, h, -1] of the (dilated) text mask.
+    inner_rect = cv2.boundingRect(cv2.findNonZero(cv2.dilate(text_mask, (3, 3), iterations=1)))
+    inner_rect = [ii for ii in inner_rect]
+    inner_rect.append(-1)
+    # bground_calculator treats zeros of bg_mask as background: inside the balloon and outside the text mask.
+    bg_mask = cv2.bitwise_or(text_mask, 255-ballon_mask)
+    bground_aver, bground_region, sd = bground_calculator(img, bg_mask)
+    # Blur then re-binarize the text mask for the returned mask.
+    mask = cv2.GaussianBlur(text_mask,(3,3),cv2.BORDER_DEFAULT)
+    _, mask = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
+    if sd != -1 and sd < inpaint_sdthresh:
+        need_inpaint = False
+    else:
+        need_inpaint = True
+    if show_process:
+        print(f"\nuse inpaint: {need_inpaint}, sd: {sd}, {type(inner_rect)}")
+        draw_connected_labels(num_labels, labels, stats, centroids)
+        show_img_by_dict({"thresh": thresh, "ori": img, "outer": ballon_mask, "text": text_mask, "bgmask": bg_mask})
+    bground_aver = bground_aver.astype(np.uint8)
+    bub_dict = {"rgb": text_color,
+                "bground_rgb": bground_aver,
+                "inner_rect": inner_rect,
+                "need_inpaint": need_inpaint}
+    return mask, ballon_mask, bub_dict
+def existing_mask(img, mask: np.ndarray):
+    bub_dict = {"rgb": [0, 0, 0],"bground_rgb": [255, 255, 255],"need_inpaint": True}
+    return mask, mask, bub_dict
+def extract_ballon_mask(img: np.ndarray, mask: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    '''
+    Given original img and text mask (cropped)
+    return ballon mask & non text mask
+    '''
+    img = cv2.GaussianBlur(img,(3,3),cv2.BORDER_DEFAULT)
+    h, w = img.shape[:2]
+    text_sum = np.sum(mask)
+    cannyed = cv2.Canny(img, 70, 140, L2gradient=True, apertureSize=3)
+    e_size = 1
+    element = cv2.getStructuringElement(cv2.MORPH_RECT, (2 * e_size + 1, 2 * e_size + 1),(e_size, e_size))
+    cannyed = cv2.dilate(cannyed, element, iterations=1)
+    br = cv2.boundingRect(cv2.findNonZero(mask))
+    br_xyxy = [br[0], br[1], br[0] + br[2], br[1] + br[3]]
+    # draw the bounding rect in case there is no closed ballon
+    cv2.rectangle(cannyed, (0, 0), (w-1, h-1), (255, 255, 255), 1, cv2.LINE_8)
+    cannyed = cv2.bitwise_and(cannyed, 255 - mask)
+    cons, _ = cv2.findContours(cannyed, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
+    min_ballon_area = w * h
+    ballon_mask = None
+    non_text_mask = None
+    # minimum contour which covers all text mask must be the ballon
+    for ii, con in enumerate(cons):
+        br_c = cv2.boundingRect(con)
+        br_c = [br_c[0], br_c[1], br_c[0] + br_c[2], br_c[1] + br_c[3]]
+        if br_c[0] > br_xyxy[0] or br_c[1] > br_xyxy[1] or br_c[2] < br_xyxy[2] or br_c[3] < br_xyxy[3]:
+            continue
+        tmp = np.zeros_like(cannyed)
+        cv2.drawContours(tmp, cons, ii, (255, 255, 255), -1, cv2.LINE_8)
+        if cv2.bitwise_and(tmp, mask).sum() >= text_sum:
+            con_area = cv2.contourArea(con)
+            if con_area < min_ballon_area:
+                min_ballon_area = con_area
+                ballon_mask = tmp
+    if ballon_mask is not None:
+        non_text_mask = cv2.bitwise_and(ballon_mask, 255 - mask)
+    #     cv2.imshow('ballon', ballon_mask)
+    #     cv2.imshow('non_text', non_text_mask)
+    # cv2.imshow('im', img)
+    # cv2.imshow('msk', mask)
+    # cv2.imshow('canny', cannyed)
+    # cv2.waitKey(0)
+    return ballon_mask, non_text_mask

utils/textlines_merge.py ADDED Viewed

	@@ -0,0 +1,568 @@

+import itertools
+import functools
+from typing import Tuple, List, ClassVar, Union, Any, Dict, Set
+from collections import Counter
+try:
+    functools.cached_property
+except AttributeError: # Supports Python versions below 3.8
+    from backports.cached_property import cached_property
+    functools.cached_property = cached_property
+import numpy as np
+from shapely.geometry import Polygon, MultiPoint
+import cv2
+import networkx as nx
+class BBox(object):
+    def __init__(self, x: int, y: int, w: int, h: int, text: str, prob: float, fg_r: int = 0, fg_g: int = 0, fg_b: int = 0, bg_r: int = 0, bg_g: int = 0, bg_b: int = 0):
+        self.x = x
+        self.y = y
+        self.w = w
+        self.h = h
+        self.text = text
+        self.prob = prob
+        self.fg_r = fg_r
+        self.fg_g = fg_g
+        self.fg_b = fg_b
+        self.bg_r = bg_r
+        self.bg_g = bg_g
+        self.bg_b = bg_b
+    def width(self):
+        return self.w
+    def height(self):
+        return self.h
+    def to_points(self):
+        tl, tr, br, bl = np.array([self.x, self.y]), np.array([self.x + self.w, self.y]), np.array([self.x + self.w, self.y+ self.h]), np.array([self.x, self.y + self.h])
+        return tl, tr, br, bl
+    @property
+    def xywh(self):
+        return np.array([self.x, self.y, self.w, self.h], dtype=np.int32)
+class Quadrilateral(object):
+    """
+    Helper for storing textlines that contains various helper functions.
+    """
+    def __init__(self, pts: np.ndarray, text: str, prob: float, fg_r: int = 0, fg_g: int = 0, fg_b: int = 0, bg_r: int = 0, bg_g: int = 0, bg_b: int = 0):
+        self.pts, is_vertical = sort_pnts(pts)
+        if is_vertical:
+            self.direction = 'v'
+        else:
+            self.direction = 'h'
+        self.text = text
+        self.prob = prob
+        self.fg_r = fg_r
+        self.fg_g = fg_g
+        self.fg_b = fg_b
+        self.bg_r = bg_r
+        self.bg_g = bg_g
+        self.bg_b = bg_b
+        self.assigned_direction: str = None
+        self.textlines: List[Quadrilateral] = []
+    @functools.cached_property
+    def structure(self) -> List[np.ndarray]:
+        p1 = ((self.pts[0] + self.pts[1]) / 2).astype(int)
+        p2 = ((self.pts[2] + self.pts[3]) / 2).astype(int)
+        p3 = ((self.pts[1] + self.pts[2]) / 2).astype(int)
+        p4 = ((self.pts[3] + self.pts[0]) / 2).astype(int)
+        return [p1, p2, p3, p4]
+    @functools.cached_property
+    def valid(self) -> bool:
+        [l1a, l1b, l2a, l2b] = [a.astype(np.float32) for a in self.structure]
+        v1 = l1b - l1a
+        v2 = l2b - l2a
+        unit_vector_1 = v1 / np.linalg.norm(v1)
+        unit_vector_2 = v2 / np.linalg.norm(v2)
+        dot_product = np.dot(unit_vector_1, unit_vector_2)
+        angle = np.arccos(dot_product) * 180 / np.pi
+        return abs(angle - 90) < 10
+    @property
+    def fg_colors(self):
+        return np.array([self.fg_r, self.fg_g, self.fg_b])
+    @property
+    def bg_colors(self):
+        return np.array([self.bg_r, self.bg_g, self.bg_b])
+    @functools.cached_property
+    def aspect_ratio(self) -> float:
+        """hor/ver"""
+        [l1a, l1b, l2a, l2b] = [a.astype(np.float32) for a in self.structure]
+        v1 = l1b - l1a
+        v2 = l2b - l2a
+        return np.linalg.norm(v2) / np.linalg.norm(v1)
+    @functools.cached_property
+    def font_size(self) -> float:
+        [l1a, l1b, l2a, l2b] = [a.astype(np.float32) for a in self.structure]
+        v1 = l1b - l1a
+        v2 = l2b - l2a
+        return min(np.linalg.norm(v2), np.linalg.norm(v1))
+    def width(self) -> int:
+        return self.aabb.w
+    def height(self) -> int:
+        return self.aabb.h
+    @functools.cached_property
+    def xyxy(self):
+        return self.aabb.x, self.aabb.y, self.aabb.x + self.aabb.w, self.aabb.y + self.aabb.h
+    def clip(self, width, height):
+        self.pts[:, 0] = np.clip(np.round(self.pts[:, 0]), 0, width)
+        self.pts[:, 1] = np.clip(np.round(self.pts[:, 1]), 0, height)
+    # @functools.cached_property
+    # def points(self):
+    #     ans = [a.astype(np.float32) for a in self.structure]
+    #     return [Point(a[0], a[1]) for a in ans]
+    @functools.cached_property
+    def aabb(self) -> BBox:
+        kq = self.pts
+        max_coord = np.max(kq, axis = 0)
+        min_coord = np.min(kq, axis = 0)
+        return BBox(min_coord[0], min_coord[1], max_coord[0] - min_coord[0], max_coord[1] - min_coord[1], self.text, self.prob, self.fg_r, self.fg_g, self.fg_b, self.bg_r, self.bg_g, self.bg_b)
+    def get_transformed_region(self, img, direction, textheight) -> np.ndarray:
+        [l1a, l1b, l2a, l2b] = [a.astype(np.float32) for a in self.structure]
+        v_vec = l1b - l1a
+        h_vec = l2b - l2a
+        ratio = np.linalg.norm(v_vec) / np.linalg.norm(h_vec)
+        src_pts = self.pts.astype(np.int64).copy()
+        im_h, im_w = img.shape[:2]
+        x1, y1, x2, y2 = src_pts[:, 0].min(), src_pts[:, 1].min(), src_pts[:, 0].max(), src_pts[:, 1].max()
+        x1 = np.clip(x1, 0, im_w)
+        y1 = np.clip(y1, 0, im_h)
+        x2 = np.clip(x2, 0, im_w)
+        y2 = np.clip(y2, 0, im_h)
+        # cv2.warpPerspective could overflow if image size is too large, better crop it here
+        img_croped = img[y1: y2, x1: x2]
+        src_pts[:, 0] -= x1
+        src_pts[:, 1] -= y1
+        self.assigned_direction = direction
+        if direction == 'h':
+            h = max(int(textheight), 2)
+            w = max(int(round(textheight / ratio)), 2)
+            dst_pts = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]]).astype(np.float32)
+            M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
+            region = cv2.warpPerspective(img_croped, M, (w, h))
+            return region
+        elif direction == 'v':
+            w = max(int(textheight), 2)
+            h = max(int(round(textheight * ratio)), 2)
+            dst_pts = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]]).astype(np.float32)
+            M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
+            region = cv2.warpPerspective(img_croped, M, (w, h))
+            region = cv2.rotate(region, cv2.ROTATE_90_COUNTERCLOCKWISE)
+            return region
+    @functools.cached_property
+    def is_axis_aligned(self) -> bool:
+        [l1a, l1b, l2a, l2b] = [a.astype(np.float32) for a in self.structure]
+        v1 = l1b - l1a
+        v2 = l2b - l2a
+        e1 = np.array([0, 1])
+        e2 = np.array([1, 0])
+        unit_vector_1 = v1 / np.linalg.norm(v1)
+        unit_vector_2 = v2 / np.linalg.norm(v2)
+        if abs(np.dot(unit_vector_1, e1)) < 1e-2 or abs(np.dot(unit_vector_1, e2)) < 1e-2:
+            return True
+        return False
+    @functools.cached_property
+    def is_approximate_axis_aligned(self) -> bool:
+        [l1a, l1b, l2a, l2b] = [a.astype(np.float32) for a in self.structure]
+        v1 = l1b - l1a
+        v2 = l2b - l2a
+        e1 = np.array([0, 1])
+        e2 = np.array([1, 0])
+        unit_vector_1 = v1 / np.linalg.norm(v1)
+        unit_vector_2 = v2 / np.linalg.norm(v2)
+        if abs(np.dot(unit_vector_1, e1)) < 0.05 or abs(np.dot(unit_vector_1, e2)) < 0.05 or abs(np.dot(unit_vector_2, e1)) < 0.05 or abs(np.dot(unit_vector_2, e2)) < 0.05:
+            return True
+        return False
+    @functools.cached_property
+    def cosangle(self) -> float:
+        [l1a, l1b, l2a, l2b] = [a.astype(np.float32) for a in self.structure]
+        v1 = l1b - l1a
+        e2 = np.array([1, 0])
+        unit_vector_1 = v1 / np.linalg.norm(v1)
+        return np.dot(unit_vector_1, e2)
+    @functools.cached_property
+    def angle(self) -> float:
+        return np.fmod(np.arccos(self.cosangle) + np.pi, np.pi)
+    @functools.cached_property
+    def centroid(self) -> np.ndarray:
+        return np.average(self.pts, axis = 0)
+    def distance_to_point(self, p: np.ndarray) -> float:
+        d = 1.0e20
+        for i in range(4):
+            d = min(d, distance_point_point(p, self.pts[i]))
+            d = min(d, distance_point_lineseg(p, self.pts[i], self.pts[(i + 1) % 4]))
+        return d
+    @functools.cached_property
+    def polygon(self) -> Polygon:
+        return MultiPoint([tuple(self.pts[0]), tuple(self.pts[1]), tuple(self.pts[2]), tuple(self.pts[3])]).convex_hull
+    @functools.cached_property
+    def area(self) -> float:
+        return self.polygon.area
+    def poly_distance(self, other) -> float:
+        return self.polygon.distance(other.polygon)
+    def distance(self, other, rho = 0.5) -> float:
+        return self.distance_impl(other, rho)# + 1000 * abs(self.angle - other.angle)
+    def distance_impl(self, other, rho = 0.5) -> float:
+        """Heuristic anchor-point distance between this text line and ``other``.
+
+        An alignment pattern is chosen from the areas of convex hulls spanned
+        by matching corner pairs of both quads; the return value is the
+        Euclidean distance (via ``dist``) between the two anchor points that
+        belong to the chosen pattern.
+
+        Args:
+            other: the quadrilateral to compare against.
+            rho: scale on the larger font size used as the hull-area threshold.
+
+        Returns:
+            Distance between the selected pair of anchor points.
+        """
+        # NOTE(review): only self.assigned_direction is consulted; the check
+        # that both quads share a direction is commented out below.
+        # assert self.assigned_direction == other.assigned_direction
+        #return gjk_distance(self.points, other.points)
+        # b1 = self.aabb
+        # b2 = b2.aabb
+        # x1, y1, w1, h1 = b1.x, b1.y, b1.w, b1.h
+        # x2, y2, w2, h2 = b2.x, b2.y, b2.w, b2.h
+        # return rect_distance(x1, y1, x1 + w1, y1 + h1, x2, y2, x2 + w2, y2 + h2)
+        # Default pattern per direction; it is kept when no hull passes its threshold.
+        pattern = ''
+        if self.assigned_direction == 'h':
+            pattern = 'h_left'
+        else:
+            pattern = 'v_top'
+        # The larger of the two font sizes normalises the hull areas and scales the threshold.
+        fs = max(self.font_size, other.font_size)
+        if self.assigned_direction == 'h':
+            # Hull over corners 0 and 3 of both quads (labelled 'h_left' by this code).
+            poly1 = MultiPoint([tuple(self.pts[0]), tuple(self.pts[3]), tuple(other.pts[0]), tuple(other.pts[3])]).convex_hull
+            # Hull over corners 2 and 1 of both quads (labelled 'h_right').
+            poly2 = MultiPoint([tuple(self.pts[2]), tuple(self.pts[1]), tuple(other.pts[2]), tuple(other.pts[1])]).convex_hull
+            # Hull over the first two structure points of both quads (labelled 'h_middle').
+            poly3 = MultiPoint([
+                tuple(self.structure[0]),
+                tuple(self.structure[1]),
+                tuple(other.structure[0]),
+                tuple(other.structure[1]),
+            ]).convex_hull
+            # Area divided by font size gives a length-like score for each hull.
+            dist1 = poly1.area / fs
+            dist2 = poly2.area / fs
+            dist3 = poly3.area / fs
+            # Later checks override earlier ones only when their score is strictly smaller.
+            if dist1 < fs * rho:
+                pattern = 'h_left'
+            if dist2 < fs * rho and dist2 < dist1:
+                pattern = 'h_right'
+            if dist3 < fs * rho and dist3 < dist1 and dist3 < dist2:
+                pattern = 'h_middle'
+            if pattern == 'h_left':
+                return dist(self.pts[0][0], self.pts[0][1], other.pts[0][0], other.pts[0][1])
+            elif pattern == 'h_right':
+                return dist(self.pts[1][0], self.pts[1][1], other.pts[1][0], other.pts[1][1])
+            else:
+                return dist(self.structure[0][0], self.structure[0][1], other.structure[0][0], other.structure[0][1])
+        else:
+            # Any direction other than 'h' takes this branch.
+            # Hull over corners 0 and 1 of both quads (labelled 'v_top').
+            poly1 = MultiPoint([tuple(self.pts[0]), tuple(self.pts[1]), tuple(other.pts[0]), tuple(other.pts[1])]).convex_hull
+            # Hull over corners 2 and 3 of both quads (labelled 'v_bottom').
+            poly2 = MultiPoint([tuple(self.pts[2]), tuple(self.pts[3]), tuple(other.pts[2]), tuple(other.pts[3])]).convex_hull
+            dist1 = poly1.area / fs
+            dist2 = poly2.area / fs
+            if dist1 < fs * rho:
+                pattern = 'v_top'
+            if dist2 < fs * rho and dist2 < dist1:
+                pattern = 'v_bottom'
+            if pattern == 'v_top':
+                return dist(self.pts[0][0], self.pts[0][1], other.pts[0][0], other.pts[0][1])
+            else:
+                return dist(self.pts[2][0], self.pts[2][1], other.pts[2][0], other.pts[2][1])
+    def copy(self, new_pts: np.ndarray):
+        return Quadrilateral(new_pts, self.text, self.prob, *self.fg_colors, *self.bg_colors)
+def sort_pnts(pts: np.ndarray):
+    '''
+    Direction must be provided for sorting.
+    The longer structure vector (mean of long side vectors) of input points is used to determine the direction.
+    It is reliable enough for text lines but not for blocks.
+    '''
+    if isinstance(pts, List):
+        pts = np.array(pts)
+    assert isinstance(pts, np.ndarray) and pts.shape == (4, 2)
+    pairwise_vec = (pts[:, None] - pts[None]).reshape((16, -1))
+    pairwise_vec_norm = np.linalg.norm(pairwise_vec, axis=1)
+    long_side_ids = np.argsort(pairwise_vec_norm)[[8, 10]]
+    long_side_vecs = pairwise_vec[long_side_ids]
+    inner_prod = (long_side_vecs[0] * long_side_vecs[1]).sum()
+    if inner_prod < 0:
+        long_side_vecs[0] = -long_side_vecs[0]
+    struc_vec = np.abs(long_side_vecs.mean(axis=0))
+    is_vertical = struc_vec[0] <= struc_vec[1]
+    if is_vertical:
+        pts = pts[np.argsort(pts[:, 1])]
+        pts = pts[[*np.argsort(pts[:2, 0]), *np.argsort(pts[2:, 0])[::-1] + 2]]
+        return pts, is_vertical
+    else:
+        pts = pts[np.argsort(pts[:, 0])]
+        pts_sorted = np.zeros_like(pts)
+        pts_sorted[[0, 3]] = sorted(pts[[0, 1]], key=lambda x: x[1])
+        pts_sorted[[1, 2]] = sorted(pts[[2, 3]], key=lambda x: x[1])
+        return pts_sorted, is_vertical
+def dist(x1, y1, x2, y2):
+    return np.sqrt((x1 - x2)**2 + (y1 - y2)**2)
+def distance_point_point(a: np.ndarray, b: np.ndarray) -> float:
+    return np.linalg.norm(a - b)
+# from https://stackoverflow.com/questions/849211/shortest-distance-between-a-point-and-a-line-segment
+def distance_point_lineseg(p: np.ndarray, p1: np.ndarray, p2: np.ndarray):
+    x = p[0]
+    y = p[1]
+    x1 = p1[0]
+    y1 = p1[1]
+    x2 = p2[0]
+    y2 = p2[1]
+    A = x - x1
+    B = y - y1
+    C = x2 - x1
+    D = y2 - y1
+    dot = A * C + B * D
+    len_sq = C * C + D * D
+    param = -1
+    if len_sq != 0:
+        param = dot / len_sq
+    if param < 0:
+        xx = x1
+        yy = y1
+    elif param > 1:
+        xx = x2
+        yy = y2
+    else:
+        xx = x1 + param * C
+        yy = y1 + param * D
+    dx = x - xx
+    dy = y - yy
+    return np.sqrt(dx * dx + dy * dy)
+def quadrilateral_can_merge_region(a: Quadrilateral, b: Quadrilateral, ratio = 1.9, discard_connection_gap = 2, char_gap_tolerance = 0.6, char_gap_tolerance2 = 1.5, font_size_ratio_tol = 1.5, aspect_ratio_tol = 2) -> bool:
+    """Decide whether two text-line quads may belong to the same text region.
+
+    Args:
+        a, b: the two quadrilaterals to compare.
+        ratio: side-length factor above which an axis-aligned box is treated
+            as clearly wide (w > h * ratio) or clearly tall (h > w * ratio).
+        discard_connection_gap: reject when the polygon distance exceeds this
+            many times the smaller font size.
+        char_gap_tolerance: for axis-aligned pairs, the polygon distance must
+            be below this many times the smaller font size.
+        char_gap_tolerance2: looser tolerance used for edge alignment and for
+            the polygon distance of the angle-based fallback.
+        font_size_ratio_tol: reject when larger / smaller font size exceeds this.
+        aspect_ratio_tol: reject when one quad's aspect ratio is above this
+            value while the other's is below its reciprocal.
+
+    Returns:
+        True when the pair passes the checks, False otherwise.
+    """
+    b1 = a.aabb
+    b2 = b.aabb
+    # The smaller font size is the length unit for the gap thresholds below.
+    char_size = min(a.font_size, b.font_size)
+    x1, y1, w1, h1 = b1.x, b1.y, b1.w, b1.h
+    x2, y2, w2, h2 = b2.x, b2.y, b2.w, b2.h
+    # dist = rect_distance(x1, y1, x1 + w1, y1 + h1, x2, y2, x2 + w2, y2 + h2)
+    p1 = Polygon(a.pts)
+    p2 = Polygon(b.pts)
+    # Local name `dist` shadows the module-level dist() helper inside this function.
+    dist = p1.distance(p2)
+    # Quick rejections: too far apart, too different in font size, or opposite elongation.
+    if dist > discard_connection_gap * char_size:
+        return False
+    if max(a.font_size, b.font_size) / char_size > font_size_ratio_tol:
+        return False
+    if a.aspect_ratio > aspect_ratio_tol and b.aspect_ratio < 1. / aspect_ratio_tol:
+        return False
+    if b.aspect_ratio > aspect_ratio_tol and a.aspect_ratio < 1. / aspect_ratio_tol:
+        return False
+    a_aa = a.is_approximate_axis_aligned
+    b_aa = b.is_approximate_axis_aligned
+    if a_aa and b_aa:
+        # Both quads are roughly axis-aligned: decide from their bounding boxes.
+        if dist < char_size * char_gap_tolerance:
+            # NOTE(review): this threshold is in raw coordinate units; unlike the
+            # checks below it is not multiplied by char_size. Confirm that is intended.
+            if abs(x1 + w1 // 2 - (x2 + w2 // 2)) < char_gap_tolerance2:
+                return True
+            # One box clearly wide and the other clearly tall: never merge.
+            if w1 > h1 * ratio and h2 > w2 * ratio:
+                return False
+            if w2 > h2 * ratio and h1 > w1 * ratio:
+                return False
+            # At least one clearly wide box: require matching x or matching x + w.
+            if w1 > h1 * ratio or w2 > h2 * ratio : # h
+                return abs(x1 - x2) < char_size * char_gap_tolerance2 or abs(x1 + w1 - (x2 + w2)) < char_size * char_gap_tolerance2
+            # At least one clearly tall box: require matching y or matching y + h.
+            elif h1 > w1 * ratio or h2 > w2 * ratio : # v
+                return abs(y1 - y2) < char_size * char_gap_tolerance2 or abs(y1 + h1 - (y2 + h2)) < char_size * char_gap_tolerance2
+            return False
+        else:
+            return False
+    # Reached whenever at least one quad is not axis-aligned; the original
+    # "neither is axis-aligned" condition is kept only as a trailing comment.
+    if True:#not a_aa and not b_aa:
+        # Angles must agree to within 15 degrees.
+        if abs(a.angle - b.angle) < 15 * np.pi / 180:
+            fs_a = a.font_size
+            fs_b = b.font_size
+            fs = min(fs_a, fs_b)
+            if a.poly_distance(b) > fs * char_gap_tolerance2:
+                return False
+            # Font sizes may differ by at most 25% of the smaller one.
+            if abs(fs_a - fs_b) / fs > 0.25:
+                return False
+            return True
+    return False
+def quadrilateral_can_merge_region_coarse(a: Quadrilateral, b: Quadrilateral, discard_connection_gap = 2, font_size_ratio_tol = 0.7) -> bool:
+    if a.assigned_direction != b.assigned_direction:
+        return False
+    if abs(a.angle - b.angle) > 15 * np.pi / 180:
+        return False
+    fs_a = a.font_size
+    fs_b = b.font_size
+    fs = min(fs_a, fs_b)
+    if abs(fs_a - fs_b) / fs > font_size_ratio_tol:
+        return False
+    fs = max(fs_a, fs_b)
+    dist = a.poly_distance(b)
+    if dist > discard_connection_gap * fs:
+        return False
+    return True
+def split_text_region(
+        bboxes: List[Quadrilateral],
+        connected_region_indices: Set[int],
+        width,
+        height,
+        gamma = 0.5,
+        sigma = 2
+    ) -> List[Set[int]]:
+    connected_region_indices = list(connected_region_indices)
+    # case 1
+    if len(connected_region_indices) == 1:
+        return [set(connected_region_indices)]
+    # case 2
+    if len(connected_region_indices) == 2:
+        fs1 = bboxes[connected_region_indices[0]].font_size
+        fs2 = bboxes[connected_region_indices[1]].font_size
+        fs = max(fs1, fs2)
+        # print(bboxes[connected_region_indices[0]].pts, bboxes[connected_region_indices[1]].pts)
+        # print(fs, bboxes[connected_region_indices[0]].distance(bboxes[connected_region_indices[1]]), (1 + gamma) * fs)
+        # print(bboxes[connected_region_indices[0]].angle, bboxes[connected_region_indices[1]].angle, 4 * np.pi / 180)
+        if bboxes[connected_region_indices[0]].distance(bboxes[connected_region_indices[1]]) < (1 + gamma) * fs \
+                and abs(bboxes[connected_region_indices[0]].angle - bboxes[connected_region_indices[1]].angle) < 0.2 * np.pi:
+            return [set(connected_region_indices)]
+        else:
+            return [set([connected_region_indices[0]]), set([connected_region_indices[1]])]
+    # case 3
+    G = nx.Graph()
+    for idx in connected_region_indices:
+        G.add_node(idx)
+    for (u, v) in itertools.combinations(connected_region_indices, 2):
+        G.add_edge(u, v, weight=bboxes[u].distance(bboxes[v]))
+    # Get distances from neighbouring bboxes
+    edges = nx.algorithms.tree.minimum_spanning_edges(G, algorithm='kruskal', data=True)
+    edges = sorted(edges, key=lambda a: a[2]['weight'], reverse=True)
+    distances_sorted = [a[2]['weight'] for a in edges]
+    fontsize = np.mean([bboxes[idx].font_size for idx in connected_region_indices])
+    distances_std = np.std(distances_sorted)
+    distances_mean = np.mean(distances_sorted)
+    std_threshold = max(0.3 * fontsize + 5, 5)
+    b1, b2 = bboxes[edges[0][0]], bboxes[edges[0][1]]
+    max_poly_distance = Polygon(b1.pts).distance(Polygon(b2.pts))
+    max_centroid_alignment = min(abs(b1.centroid[0] - b2.centroid[0]), abs(b1.centroid[1] - b2.centroid[1]))
+    # print(edges)
+    # print(f'std: {distances_std} < thrshold: {std_threshold}, mean: {distances_mean}')
+    # print(f'{distances_sorted[0]} <= {distances_mean + distances_std * sigma}' \
+    #         f' or {distances_sorted[0]} <= {fontsize * (1 + gamma)}' \
+    #         f' or {distances_sorted[0] - distances_sorted[1]} < {distances_std * sigma}')
+    if (distances_sorted[0] <= distances_mean + distances_std * sigma \
+            or distances_sorted[0] <= fontsize * (1 + gamma)) \
+            and (distances_std < std_threshold \
+            or max_poly_distance == 0 and max_centroid_alignment < 5):
+        return [set(connected_region_indices)]
+    else:
+        # (split_u, split_v, _) = edges[0]
+        # print(f'split between "{bboxes[split_u].pts}", "{bboxes[split_v].pts}"')
+        G = nx.Graph()
+        for idx in connected_region_indices:
+            G.add_node(idx)
+        # Split out the most deviating bbox
+        for edge in edges[1:]:
+            G.add_edge(edge[0], edge[1])
+        ans = []
+        for node_set in nx.algorithms.components.connected_components(G):
+            ans.extend(split_text_region(bboxes, node_set, width, height))
+        return ans
+def merge_bboxes_text_region(bboxes: List[Quadrilateral], width, height):
+    # step 1: divide into multiple text region candidates
+    G = nx.Graph()
+    for i, box in enumerate(bboxes):
+        G.add_node(i, box=box)
+    for ((u, ubox), (v, vbox)) in itertools.combinations(enumerate(bboxes), 2):
+        # if quadrilateral_can_merge_region_coarse(ubox, vbox):
+        if quadrilateral_can_merge_region(ubox, vbox, aspect_ratio_tol=1.3, font_size_ratio_tol=2,
+                                          char_gap_tolerance=1, char_gap_tolerance2=3):
+            G.add_edge(u, v)
+    # step 2: postprocess - further split each region
+    region_indices: List[Set[int]] = []
+    for node_set in nx.algorithms.components.connected_components(G):
+         region_indices.extend(split_text_region(bboxes, node_set, width, height))
+    # step 3: return regions
+    for node_set in region_indices:
+    # for node_set in nx.algorithms.components.connected_components(G):
+        nodes = list(node_set)
+        txtlns: List[Quadrilateral] = np.array(bboxes)[nodes]
+        # calculate average fg and bg color
+        fg_r = round(np.mean([box.fg_r for box in txtlns]))
+        fg_g = round(np.mean([box.fg_g for box in txtlns]))
+        fg_b = round(np.mean([box.fg_b for box in txtlns]))
+        bg_r = round(np.mean([box.bg_r for box in txtlns]))
+        bg_g = round(np.mean([box.bg_g for box in txtlns]))
+        bg_b = round(np.mean([box.bg_b for box in txtlns]))
+        # majority vote for direction
+        dirs = [box.direction for box in txtlns]
+        majority_dir_top_2 = Counter(dirs).most_common(2)
+        if len(majority_dir_top_2) == 1 :
+            majority_dir = majority_dir_top_2[0][0]
+        elif majority_dir_top_2[0][1] == majority_dir_top_2[1][1] : # if top 2 have the same counts
+            max_aspect_ratio = -100
+            for box in txtlns :
+                if box.aspect_ratio > max_aspect_ratio :
+                    max_aspect_ratio = box.aspect_ratio
+                    majority_dir = box.direction
+                if 1.0 / box.aspect_ratio > max_aspect_ratio :
+                    max_aspect_ratio = 1.0 / box.aspect_ratio
+                    majority_dir = box.direction
+        else :
+            majority_dir = majority_dir_top_2[0][0]
+        # sort textlines
+        if majority_dir == 'h':
+            nodes = sorted(nodes, key=lambda x: bboxes[x].centroid[1])
+        elif majority_dir == 'v':
+            nodes = sorted(nodes, key=lambda x: -bboxes[x].centroid[0])
+        txtlns = np.array(bboxes)[nodes]
+        # yield overall bbox and sorted indices
+        yield txtlns, (fg_r, fg_g, fg_b), (bg_r, bg_g, bg_b)

utils/watermark_utils.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import os.path as osp
+from PIL import Image
+def apply_watermark_to_pil_image(img_pil: Image.Image, watermark_path: str, opacity: float = 0.7) -> Image.Image:
+    """
+    Apply watermark to a PIL image
+    Args:
+        img_pil (Image.Image): Source PIL image
+        watermark_path (str): Path to watermark image
+        opacity (float): Watermark opacity (0.0 - 1.0)
+    Returns:
+        Image.Image: Watermarked PIL image
+    """
+    if not osp.exists(watermark_path):
+        return img_pil
+    try:
+        watermark = Image.open(watermark_path)
+    except Exception:
+        return img_pil
+    # Ensure images are in RGBA mode
+    if img_pil.mode != 'RGBA':
+        img_pil = img_pil.convert('RGBA')
+    if watermark.mode != 'RGBA':
+        watermark = watermark.convert('RGBA')
+    # Fixed watermark size (adjust as needed)
+    WATERMARK_FIXED_WIDTH = 418
+    WATERMARK_FIXED_HEIGHT = 120
+    # Resize watermark
+    watermark = watermark.resize((WATERMARK_FIXED_WIDTH, WATERMARK_FIXED_HEIGHT), Image.LANCZOS)
+    # Apply opacity
+    if opacity < 1.0:
+        alpha = watermark.split()[3]
+        alpha = alpha.point(lambda p: p * opacity)
+        watermark.putalpha(alpha)
+    # Get image dimensions
+    img_width, img_height = img_pil.size
+    # Create transparent layer for watermarks
+    wm_layer = Image.new('RGBA', img_pil.size, (0, 0, 0, 0))
+    # Calculate watermark positions (bottom to top)
+    initial_y = img_height - watermark.height - 10  # 10px from bottom
+    x_position = 10  # 10px from left
+    # Repeat watermark vertically
+    current_y = initial_y
+    while current_y > -watermark.height:
+        if current_y < 0:
+            # Crop watermark if it goes beyond top boundary
+            crop_height = watermark.height + current_y
+            if crop_height > 0:
+                partial_wm = watermark.crop((0, -current_y, watermark.width, watermark.height))
+                wm_layer.paste(partial_wm, (x_position, 0), partial_wm)
+        else:
+            wm_layer.paste(watermark, (x_position, current_y), watermark)
+        current_y -= 8000  # Vertical spacing (adjust as needed)
+    # Composite original image with watermark layer
+    return Image.alpha_composite(img_pil, wm_layer)

utils/zluda_config.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import torch
+# 检测是否包含 ZLUDA 标记
+def zluda_available(device_name):
+    return "[ZLUDA]" in device_name
+# Disable cuDNN support under ZLUDA to prevent errors
+def enable_zluda_config():
+    # Does nothing unless torch exposes `cuda` and reports CUDA as available.
+    if hasattr(torch, 'cuda') and torch.cuda.is_available():
+        # Only device index 0 is inspected.
+        device_name = torch.cuda.get_device_name(0)
+        print('Device name: ', device_name)
+        print('Cuda is available: ', torch.cuda.is_available())
+        print('Cuda version: ', torch.version.cuda)
+        print('ZLUDA is available: ', zluda_available(device_name))
+        if zluda_available(device_name):
+            # Turn cuDNN off globally for this process.
+            torch.backends.cudnn.enabled = False
+            cuda_attr = torch.backends.cuda
+            # Each SDP switch is guarded with hasattr and skipped when this
+            # torch build does not expose it. Net effect: math SDP on,
+            # flash / mem-efficient / cuDNN SDP off.
+            if hasattr(cuda_attr, 'enable_flash_sdp'):
+                torch.backends.cuda.enable_flash_sdp(False)
+                print('Cuda enable flash sdp: ', False)
+            if hasattr(cuda_attr, 'enable_math_sdp'):
+                torch.backends.cuda.enable_math_sdp(True)
+                print('Cuda enable math sdp: ', True)
+            if hasattr(cuda_attr, 'enable_mem_efficient_sdp'):
+                torch.backends.cuda.enable_mem_efficient_sdp(False)
+                print('Cuda enable mem efficient sdp: ', False)
+            if hasattr(cuda_attr, 'enable_cudnn_sdp'):
+                torch.backends.cuda.enable_cudnn_sdp(False)
+                print('Cuda enable cudnn sdp: ', False)