Spaces:
Running
Running
File size: 1,761 Bytes
be754f2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | from collections import OrderedDict
import gc
import logging
import threading
from paddleocr import PaddleOCR
from ocr_config import (
LANG_MAP,
MAX_LOADED_LANGS,
configure_model_environment,
normalize_lang,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Cache of initialized PaddleOCR engines keyed by normalized language code.
# Insertion order doubles as recency order: get_ocr() moves a used key to the
# end and _evict_old_engines() pops from the front.
_ocr_engines = OrderedDict()
# Held by get_ocr() while it looks up or creates an engine. The read-only
# helpers in this module (loaded_langs, is_lang_loaded) do not acquire it.
_ocr_lock = threading.Lock()
def get_ocr(raw_lang: str | None = "kr"):
    """Return the PaddleOCR engine for a language, creating it on first use.

    The requested language is passed through ``normalize_lang`` and the
    result is used as the cache key. A cached engine is marked as most
    recently used and returned; otherwise a new CPU-only engine is built,
    cached, and older engines are evicted via ``_evict_old_engines``.

    Args:
        raw_lang: Language identifier as supplied by the caller; defaults
            to ``"kr"``.

    Returns:
        The cached or newly created ``PaddleOCR`` instance.

    Raises:
        Exception: Any error raised while creating the engine is logged
            with its traceback and re-raised unchanged.
    """
    lang = normalize_lang(raw_lang)

    with _ocr_lock:
        if lang in _ocr_engines:
            # Cache hit: only the recency order needs refreshing.
            _ocr_engines.move_to_end(lang)
        else:
            try:
                logger.info("Initializing PaddleOCR for lang=%s...", lang)
                configure_model_environment()
                engine_options = {
                    "lang": LANG_MAP[lang],
                    "use_angle_cls": True,
                    "use_gpu": False,
                    "show_log": False,
                    "use_mp": False,
                    "enable_mkldnn": False,
                }
                _ocr_engines[lang] = PaddleOCR(**engine_options)
                _ocr_engines.move_to_end(lang)
                _evict_old_engines()
                logger.info(
                    "PaddleOCR initialized successfully for lang=%s.", lang
                )
            except Exception:
                logger.exception(
                    "PaddleOCR initialization failed for lang=%s.", lang
                )
                raise

        return _ocr_engines[lang]
def _evict_old_engines():
    """Drop least-recently-used engines until the cache fits its limit.

    Entries are removed from the front of ``_ocr_engines`` (the oldest
    position) while the cache holds more than ``MAX_LOADED_LANGS`` engines.
    A limit of zero or less disables eviction entirely.

    This function does not acquire ``_ocr_lock`` itself (the lock is not
    reentrant), so a caller that needs mutual exclusion must already hold it.
    """
    if MAX_LOADED_LANGS <= 0:
        return

    evicted_any = False
    while len(_ocr_engines) > MAX_LOADED_LANGS:
        removed_lang, removed_engine = _ocr_engines.popitem(last=False)
        # Drop our own reference before collecting; otherwise the local name
        # keeps the evicted engine alive and gc.collect() cannot reclaim it.
        del removed_engine
        evicted_any = True
        logger.info("Unloaded PaddleOCR lang=%s by LRU policy.", removed_lang)

    if evicted_any:
        # One full collection after all evictions, to reclaim any reference
        # cycles left behind by the unloaded engines.
        gc.collect()
def loaded_langs():
    """Return the language codes that currently have a cached engine.

    The list is a snapshot in cache order, least recently used first.
    """
    return list(_ocr_engines)
def supported_langs():
    """Return every language code that appears as a key in ``LANG_MAP``."""
    return list(LANG_MAP)
def is_lang_loaded(raw_lang: str | None):
    """Report whether an engine for the given language is already cached.

    The input goes through ``normalize_lang`` first, so it is matched
    against the same keys that ``get_ocr`` stores.
    """
    cache_key = normalize_lang(raw_lang)
    return cache_key in _ocr_engines
|