Spaces:
Running
Running
| from collections import OrderedDict | |
| import gc | |
| import logging | |
| import threading | |
| from paddleocr import PaddleOCR | |
| from ocr_config import ( | |
| LANG_MAP, | |
| MAX_LOADED_LANGS, | |
| configure_model_environment, | |
| normalize_lang, | |
| ) | |
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Cache of PaddleOCR engines keyed by normalized language code. Insertion
# order doubles as recency order: get_ocr() moves a hit to the end, and
# _evict_old_engines() pops from the front.
_ocr_engines = OrderedDict()
# Held by get_ocr() for the whole lookup / construction / eviction sequence.
_ocr_lock = threading.Lock()
def get_ocr(raw_lang: str | None = "kr"):
    """Return the PaddleOCR engine for ``raw_lang``, building it on first use.

    The requested language is passed through ``normalize_lang`` and used as
    the cache key. A cached engine is marked as most recently used and
    returned. Otherwise a new engine is constructed, stored in the cache, and
    older engines are evicted via ``_evict_old_engines``.

    The module lock is held for the entire call, including engine
    construction.

    Args:
        raw_lang: Language identifier as supplied by the caller.

    Returns:
        The cached or newly created ``PaddleOCR`` instance.

    Raises:
        Exception: Any error raised while creating or caching the engine is
            logged with its traceback and re-raised unchanged.
    """
    lang = normalize_lang(raw_lang)

    with _ocr_lock:
        if lang in _ocr_engines:
            # Cache hit: refresh recency so this language is evicted last.
            _ocr_engines.move_to_end(lang)
            return _ocr_engines[lang]

        try:
            logger.info("Initializing PaddleOCR for lang=%s...", lang)
            configure_model_environment()

            engine_options = {
                "lang": LANG_MAP[lang],
                "use_angle_cls": True,
                "use_gpu": False,
                "show_log": False,
                "use_mp": False,
                "enable_mkldnn": False,
            }
            engine = PaddleOCR(**engine_options)

            # Register the new engine as the most recent entry, then trim
            # the cache back down to its configured size.
            _ocr_engines[lang] = engine
            _ocr_engines.move_to_end(lang)
            _evict_old_engines()

            logger.info("PaddleOCR initialized successfully for lang=%s.", lang)
            return _ocr_engines[lang]
        except Exception:
            logger.exception("PaddleOCR initialization failed for lang=%s.", lang)
            raise
def _evict_old_engines():
    """Evict least-recently-used engines until the cache fits its limit.

    Entries are popped from the front of ``_ocr_engines`` (the oldest end)
    while the cache holds more than ``MAX_LOADED_LANGS`` engines. A
    non-positive ``MAX_LOADED_LANGS`` disables eviction entirely.

    This function does not take ``_ocr_lock`` itself; ``get_ocr`` calls it
    while already holding the lock.
    """
    if MAX_LOADED_LANGS <= 0:
        return

    evicted_any = False
    while len(_ocr_engines) > MAX_LOADED_LANGS:
        # Keep only the key. Binding the popped engine to a local would keep
        # it alive during gc.collect() below, so that pass could not free it.
        removed_lang = _ocr_engines.popitem(last=False)[0]
        logger.info("Unloaded PaddleOCR lang=%s by LRU policy.", removed_lang)
        evicted_any = True

    if evicted_any:
        # One collection after all references are dropped, so any reference
        # cycles inside the evicted engines can be reclaimed right away.
        gc.collect()
def loaded_langs():
    """Return the language keys currently held in the engine cache.

    The result is a new list in cache order: least recently used first.
    """
    return list(_ocr_engines)
def supported_langs():
    """Return a new list containing every key of ``LANG_MAP``."""
    return [*LANG_MAP.keys()]
def is_lang_loaded(raw_lang: str | None):
    """Report whether an engine for ``raw_lang`` is currently cached.

    The argument is normalized with ``normalize_lang`` before the lookup, so
    it is matched against the same keys that ``get_ocr`` stores.
    """
    lang = normalize_lang(raw_lang)
    return lang in _ocr_engines