| import easyocr |
| import os |
|
|
# Directory handed to easyocr.Reader as its model storage location.
MODEL_DIR = "./models"


# Language codes bucketed by group name; OCRCore.detect_group scans these
# buckets in insertion order to find the group a language code belongs to.
LANG_GROUPS = {
    "latin": ["en", "fr", "de", "es", "it", "tr"],
    "arabic": ["ar", "fa", "ur"],
    "cyrillic": ["ru", "uk"],
    "east_asia": ["ch_sim", "ja", "ko"],
}
|
|
|
|
class OCRCore:
    """Thin wrapper around EasyOCR that lazily builds and reuses readers.

    Readers are created on first use and kept in ``self.cache`` so that
    repeated calls for the same language do not rebuild the reader.
    """

    def __init__(self):
        # Maps a language code (e.g. "fr") to the easyocr.Reader built for it.
        self.cache = {}

    def detect_group(self, lang):
        """Return the name of the LANG_GROUPS bucket that contains *lang*.

        Falls back to ``"latin"`` when the code is not listed in any group.
        """
        for group, langs in LANG_GROUPS.items():
            if lang in langs:
                return group
        return "latin"

    def get_reader(self, lang):
        """Return a reader for *lang*, building and caching it on first use.

        The cache is keyed by the language code itself. Each reader is
        constructed for exactly one language, so sharing a cache slot between
        all languages of a group would hand back a reader built for whichever
        language of that group happened to be requested first.
        """
        try:
            return self.cache[lang]
        except KeyError:
            pass

        # Downloads are disabled, so the model files are expected to be
        # present in MODEL_DIR already.
        reader = easyocr.Reader(
            [lang],
            model_storage_directory=MODEL_DIR,
            download_enabled=False,
        )
        self.cache[lang] = reader
        return reader

    def run(self, image, lang):
        """Run OCR on *image* with the reader for *lang*.

        Returns the second element of every entry produced by
        ``reader.readtext`` (the recognised text), joined with single spaces.
        An empty result yields an empty string.
        """
        reader = self.get_reader(lang)
        result = reader.readtext(image)
        return " ".join(entry[1] for entry in result)