Configure PaddleOCR to use Korean PP-OCRv5
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ import spaces
|
|
| 9 |
import torch
|
| 10 |
from PIL import Image
|
| 11 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 12 |
-
from huggingface_hub import login
|
| 13 |
from paddleocr import PaddleOCR
|
| 14 |
|
| 15 |
# Hugging Face ํ ํฐ์ผ๋ก ๋ก๊ทธ์ธ (Spaces Secret์์ ๊ฐ์ ธ์ด)
|
|
@@ -24,14 +24,30 @@ MED_MODEL_ID = "google/gemma-2-2b-it"
|
|
| 24 |
OCR_READER = None
|
| 25 |
MED_MODEL = None
|
| 26 |
MED_TOKENIZER = None
|
|
|
|
| 27 |
|
| 28 |
def load_models():
|
| 29 |
"""๋ชจ๋ธ๋ค์ ํ ๋ฒ๋ง ๋ก๋"""
|
| 30 |
global OCR_READER, MED_MODEL, MED_TOKENIZER
|
| 31 |
|
| 32 |
if OCR_READER is None:
|
| 33 |
-
print("๐ Loading PaddleOCR (Korean)...")
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
print("โ
PaddleOCR loaded!")
|
| 36 |
|
| 37 |
if MED_MODEL is None:
|
|
|
|
| 9 |
import torch
|
| 10 |
from PIL import Image
|
| 11 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 12 |
+
from huggingface_hub import login, snapshot_download
|
| 13 |
from paddleocr import PaddleOCR
|
| 14 |
|
| 15 |
# Hugging Face ํ ํฐ์ผ๋ก ๋ก๊ทธ์ธ (Spaces Secret์์ ๊ฐ์ ธ์ด)
|
|
|
|
| 24 |
OCR_READER = None
|
| 25 |
MED_MODEL = None
|
| 26 |
MED_TOKENIZER = None
|
| 27 |
+
OCR_MODEL_REPO_ID = "PaddlePaddle/korean_PP-OCRv5_mobile_rec"
|
| 28 |
|
| 29 |
def load_models():
|
| 30 |
"""๋ชจ๋ธ๋ค์ ํ ๋ฒ๋ง ๋ก๋"""
|
| 31 |
global OCR_READER, MED_MODEL, MED_TOKENIZER
|
| 32 |
|
| 33 |
if OCR_READER is None:
|
| 34 |
+
print("๐ Loading PaddleOCR (Korean PP-OCRv5 mobile recognition)...")
|
| 35 |
+
rec_model_dir = snapshot_download(
|
| 36 |
+
OCR_MODEL_REPO_ID,
|
| 37 |
+
allow_patterns=[
|
| 38 |
+
"*.pdmodel",
|
| 39 |
+
"*.pdiparams",
|
| 40 |
+
"*.pdparams",
|
| 41 |
+
"*.json",
|
| 42 |
+
"*.yml",
|
| 43 |
+
],
|
| 44 |
+
)
|
| 45 |
+
OCR_READER = PaddleOCR(
|
| 46 |
+
lang='korean',
|
| 47 |
+
use_angle_cls=True,
|
| 48 |
+
show_log=False,
|
| 49 |
+
rec_model_dir=rec_model_dir,
|
| 50 |
+
)
|
| 51 |
print("โ
PaddleOCR loaded!")
|
| 52 |
|
| 53 |
if MED_MODEL is None:
|