LLDDWW committed on
Commit
26e52a1
·
1 Parent(s): d6bf509

Configure PaddleOCR to use Korean PP-OCRv5

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -9,7 +9,7 @@ import spaces
9
  import torch
10
  from PIL import Image
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
- from huggingface_hub import login
13
  from paddleocr import PaddleOCR
14
 
15
  # Hugging Face ํ† ํฐ์œผ๋กœ ๋กœ๊ทธ์ธ (Spaces Secret์—์„œ ๊ฐ€์ ธ์˜ด)
@@ -24,14 +24,30 @@ MED_MODEL_ID = "google/gemma-2-2b-it"
24
  OCR_READER = None
25
  MED_MODEL = None
26
  MED_TOKENIZER = None
 
27
 
28
  def load_models():
29
  """๋ชจ๋ธ๋“ค์„ ํ•œ ๋ฒˆ๋งŒ ๋กœ๋“œ"""
30
  global OCR_READER, MED_MODEL, MED_TOKENIZER
31
 
32
  if OCR_READER is None:
33
- print("๐Ÿ”„ Loading PaddleOCR (Korean)...")
34
- OCR_READER = PaddleOCR(lang='korean', use_angle_cls=True, show_log=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  print("โœ… PaddleOCR loaded!")
36
 
37
  if MED_MODEL is None:
 
9
  import torch
10
  from PIL import Image
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
+ from huggingface_hub import login, snapshot_download
13
  from paddleocr import PaddleOCR
14
 
15
  # Hugging Face ํ† ํฐ์œผ๋กœ ๋กœ๊ทธ์ธ (Spaces Secret์—์„œ ๊ฐ€์ ธ์˜ด)
 
24
  OCR_READER = None
25
  MED_MODEL = None
26
  MED_TOKENIZER = None
27
+ OCR_MODEL_REPO_ID = "PaddlePaddle/korean_PP-OCRv5_mobile_rec"
28
 
29
  def load_models():
30
  """๋ชจ๋ธ๋“ค์„ ํ•œ ๋ฒˆ๋งŒ ๋กœ๋“œ"""
31
  global OCR_READER, MED_MODEL, MED_TOKENIZER
32
 
33
  if OCR_READER is None:
34
+ print("๐Ÿ”„ Loading PaddleOCR (Korean PP-OCRv5 mobile recognition)...")
35
+ rec_model_dir = snapshot_download(
36
+ OCR_MODEL_REPO_ID,
37
+ allow_patterns=[
38
+ "*.pdmodel",
39
+ "*.pdiparams",
40
+ "*.pdparams",
41
+ "*.json",
42
+ "*.yml",
43
+ ],
44
+ )
45
+ OCR_READER = PaddleOCR(
46
+ lang='korean',
47
+ use_angle_cls=True,
48
+ show_log=False,
49
+ rec_model_dir=rec_model_dir,
50
+ )
51
  print("โœ… PaddleOCR loaded!")
52
 
53
  if MED_MODEL is None: