Harry Pham committed on
Commit
d2abad3
·
1 Parent(s): d80899e

update OCR

Browse files
Files changed (1) hide show
  1. src/inference.py +22 -14
src/inference.py CHANGED
@@ -35,13 +35,23 @@ def get_det_model(checkpoint="best.pt"):
35
  _det_model = RTDETR(checkpoint)
36
  return _det_model
37
 
38
- # Thêm Surya OCR làm engine thứ 3
39
- from surya.ocr import run_ocr
40
- from surya.model.detection.model import load_det_processor, load_det_model
41
- from surya.model.recognition.model import load_rec_model
42
- from surya.model.recognition.processor import load_rec_processor
 
 
 
 
 
 
 
 
43
 
44
  def ocr_with_surya(img_bgr, langs=["vi", "en"]):
 
 
45
  det_processor, det_model = load_det_processor(), load_det_model()
46
  rec_model, rec_processor = load_rec_model(), load_rec_processor()
47
  from PIL import Image
@@ -256,16 +266,14 @@ def multi_pass_ocr(img_bgr, reader, ocr_type="note"):
256
  # DUAL-ENGINE OCR — PaddleOCR (vi) + PaddleOCR (en), chọn tốt hơn
257
  # ============================================================
258
  def run_ocr_with_backend(img_bgr, backend="paddle", ocr_type="note"):
259
- """
260
- Chạy OCR với backend được chọn.
261
- backend: "paddle", "easyocr", "surya"
262
- Trả về (list_of_texts, avg_confidence) - với surya, confidence luôn = 1.0
263
- """
264
  if backend == "surya":
265
- text = ocr_with_surya(img_bgr, langs=["vi", "en"])
266
- lines = [line.strip() for line in text.split("\n") if line.strip()]
267
- return lines, 1.0 # Surya không trả confidence, coi như 1.0
268
-
 
 
 
269
  # logic cũ cho paddle + easyocr
270
  reader_vi = get_paddle_reader('vi') if backend == "paddle" else None
271
  reader_en = get_paddle_reader('en') if backend == "paddle" else None
 
35
  _det_model = RTDETR(checkpoint)
36
  return _det_model
37
 
38
+ # ============================================================
39
+ # SURYA OCR (optional)
40
+ # ============================================================
41
+ SURYA_AVAILABLE = False
42
+ try:
43
+ from surya.ocr import run_ocr
44
+ from surya.model.detection.model import load_det_processor, load_det_model
45
+ from surya.model.recognition.model import load_rec_model
46
+ from surya.model.recognition.processor import load_rec_processor
47
+ SURYA_AVAILABLE = True
48
+ print("[INFO] Surya OCR is available")
49
+ except ImportError:
50
+ print("[WARN] Surya OCR not installed. Install with: pip install surya-ocr")
51
 
52
  def ocr_with_surya(img_bgr, langs=["vi", "en"]):
53
+ if not SURYA_AVAILABLE:
54
+ raise ImportError("Surya OCR is not installed. Please run: pip install surya-ocr")
55
  det_processor, det_model = load_det_processor(), load_det_model()
56
  rec_model, rec_processor = load_rec_model(), load_rec_processor()
57
  from PIL import Image
 
266
  # DUAL-ENGINE OCR — PaddleOCR (vi) + PaddleOCR (en), chọn tốt hơn
267
  # ============================================================
268
  def run_ocr_with_backend(img_bgr, backend="paddle", ocr_type="note"):
 
 
 
 
 
269
  if backend == "surya":
270
+ if not SURYA_AVAILABLE:
271
+ print("[WARN] Surya not available, falling back to paddle")
272
+ backend = "paddle"
273
+ else:
274
+ text = ocr_with_surya(img_bgr, langs=["vi", "en"])
275
+ lines = [line.strip() for line in text.split("\n") if line.strip()]
276
+ return lines, 1.0
277
  # logic cũ cho paddle + easyocr
278
  reader_vi = get_paddle_reader('vi') if backend == "paddle" else None
279
  reader_en = get_paddle_reader('en') if backend == "paddle" else None