Spaces:
Sleeping
Sleeping
Harry Pham committed on
Commit ·
d2abad3
1
Parent(s): d80899e
update OCR
Browse files - src/inference.py +22 -14
src/inference.py
CHANGED
|
@@ -35,13 +35,23 @@ def get_det_model(checkpoint="best.pt"):
|
|
| 35 |
_det_model = RTDETR(checkpoint)
|
| 36 |
return _det_model
|
| 37 |
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
def ocr_with_surya(img_bgr, langs=["vi", "en"]):
|
|
|
|
|
|
|
| 45 |
det_processor, det_model = load_det_processor(), load_det_model()
|
| 46 |
rec_model, rec_processor = load_rec_model(), load_rec_processor()
|
| 47 |
from PIL import Image
|
|
@@ -256,16 +266,14 @@ def multi_pass_ocr(img_bgr, reader, ocr_type="note"):
|
|
| 256 |
# DUAL-ENGINE OCR — PaddleOCR (vi) + PaddleOCR (en), chọn tốt hơn
|
| 257 |
# ============================================================
|
| 258 |
def run_ocr_with_backend(img_bgr, backend="paddle", ocr_type="note"):
|
| 259 |
-
"""
|
| 260 |
-
Chạy OCR với backend được chọn.
|
| 261 |
-
backend: "paddle", "easyocr", "surya"
|
| 262 |
-
Trả về (list_of_texts, avg_confidence) - với surya, confidence luôn = 1.0
|
| 263 |
-
"""
|
| 264 |
if backend == "surya":
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
| 269 |
# logic cũ cho paddle + easyocr
|
| 270 |
reader_vi = get_paddle_reader('vi') if backend == "paddle" else None
|
| 271 |
reader_en = get_paddle_reader('en') if backend == "paddle" else None
|
|
|
|
| 35 |
_det_model = RTDETR(checkpoint)
|
| 36 |
return _det_model
|
| 37 |
|
| 38 |
+
# ============================================================
|
| 39 |
+
# SURYA OCR (optional)
|
| 40 |
+
# ============================================================
|
| 41 |
+
SURYA_AVAILABLE = False
|
| 42 |
+
try:
|
| 43 |
+
from surya.ocr import run_ocr
|
| 44 |
+
from surya.model.detection.model import load_det_processor, load_det_model
|
| 45 |
+
from surya.model.recognition.model import load_rec_model
|
| 46 |
+
from surya.model.recognition.processor import load_rec_processor
|
| 47 |
+
SURYA_AVAILABLE = True
|
| 48 |
+
print("[INFO] Surya OCR is available")
|
| 49 |
+
except ImportError:
|
| 50 |
+
print("[WARN] Surya OCR not installed. Install with: pip install surya-ocr")
|
| 51 |
|
| 52 |
def ocr_with_surya(img_bgr, langs=["vi", "en"]):
|
| 53 |
+
if not SURYA_AVAILABLE:
|
| 54 |
+
raise ImportError("Surya OCR is not installed. Please run: pip install surya-ocr")
|
| 55 |
det_processor, det_model = load_det_processor(), load_det_model()
|
| 56 |
rec_model, rec_processor = load_rec_model(), load_rec_processor()
|
| 57 |
from PIL import Image
|
|
|
|
| 266 |
# DUAL-ENGINE OCR — PaddleOCR (vi) + PaddleOCR (en), chọn tốt hơn
|
| 267 |
# ============================================================
|
| 268 |
def run_ocr_with_backend(img_bgr, backend="paddle", ocr_type="note"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
if backend == "surya":
|
| 270 |
+
if not SURYA_AVAILABLE:
|
| 271 |
+
print("[WARN] Surya not available, falling back to paddle")
|
| 272 |
+
backend = "paddle"
|
| 273 |
+
else:
|
| 274 |
+
text = ocr_with_surya(img_bgr, langs=["vi", "en"])
|
| 275 |
+
lines = [line.strip() for line in text.split("\n") if line.strip()]
|
| 276 |
+
return lines, 1.0
|
| 277 |
# logic cũ cho paddle + easyocr
|
| 278 |
reader_vi = get_paddle_reader('vi') if backend == "paddle" else None
|
| 279 |
reader_en = get_paddle_reader('en') if backend == "paddle" else None
|