Harry Pham committed on
Commit
cb8c63c
·
1 Parent(s): 99bd42a

update OCR

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. src/inference.py +37 -2
requirements.txt CHANGED
@@ -11,4 +11,4 @@ huggingface_hub
11
  timm
12
  surya-ocr
13
  vietocr
14
-
 
11
  timm
12
  surya-ocr
13
  vietocr
14
+ realesrgan
src/inference.py CHANGED
@@ -28,6 +28,41 @@ _ocr_paddle_en = None
28
  _ocr_easyocr = None
29
  _ocr_vietocr = None
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # ============================================================
32
  # DOMAIN DICTIONARY — Từ điển bản vẽ kỹ thuật Việt Nam
33
  # ============================================================
@@ -800,10 +835,10 @@ def detect_table_structure(img_bgr):
800
  # ============================================================
801
 
802
  def ocr_cell_improved(img_cell, backend="paddle"):
803
- """OCR a single table cell with multiple attempts."""
804
  if img_cell is None or img_cell.size == 0:
805
  return ""
806
-
 
807
  h, w = img_cell.shape[:2]
808
  if h < 5 or w < 5:
809
  return ""
 
28
  _ocr_easyocr = None
29
  _ocr_vietocr = None
30
 
31
+
32
+ # Real-ESRGAN upscaler (optional)
33
+ REALESRGAN_AVAILABLE = False
34
+ try:
35
+ from realesrgan import RealESRGANer
36
+ from basicsr.archs.rrdbnet_arch import RRDBNet
37
+ REALESRGAN_AVAILABLE = True
38
+ print("[INFO] Real-ESRGAN available")
39
+ except ImportError:
40
+ print("[WARN] Real-ESRGAN not installed. Install: pip install realesrgan basicsr")
41
+
42
# Shared Real-ESRGAN upsampler, built lazily on first use (None until then).
_esrgan_upsampler = None


def get_esrgan_upsampler():
    """Return the shared Real-ESRGAN upsampler, or None if it is unavailable.

    The upsampler is built once and cached in the module-level
    ``_esrgan_upsampler``. Callers invoke this for every small image, and
    rebuilding the network and re-reading the weights file on each call
    would dominate the OCR run time.

    Returns:
        The cached ``RealESRGANer`` instance, or ``None`` when
        ``REALESRGAN_AVAILABLE`` is False.

    Raises:
        Whatever ``RRDBNet`` / ``RealESRGANer`` raise during construction
        (for example when the weights file is missing). Nothing is cached
        in that case, so a later call will retry.
    """
    global _esrgan_upsampler
    if not REALESRGAN_AVAILABLE:
        return None
    if _esrgan_upsampler is None:
        # Architecture parameters must match the weights file referenced below
        # (6 residual blocks, x4 native scale).
        model = RRDBNet(
            num_in_ch=3,
            num_out_ch=3,
            num_feat=64,
            num_block=6,
            num_grow_ch=32,
            scale=4,
        )
        _esrgan_upsampler = RealESRGANer(
            scale=4,
            model_path='weights/RealESRGAN_x4plus_anime_6B.pth',
            model=model,
            device=DEVICE,
        )
    return _esrgan_upsampler
53
+
54
def upscale_if_needed(img_bgr, min_dim=300):
    """Upscale a small image 2x with Real-ESRGAN, falling back to the input.

    The image is upscaled when EITHER its height or its width is below
    ``min_dim``. Upscaling is best-effort: if Real-ESRGAN is not installed,
    the upsampler cannot be built, or enhancement fails, a warning is
    printed (for failures) and the original image is returned unchanged.

    Args:
        img_bgr: Image array whose first two ``shape`` entries are height
            and width, or ``None``.
        min_dim: Size threshold in pixels applied to each dimension.

    Returns:
        The upscaled image on success, otherwise ``img_bgr`` itself.
    """
    if img_bgr is None:
        return img_bgr

    h, w = img_bgr.shape[:2]
    if h >= min_dim and w >= min_dim:
        # Both dimensions are already large enough; nothing to do.
        return img_bgr

    try:
        # Obtaining the upsampler is inside the try so that a missing or
        # broken weights file degrades to "no upscaling" instead of crashing.
        upsampler = get_esrgan_upsampler()
        if upsampler is not None:
            output, _ = upsampler.enhance(img_bgr, outscale=2)
            return output
    except Exception as e:
        print(f"[WARN] ESRGAN upscale failed: {e}")
    return img_bgr
66
  # ============================================================
67
  # DOMAIN DICTIONARY — Từ điển bản vẽ kỹ thuật Việt Nam
68
  # ============================================================
 
835
  # ============================================================
836
 
837
  def ocr_cell_improved(img_cell, backend="paddle"):
 
838
  if img_cell is None or img_cell.size == 0:
839
  return ""
840
+ # Upscale very small cells with ESRGAN
841
+ img_cell = upscale_if_needed(img_cell, min_dim=150)
842
  h, w = img_cell.shape[:2]
843
  if h < 5 or w < 5:
844
  return ""