Devashishraghav committed on
Commit
22388fc
·
verified ·
1 Parent(s): 821a664

Upload processor/redaction.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. processor/redaction.py +42 -0
processor/redaction.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import easyocr
3
+ import numpy as np
4
+
5
+ # Initialize the EasyOCR reader once at import time (model weights are downloaded on first run)
6
+ reader = easyocr.Reader(['en'], gpu=False) # Set gpu=True if applicable
7
+
8
def redact_names(img: np.ndarray) -> np.ndarray:
    """Inpaint over OCR-detected text located in the card's assumed name region.

    Runs the module-level EasyOCR ``reader`` on ``img`` and selects every
    detection whose top-left corner lies below 65% of the image height and
    whose top-right corner lies left of 65% of the image width. Each selected
    box is painted (with padding) into a mask, and the masked pixels are then
    reconstructed with ``cv2.inpaint`` so the text is filled in from the
    surrounding background instead of being covered by a flat block.

    The selection is purely positional: the recognized string and its
    confidence are not inspected, so *any* text in that region is removed.

    Args:
        img: Image array whose first two dimensions are (height, width).
            NOTE(review): ``cv2.inpaint`` is documented for 8-bit 1- or
            3-channel input; confirm callers never pass BGRA images.

    Returns:
        A new, inpainted image when at least one detection was selected;
        otherwise the input ``img`` object itself, unmodified.
    """
    height, width = img.shape[:2]
    pad = 8  # Extra margin so glyph edges / anti-aliasing are fully covered.

    mask = np.zeros((height, width), dtype=np.uint8)
    found_text = False

    for bbox, _text, _prob in reader.readtext(img):
        # Corner order: top_left, top_right, bottom_right, bottom_left.
        tl, tr, _br, _bl = bbox

        # Heuristic: only redact text in the typical name region
        # (bottom 35% of the card, left 65% of the card).
        in_name_region = (
            int(tl[1]) > height * 0.65 and int(tr[0]) < width * 0.65
        )
        if not in_name_region:
            continue

        # Mask the axis-aligned extent of all four corners. Using only the
        # top-left/bottom-right pair under-covers skewed or rotated boxes
        # and leaves fragments of the text behind after inpainting.
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]
        top_left = (max(0, min(xs) - pad), max(0, min(ys) - pad))
        bottom_right = (
            min(width - 1, max(xs) + pad),
            min(height - 1, max(ys) + pad),
        )
        cv2.rectangle(mask, top_left, bottom_right, 255, -1)
        found_text = True

    if not found_text:
        return img

    # TELEA fills the masked area from nearby background pixels;
    # cv2.INPAINT_NS is the alternative algorithm.
    return cv2.inpaint(img, mask, inpaintRadius=7, flags=cv2.INPAINT_TELEA)