mastari committed on
Commit
4ea7620
·
1 Parent(s): bd16110
Files changed (1) hide show
  1. handler.py +71 -34
handler.py CHANGED
@@ -1,12 +1,14 @@
1
  import base64, cv2, numpy as np, importlib.util
2
  from typing import Dict, Any
3
 
 
4
  class EndpointHandler:
5
  """
6
  Robust hybrid text-removal handler:
7
- - Uses EasyOCR (pixel-level) if available
8
- - Falls back to EAST detector otherwise
9
- - Expands & merges masks for full caption coverage
 
10
  """
11
 
12
  def __init__(self, path: str = ""):
@@ -22,6 +24,7 @@ class EndpointHandler:
22
  self.use_easyocr = False
23
  print(f"[INIT] Using EAST model from {model_path}")
24
 
 
25
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
26
  inputs = data.get("inputs", data)
27
  image_b64 = inputs.get("image")
@@ -32,6 +35,7 @@ class EndpointHandler:
32
  mask = self._make_mask(img)
33
  cleaned = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)
34
 
 
35
  vis = img.copy()
36
  contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
37
  cv2.drawContours(vis, contours, -1, (0, 0, 255), 2)
@@ -41,6 +45,7 @@ class EndpointHandler:
41
  "cleaned_image": self._encode_image(cleaned),
42
  }
43
 
 
44
  def _decode_image(self, b64):
45
  data = base64.b64decode(b64)
46
  np_arr = np.frombuffer(data, np.uint8)
@@ -50,68 +55,100 @@ class EndpointHandler:
50
  _, buf = cv2.imencode(".png", im)
51
  return base64.b64encode(buf).decode("utf-8")
52
 
 
53
  def _make_mask(self, img):
54
  mask = np.zeros(img.shape[:2], np.uint8)
 
55
  if self.use_easyocr:
56
  results = self.reader.readtext(img)
57
- for (_, box, _) in results:
58
- pts = np.array(box, np.int32)
59
- cv2.fillPoly(mask, [pts], 255)
 
 
 
 
60
  else:
61
  boxes = self._east_boxes(img)
62
  for (x0, y0, x1, y1) in boxes:
63
  pad = 8
64
- cv2.rectangle(mask, (max(0, x0-pad), max(0, y0-pad)),
65
- (min(img.shape[1], x1+pad), min(img.shape[0], y1+pad)),
66
- 255, -1)
 
 
 
 
 
 
 
67
 
68
- kernel = np.ones((9,9), np.uint8)
 
69
  mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)
70
  mask = cv2.dilate(mask, kernel, iterations=2)
71
 
72
- # catch bright white backgrounds behind text
73
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
74
  bg = cv2.inRange(gray, 180, 255)
75
  mask = cv2.bitwise_or(mask, bg)
76
-
77
  return mask
78
 
 
79
  def _east_boxes(self, image, conf_threshold=0.5):
80
  h, w = image.shape[:2]
81
  new_w, new_h = 320, 320
82
- r_w, r_h = w/new_w, h/new_h
83
- blob = cv2.dnn.blobFromImage(image, 1.0, (new_w,new_h),
84
- (123.68,116.78,103.94), swapRB=True, crop=False)
 
 
 
 
 
 
85
  self.net.setInput(blob)
86
  scores, geometry = self.net.forward(
87
  ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
88
  )
89
  rects, confidences = self._decode(scores, geometry, conf_threshold)
90
  indices = cv2.dnn.NMSBoxes(rects, confidences, conf_threshold, 0.4)
91
- boxes=[]
92
- if len(indices)>0:
93
  for i in indices.flatten():
94
- x0,y0,x1,y1=rects[i]
95
- boxes.append([max(0,int(x0*r_w)),max(0,int(y0*r_h)),
96
- min(w,int(x1*r_w)),min(h,int(y1*r_h))])
 
 
 
 
 
 
97
  return boxes
98
 
99
  def _decode(self, scores, geometry, conf_threshold):
100
- num_rows,num_cols=scores.shape[2:4]
101
- rects,confidences=[],[]
102
  for y in range(num_rows):
103
- scores_data=scores[0,0,y]
104
- x0=geometry[0,0,y];x1=geometry[0,1,y];x2=geometry[0,2,y];x3=geometry[0,3,y]
105
- angles=geometry[0,4,y]
 
 
 
106
  for x in range(num_cols):
107
- if scores_data[x]<conf_threshold: continue
108
- offset_x,offset_y=x*4.0,y*4.0
109
- angle=angles[x];cos,sin=np.cos(angle),np.sin(angle)
110
- h_=x0[x]+x2[x];w_=x1[x]+x3[x]
111
- end_x=int(offset_x+cos*x1[x]+sin*x2[x])
112
- end_y=int(offset_y-sin*x1[x]+cos*x2[x])
113
- start_x=int(end_x-w_);start_y=int(end_y-h_)
114
- rects.append((start_x,start_y,end_x,end_y))
 
 
 
 
115
  confidences.append(float(scores_data[x]))
116
- return rects,confidences
117
 
 
1
  import base64, cv2, numpy as np, importlib.util
2
  from typing import Dict, Any
3
 
4
+
5
  class EndpointHandler:
6
  """
7
  Robust hybrid text-removal handler:
8
+ Uses EasyOCR (pixel-level) if available
9
+ Falls back to EAST detector otherwise
10
+ Expands & merges masks for full caption coverage
11
+ • Returns both mask overlay and inpainted (cleaned) image
12
  """
13
 
14
  def __init__(self, path: str = ""):
 
24
  self.use_easyocr = False
25
  print(f"[INIT] Using EAST model from {model_path}")
26
 
27
+ # ----------------------------- INFERENCE -----------------------------
28
  def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
29
  inputs = data.get("inputs", data)
30
  image_b64 = inputs.get("image")
 
35
  mask = self._make_mask(img)
36
  cleaned = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)
37
 
38
+ # visualize mask overlay
39
  vis = img.copy()
40
  contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
41
  cv2.drawContours(vis, contours, -1, (0, 0, 255), 2)
 
45
  "cleaned_image": self._encode_image(cleaned),
46
  }
47
 
48
+ # ----------------------------- UTILITIES -----------------------------
49
  def _decode_image(self, b64):
50
  data = base64.b64decode(b64)
51
  np_arr = np.frombuffer(data, np.uint8)
 
55
  _, buf = cv2.imencode(".png", im)
56
  return base64.b64encode(buf).decode("utf-8")
57
 
58
+ # ----------------------------- MASK CREATION -----------------------------
59
  def _make_mask(self, img):
60
  mask = np.zeros(img.shape[:2], np.uint8)
61
+
62
  if self.use_easyocr:
63
  results = self.reader.readtext(img)
64
+ for det in results:
65
+ try:
66
+ box, _, _ = det # <-- fixed unpack order
67
+ pts = np.array(box, np.int32)
68
+ cv2.fillPoly(mask, [pts], 255)
69
+ except Exception as e:
70
+ print(f"[WARN] Skipped invalid detection: {e}")
71
  else:
72
  boxes = self._east_boxes(img)
73
  for (x0, y0, x1, y1) in boxes:
74
  pad = 8
75
+ cv2.rectangle(
76
+ mask,
77
+ (max(0, x0 - pad), max(0, y0 - pad)),
78
+ (
79
+ min(img.shape[1], x1 + pad),
80
+ min(img.shape[0], y1 + pad),
81
+ ),
82
+ 255,
83
+ -1,
84
+ )
85
 
86
+ # Merge, dilate, and add bright backgrounds
87
+ kernel = np.ones((9, 9), np.uint8)
88
  mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)
89
  mask = cv2.dilate(mask, kernel, iterations=2)
90
 
 
91
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
92
  bg = cv2.inRange(gray, 180, 255)
93
  mask = cv2.bitwise_or(mask, bg)
 
94
  return mask
95
 
96
+ # ----------------------------- EAST FALLBACK -----------------------------
97
  def _east_boxes(self, image, conf_threshold=0.5):
98
  h, w = image.shape[:2]
99
  new_w, new_h = 320, 320
100
+ r_w, r_h = w / new_w, h / new_h
101
+ blob = cv2.dnn.blobFromImage(
102
+ image,
103
+ 1.0,
104
+ (new_w, new_h),
105
+ (123.68, 116.78, 103.94),
106
+ swapRB=True,
107
+ crop=False,
108
+ )
109
  self.net.setInput(blob)
110
  scores, geometry = self.net.forward(
111
  ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
112
  )
113
  rects, confidences = self._decode(scores, geometry, conf_threshold)
114
  indices = cv2.dnn.NMSBoxes(rects, confidences, conf_threshold, 0.4)
115
+ boxes = []
116
+ if len(indices) > 0:
117
  for i in indices.flatten():
118
+ x0, y0, x1, y1 = rects[i]
119
+ boxes.append(
120
+ [
121
+ max(0, int(x0 * r_w)),
122
+ max(0, int(y0 * r_h)),
123
+ min(w, int(x1 * r_w)),
124
+ min(h, int(y1 * r_h)),
125
+ ]
126
+ )
127
  return boxes
128
 
129
  def _decode(self, scores, geometry, conf_threshold):
130
+ num_rows, num_cols = scores.shape[2:4]
131
+ rects, confidences = [], []
132
  for y in range(num_rows):
133
+ scores_data = scores[0, 0, y]
134
+ x0 = geometry[0, 0, y]
135
+ x1 = geometry[0, 1, y]
136
+ x2 = geometry[0, 2, y]
137
+ x3 = geometry[0, 3, y]
138
+ angles = geometry[0, 4, y]
139
  for x in range(num_cols):
140
+ if scores_data[x] < conf_threshold:
141
+ continue
142
+ offset_x, offset_y = x * 4.0, y * 4.0
143
+ angle = angles[x]
144
+ cos, sin = np.cos(angle), np.sin(angle)
145
+ h_ = x0[x] + x2[x]
146
+ w_ = x1[x] + x3[x]
147
+ end_x = int(offset_x + cos * x1[x] + sin * x2[x])
148
+ end_y = int(offset_y - sin * x1[x] + cos * x2[x])
149
+ start_x = int(end_x - w_)
150
+ start_y = int(end_y - h_)
151
+ rects.append((start_x, start_y, end_x, end_y))
152
  confidences.append(float(scores_data[x]))
153
+ return rects, confidences
154