mastari
/

text-removal-endpoint

Model card Files Files and versions

xet

Community

mastari committed on Nov 5, 2025

Commit

4ea7620

1 Parent(s): bd16110

again

Browse files

Files changed (1) hide show

handler.py +71 -34

handler.py CHANGED Viewed

@@ -1,12 +1,14 @@
 import base64, cv2, numpy as np, importlib.util
 from typing import Dict, Any
 class EndpointHandler:
     """
     Robust hybrid text-removal handler:
-      - Uses EasyOCR (pixel-level) if available
-      - Falls back to EAST detector otherwise
-      - Expands & merges masks for full caption coverage
     """
     def __init__(self, path: str = ""):
@@ -22,6 +24,7 @@ class EndpointHandler:
             self.use_easyocr = False
             print(f"[INIT] Using EAST model from {model_path}")
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         inputs = data.get("inputs", data)
         image_b64 = inputs.get("image")
@@ -32,6 +35,7 @@ class EndpointHandler:
         mask = self._make_mask(img)
         cleaned = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)
         vis = img.copy()
         contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         cv2.drawContours(vis, contours, -1, (0, 0, 255), 2)
@@ -41,6 +45,7 @@ class EndpointHandler:
             "cleaned_image": self._encode_image(cleaned),
         }
     def _decode_image(self, b64):
         data = base64.b64decode(b64)
         np_arr = np.frombuffer(data, np.uint8)
@@ -50,68 +55,100 @@ class EndpointHandler:
         _, buf = cv2.imencode(".png", im)
         return base64.b64encode(buf).decode("utf-8")
     def _make_mask(self, img):
         mask = np.zeros(img.shape[:2], np.uint8)
         if self.use_easyocr:
             results = self.reader.readtext(img)
-            for (_, box, _) in results:
-                pts = np.array(box, np.int32)
-                cv2.fillPoly(mask, [pts], 255)
         else:
             boxes = self._east_boxes(img)
             for (x0, y0, x1, y1) in boxes:
                 pad = 8
-                cv2.rectangle(mask, (max(0, x0-pad), max(0, y0-pad)),
-                              (min(img.shape[1], x1+pad), min(img.shape[0], y1+pad)),
-                              255, -1)
-        kernel = np.ones((9,9), np.uint8)
         mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)
         mask = cv2.dilate(mask, kernel, iterations=2)
-        # catch bright white backgrounds behind text
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
         bg = cv2.inRange(gray, 180, 255)
         mask = cv2.bitwise_or(mask, bg)
         return mask
     def _east_boxes(self, image, conf_threshold=0.5):
         h, w = image.shape[:2]
         new_w, new_h = 320, 320
-        r_w, r_h = w/new_w, h/new_h
-        blob = cv2.dnn.blobFromImage(image, 1.0, (new_w,new_h),
-                                     (123.68,116.78,103.94), swapRB=True, crop=False)
         self.net.setInput(blob)
         scores, geometry = self.net.forward(
             ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
         )
         rects, confidences = self._decode(scores, geometry, conf_threshold)
         indices = cv2.dnn.NMSBoxes(rects, confidences, conf_threshold, 0.4)
-        boxes=[]
-        if len(indices)>0:
             for i in indices.flatten():
-                x0,y0,x1,y1=rects[i]
-                boxes.append([max(0,int(x0*r_w)),max(0,int(y0*r_h)),
-                              min(w,int(x1*r_w)),min(h,int(y1*r_h))])
         return boxes
     def _decode(self, scores, geometry, conf_threshold):
-        num_rows,num_cols=scores.shape[2:4]
-        rects,confidences=[],[]
         for y in range(num_rows):
-            scores_data=scores[0,0,y]
-            x0=geometry[0,0,y];x1=geometry[0,1,y];x2=geometry[0,2,y];x3=geometry[0,3,y]
-            angles=geometry[0,4,y]
             for x in range(num_cols):
-                if scores_data[x]<conf_threshold: continue
-                offset_x,offset_y=x*4.0,y*4.0
-                angle=angles[x];cos,sin=np.cos(angle),np.sin(angle)
-                h_=x0[x]+x2[x];w_=x1[x]+x3[x]
-                end_x=int(offset_x+cos*x1[x]+sin*x2[x])
-                end_y=int(offset_y-sin*x1[x]+cos*x2[x])
-                start_x=int(end_x-w_);start_y=int(end_y-h_)
-                rects.append((start_x,start_y,end_x,end_y))
                 confidences.append(float(scores_data[x]))
-        return rects,confidences

 import base64, cv2, numpy as np, importlib.util
 from typing import Dict, Any
 class EndpointHandler:
     """
     Robust hybrid text-removal handler:
+      • Uses EasyOCR (pixel-level) if available
+      • Falls back to EAST detector otherwise
+      • Expands & merges masks for full caption coverage
+      • Returns both mask overlay and inpainted (cleaned) image
     """
     def __init__(self, path: str = ""):
             self.use_easyocr = False
             print(f"[INIT] Using EAST model from {model_path}")
+    # ----------------------------- INFERENCE -----------------------------
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         inputs = data.get("inputs", data)
         image_b64 = inputs.get("image")
         mask = self._make_mask(img)
         cleaned = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)
+        # visualize mask overlay
         vis = img.copy()
         contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         cv2.drawContours(vis, contours, -1, (0, 0, 255), 2)
             "cleaned_image": self._encode_image(cleaned),
         }
+    # ----------------------------- UTILITIES -----------------------------
     def _decode_image(self, b64):
         data = base64.b64decode(b64)
         np_arr = np.frombuffer(data, np.uint8)
         _, buf = cv2.imencode(".png", im)
         return base64.b64encode(buf).decode("utf-8")
+    # ----------------------------- MASK CREATION -----------------------------
     def _make_mask(self, img):
         mask = np.zeros(img.shape[:2], np.uint8)
         if self.use_easyocr:
             results = self.reader.readtext(img)
+            for det in results:
+                try:
+                    box, _, _ = det  # <-- fixed unpack order
+                    pts = np.array(box, np.int32)
+                    cv2.fillPoly(mask, [pts], 255)
+                except Exception as e:
+                    print(f"[WARN] Skipped invalid detection: {e}")
         else:
             boxes = self._east_boxes(img)
             for (x0, y0, x1, y1) in boxes:
                 pad = 8
+                cv2.rectangle(
+                    mask,
+                    (max(0, x0 - pad), max(0, y0 - pad)),
+                    (
+                        min(img.shape[1], x1 + pad),
+                        min(img.shape[0], y1 + pad),
+                    ),
+                    255,
+                    -1,
+                )
+        # Merge, dilate, and add bright backgrounds
+        kernel = np.ones((9, 9), np.uint8)
         mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)
         mask = cv2.dilate(mask, kernel, iterations=2)
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
         bg = cv2.inRange(gray, 180, 255)
         mask = cv2.bitwise_or(mask, bg)
         return mask
+    # ----------------------------- EAST FALLBACK -----------------------------
     def _east_boxes(self, image, conf_threshold=0.5):
         h, w = image.shape[:2]
         new_w, new_h = 320, 320
+        r_w, r_h = w / new_w, h / new_h
+        blob = cv2.dnn.blobFromImage(
+            image,
+            1.0,
+            (new_w, new_h),
+            (123.68, 116.78, 103.94),
+            swapRB=True,
+            crop=False,
+        )
         self.net.setInput(blob)
         scores, geometry = self.net.forward(
             ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
         )
         rects, confidences = self._decode(scores, geometry, conf_threshold)
         indices = cv2.dnn.NMSBoxes(rects, confidences, conf_threshold, 0.4)
+        boxes = []
+        if len(indices) > 0:
             for i in indices.flatten():
+                x0, y0, x1, y1 = rects[i]
+                boxes.append(
+                    [
+                        max(0, int(x0 * r_w)),
+                        max(0, int(y0 * r_h)),
+                        min(w, int(x1 * r_w)),
+                        min(h, int(y1 * r_h)),
+                    ]
+                )
         return boxes
     def _decode(self, scores, geometry, conf_threshold):
+        num_rows, num_cols = scores.shape[2:4]
+        rects, confidences = [], []
         for y in range(num_rows):
+            scores_data = scores[0, 0, y]
+            x0 = geometry[0, 0, y]
+            x1 = geometry[0, 1, y]
+            x2 = geometry[0, 2, y]
+            x3 = geometry[0, 3, y]
+            angles = geometry[0, 4, y]
             for x in range(num_cols):
+                if scores_data[x] < conf_threshold:
+                    continue
+                offset_x, offset_y = x * 4.0, y * 4.0
+                angle = angles[x]
+                cos, sin = np.cos(angle), np.sin(angle)
+                h_ = x0[x] + x2[x]
+                w_ = x1[x] + x3[x]
+                end_x = int(offset_x + cos * x1[x] + sin * x2[x])
+                end_y = int(offset_y - sin * x1[x] + cos * x2[x])
+                start_x = int(end_x - w_)
+                start_y = int(end_y - h_)
+                rects.append((start_x, start_y, end_x, end_y))
                 confidences.append(float(scores_data[x]))
+        return rects, confidences