Commit: Update mdr_pdf_parser.py
Browse files — mdr_pdf_parser.py (+104, −44)
mdr_pdf_parser.py
CHANGED
|
@@ -708,32 +708,56 @@ class _MDR_DBPostProcess:
|
|
| 708 |
scores.append(score)
|
| 709 |
return boxes, scores
|
| 710 |
|
| 711 |
-
|
|
|
|
| 712 |
h, w = bmp.shape
|
|
|
|
| 713 |
contours, _ = cv2.findContours((bmp * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
|
| 714 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 715 |
boxes, scores = [], []
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 737 |
return np.array(boxes, dtype="int32"), scores
|
| 738 |
|
| 739 |
def _unclip(self, box, ratio):
|
|
@@ -779,20 +803,30 @@ class _MDR_DBPostProcess:
|
|
| 779 |
cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
|
| 780 |
return cv2.mean(bmp[ymin : ymax + 1, xmin : xmax + 1], mask)[0] if np.sum(mask) > 0 else 0.0
|
| 781 |
|
|
|
|
| 782 |
def __call__(self, outs_dict, shape_list):
|
| 783 |
-
pred = outs_dict['maps'][:, 0, :, :]
|
| 784 |
-
seg = pred > self.thresh
|
|
|
|
|
|
|
|
|
|
| 785 |
boxes_batch = []
|
| 786 |
for batch_idx in range(pred.shape[0]):
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
return boxes_batch
|
| 797 |
|
| 798 |
class _MDR_TextDetector(_MDR_PredictBase):
|
|
@@ -849,24 +883,50 @@ class _MDR_TextDetector(_MDR_PredictBase):
|
|
| 849 |
new_boxes.append(box)
|
| 850 |
return np.array(new_boxes)
|
| 851 |
|
|
|
|
| 852 |
def __call__(self, img):
|
| 853 |
ori_im = img.copy()
|
| 854 |
data = {"image": img}
|
|
|
|
| 855 |
data = mdr_ocr_transform(data, self.pre_op)
|
| 856 |
if data is None:
|
|
|
|
| 857 |
return None
|
| 858 |
-
|
| 859 |
-
|
|
|
|
|
|
|
| 860 |
return None
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 866 |
preds = {"maps": outputs[0]}
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 870 |
|
| 871 |
class _MDR_ClsPostProcess:
|
| 872 |
|
|
|
|
| 708 |
scores.append(score)
|
| 709 |
return boxes, scores
|
| 710 |
|
| 711 |
+
# In class _MDR_DBPostProcess:
def _boxes_from_bitmap(self, pred, bmp, dw, dh):
    """Extract quadrilateral text boxes from a binarized DB segmentation map.

    Args:
        pred: probability map, shape (H, W) — assumed same size as *bmp*; TODO confirm.
        bmp: binarized map (0/1 values), shape (H, W).
        dw, dh: destination (original image) width and height used to rescale
            box coordinates back from map space.

    Returns:
        Tuple ``(boxes, scores)`` where *boxes* is an int32 ndarray of kept
        quadrilaterals in original-image coordinates and *scores* is a list
        of per-box confidences.
    """
    h, w = bmp.shape
    print(f" DEBUG OCR: _boxes_from_bitmap: Processing bitmap of shape {h}x{w} for original dimensions {dw}x{dh}.")  # DEBUG
    contours, _ = cv2.findContours((bmp * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    num_contours_found = len(contours)
    print(f" DEBUG OCR: _boxes_from_bitmap: Found {num_contours_found} raw contours.")  # DEBUG

    # Cap the number of candidate contours at self.max_cand.
    num_contours_to_process = min(num_contours_found, self.max_cand)
    if num_contours_found > self.max_cand:
        print(f" DEBUG OCR: _boxes_from_bitmap: Processing limited to {self.max_cand} contours.")  # DEBUG

    boxes, scores = [], []
    kept_boxes_count = 0
    for i in range(num_contours_to_process):
        contour = contours[i]
        pts_mini_box, sside = self._get_mini_boxes(contour)
        if sside < self.min_sz:
            # Minimum-area rectangle too small — discard early.
            continue

        pts_arr = np.array(pts_mini_box)
        # score_mode is 'fast' by default
        current_score = self._box_score_fast(pred, pts_arr.reshape(-1, 2)) if self.score_m == "fast" else self._box_score_slow(pred, contour)

        if self.box_thresh > current_score:
            # Mean probability inside the box is below the confidence threshold.
            continue

        try:
            # unclip_ratio is self.unclip_r (default 1.5)
            box_unclipped = self._unclip(pts_arr, self.unclip_r).reshape(-1, 1, 2)
        except Exception:
            # Unclip can fail on degenerate polygons; skip this contour.
            continue

        box_final, sside_final = self._get_mini_boxes(box_unclipped)
        if sside_final < self.min_sz + 2:  # min_sz is 3
            # Still too small after expansion — discard.
            continue

        box_final_arr = np.array(box_final)
        # Rescale from bitmap coordinates to original image dimensions,
        # clamping to the valid pixel range.
        box_final_arr[:, 0] = np.clip(np.round(box_final_arr[:, 0] / w * dw), 0, dw)
        box_final_arr[:, 1] = np.clip(np.round(box_final_arr[:, 1] / h * dh), 0, dh)

        boxes.append(box_final_arr.astype("int32"))
        scores.append(current_score)
        kept_boxes_count += 1
    print(f" DEBUG OCR: _boxes_from_bitmap: Kept {kept_boxes_count} boxes after all filtering (size, score, unclip). Configured box_thresh: {self.box_thresh}, min_sz: {self.min_sz}.")  # DEBUG
    return np.array(boxes, dtype="int32"), scores
|
| 762 |
|
| 763 |
def _unclip(self, box, ratio):
|
|
|
|
| 803 |
cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
|
| 804 |
return cv2.mean(bmp[ymin : ymax + 1, xmin : xmax + 1], mask)[0] if np.sum(mask) > 0 else 0.0
|
| 805 |
|
| 806 |
+
# In class _MDR_DBPostProcess:
def __call__(self, outs_dict, shape_list):
    """Convert raw DB head output maps into per-batch text boxes.

    Args:
        outs_dict: dict holding the model output under key ``'maps'``;
            channel 0 is used as the probability map.
        shape_list: per-batch-item source shapes; first two entries of each
            row are interpreted as (height, width) — TODO confirm ordering
            against the preprocessing that builds shape_list.

    Returns:
        List with one ``{'points': boxes}`` dict per batch item.

    Raises:
        ValueError: if ``self.box_t`` is neither ``'poly'`` nor ``'quad'``.
    """
    pred = outs_dict['maps'][:, 0, :, :]
    seg = pred > self.thresh
    print(f" DEBUG OCR: _MDR_DBPostProcess: pred map shape: {pred.shape}, seg map shape: {seg.shape}, configured thresh: {self.thresh}")  # DEBUG
    print(f" DEBUG OCR: _MDR_DBPostProcess: Number of pixels in seg map above threshold (sum of all batches): {np.sum(seg)}")  # DEBUG

    boxes_batch = []
    for batch_idx in range(pred.shape[0]):
        sh, sw, _, _ = shape_list[batch_idx]
        current_pred_map = pred[batch_idx]
        current_seg_map = seg[batch_idx]

        # Optional dilation of the binary map widens thin text regions.
        mask = cv2.dilate(np.array(current_seg_map).astype(np.uint8), self.dila_k) if self.dila_k is not None else current_seg_map
        print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Input shape to postproc {sh}x{sw}. Sum of mask pixels: {np.sum(mask)}")  # DEBUG

        if self.box_t == 'poly':
            boxes, scores = self._polygons_from_bitmap(current_pred_map, mask, sw, sh)
        elif self.box_t == 'quad':
            boxes, scores = self._boxes_from_bitmap(current_pred_map, mask, sw, sh)
        else:
            raise ValueError("box_type must be 'quad' or 'poly'")
        print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Found {len(boxes)} boxes from bitmap processing (after score filtering within _boxes_from_bitmap).")  # DEBUG
        boxes_batch.append({'points': boxes})
    return boxes_batch
|
| 831 |
|
| 832 |
class _MDR_TextDetector(_MDR_PredictBase):
|
|
|
|
| 883 |
new_boxes.append(box)
|
| 884 |
return np.array(new_boxes)
|
| 885 |
|
| 886 |
+
# In class _MDR_TextDetector:
def __call__(self, img):
    """Run the full text-detection pipeline on one image.

    Steps: preprocess via ``mdr_ocr_transform`` → ONNX inference →
    ``_MDR_DBPostProcess`` → final poly/quad filtering against the
    original image shape.

    Args:
        img: input image array (copied before preprocessing so filtering
            can use the original shape).

    Returns:
        Filtered detection boxes, or ``None`` when preprocessing or ONNX
        inference fails.
    """
    ori_im = img.copy()
    data = {"image": img}
    print(f" DEBUG OCR: _MDR_TextDetector: Original image shape: {ori_im.shape}")  # DEBUG
    data = mdr_ocr_transform(data, self.pre_op)
    if data is None:
        print(" DEBUG OCR: _MDR_TextDetector: Preprocessing (mdr_ocr_transform) returned None. No text will be detected.")  # DEBUG
        return None

    processed_img, shape_list = data
    if processed_img is None:
        print(" DEBUG OCR: _MDR_TextDetector: Processed image after transform is None. No text will be detected.")  # DEBUG
        return None
    print(f" DEBUG OCR: _MDR_TextDetector: Processed image shape for ONNX: {processed_img.shape}, shape_list: {shape_list}")  # DEBUG

    # Add the batch dimension expected by the ONNX model.
    img_for_onnx = np.expand_dims(processed_img, axis=0)
    shape_list_for_onnx = np.expand_dims(shape_list, axis=0)
    img_for_onnx = img_for_onnx.copy()  # Ensure it's a contiguous array if ONNX runtime is sensitive

    inputs = self.get_input_feed(self.input_name, img_for_onnx)
    print(" DEBUG OCR: _MDR_TextDetector: Running ONNX inference for text detection...")  # DEBUG
    try:
        outputs = self.sess.run(self.output_name, input_feed=inputs)
    except Exception as e:
        print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference for detection failed: {e}")  # DEBUG
        import traceback
        traceback.print_exc()
        return None  # Stop if inference fails
    print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference done. Output map shape: {outputs[0].shape}")  # DEBUG

    preds = {"maps": outputs[0]}
    # post_op is _MDR_DBPostProcess
    post_res = self.post_op(preds, shape_list_for_onnx)

    boxes_from_post = post_res[0]['points']
    print(f" DEBUG OCR: _MDR_TextDetector: Boxes from DBPostProcess before final filtering: {len(boxes_from_post)}")  # DEBUG

    if self.args.det_box_type == 'poly':
        final_boxes = self._filter_poly(boxes_from_post, ori_im.shape)
    else:  # 'quad'
        final_boxes = self._filter_quad(boxes_from_post, ori_im.shape)
    print(f" DEBUG OCR: _MDR_TextDetector: Boxes after final poly/quad filtering: {len(final_boxes)}")  # DEBUG
    return final_boxes
|
| 930 |
|
| 931 |
class _MDR_ClsPostProcess:
|
| 932 |
|