Commit: Update mdr_pdf_parser.py
Browse files — mdr_pdf_parser.py (+104, −44)
mdr_pdf_parser.py
CHANGED
|
@@ -708,32 +708,56 @@ class _MDR_DBPostProcess:
|
|
| 708 |
scores.append(score)
|
| 709 |
return boxes, scores
|
| 710 |
|
| 711 |
-
|
|
|
|
| 712 |
h, w = bmp.shape
|
|
|
|
| 713 |
contours, _ = cv2.findContours((bmp * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
|
| 714 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 715 |
boxes, scores = [], []
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 737 |
return np.array(boxes, dtype="int32"), scores
|
| 738 |
|
| 739 |
def _unclip(self, box, ratio):
|
|
@@ -779,20 +803,30 @@ class _MDR_DBPostProcess:
|
|
| 779 |
cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
|
| 780 |
return cv2.mean(bmp[ymin : ymax + 1, xmin : xmax + 1], mask)[0] if np.sum(mask) > 0 else 0.0
|
| 781 |
|
|
|
|
| 782 |
def __call__(self, outs_dict, shape_list):
|
| 783 |
-
pred = outs_dict['maps'][:, 0, :, :]
|
| 784 |
-
seg = pred > self.thresh
|
|
|
|
|
|
|
|
|
|
| 785 |
boxes_batch = []
|
| 786 |
for batch_idx in range(pred.shape[0]):
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
return boxes_batch
|
| 797 |
|
| 798 |
class _MDR_TextDetector(_MDR_PredictBase):
|
|
@@ -849,24 +883,50 @@ class _MDR_TextDetector(_MDR_PredictBase):
|
|
| 849 |
new_boxes.append(box)
|
| 850 |
return np.array(new_boxes)
|
| 851 |
|
|
|
|
| 852 |
def __call__(self, img):
|
| 853 |
ori_im = img.copy()
|
| 854 |
data = {"image": img}
|
|
|
|
| 855 |
data = mdr_ocr_transform(data, self.pre_op)
|
| 856 |
if data is None:
|
|
|
|
| 857 |
return None
|
| 858 |
-
|
| 859 |
-
|
|
|
|
|
|
|
| 860 |
return None
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 866 |
preds = {"maps": outputs[0]}
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 870 |
|
| 871 |
class _MDR_ClsPostProcess:
|
| 872 |
|
|
|
|
| 708 |
scores.append(score)
|
| 709 |
return boxes, scores
|
| 710 |
|
| 711 |
+
# In class _MDR_DBPostProcess:
def _boxes_from_bitmap(self, pred, bmp, dw, dh):
    """Extract quadrilateral text boxes from a binarized DB segmentation map.

    Args:
        pred: probability map, shape (H, W) — assumed same size as *bmp*; TODO confirm.
        bmp: binarized map (0/1 values), shape (H, W).
        dw, dh: destination (original image) width and height used to rescale
            box coordinates back from map space.

    Returns:
        Tuple ``(boxes, scores)`` where *boxes* is an int32 ndarray of kept
        quadrilaterals in original-image coordinates and *scores* is a list
        of per-box confidences.
    """
    h, w = bmp.shape
    print(f" DEBUG OCR: _boxes_from_bitmap: Processing bitmap of shape {h}x{w} for original dimensions {dw}x{dh}.")  # DEBUG
    contours, _ = cv2.findContours((bmp * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    num_contours_found = len(contours)
    print(f" DEBUG OCR: _boxes_from_bitmap: Found {num_contours_found} raw contours.")  # DEBUG

    # Cap the number of candidate contours at self.max_cand.
    num_contours_to_process = min(num_contours_found, self.max_cand)
    if num_contours_found > self.max_cand:
        print(f" DEBUG OCR: _boxes_from_bitmap: Processing limited to {self.max_cand} contours.")  # DEBUG

    boxes, scores = [], []
    kept_boxes_count = 0
    for i in range(num_contours_to_process):
        contour = contours[i]
        pts_mini_box, sside = self._get_mini_boxes(contour)
        if sside < self.min_sz:
            # Minimum-area rectangle too small — discard early.
            continue

        pts_arr = np.array(pts_mini_box)
        # score_mode is 'fast' by default
        current_score = self._box_score_fast(pred, pts_arr.reshape(-1, 2)) if self.score_m == "fast" else self._box_score_slow(pred, contour)

        if self.box_thresh > current_score:
            # Mean probability inside the box is below the confidence threshold.
            continue

        try:
            # unclip_ratio is self.unclip_r (default 1.5)
            box_unclipped = self._unclip(pts_arr, self.unclip_r).reshape(-1, 1, 2)
        except Exception:
            # Unclip can fail on degenerate polygons; skip this contour.
            continue

        box_final, sside_final = self._get_mini_boxes(box_unclipped)
        if sside_final < self.min_sz + 2:  # min_sz is 3
            # Still too small after expansion — discard.
            continue

        box_final_arr = np.array(box_final)
        # Rescale from bitmap coordinates to original image dimensions,
        # clamping to the valid pixel range.
        box_final_arr[:, 0] = np.clip(np.round(box_final_arr[:, 0] / w * dw), 0, dw)
        box_final_arr[:, 1] = np.clip(np.round(box_final_arr[:, 1] / h * dh), 0, dh)

        boxes.append(box_final_arr.astype("int32"))
        scores.append(current_score)
        kept_boxes_count += 1
    print(f" DEBUG OCR: _boxes_from_bitmap: Kept {kept_boxes_count} boxes after all filtering (size, score, unclip). Configured box_thresh: {self.box_thresh}, min_sz: {self.min_sz}.")  # DEBUG
    return np.array(boxes, dtype="int32"), scores
|
| 762 |
|
| 763 |
def _unclip(self, box, ratio):
|
|
|
|
| 803 |
cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
|
| 804 |
return cv2.mean(bmp[ymin : ymax + 1, xmin : xmax + 1], mask)[0] if np.sum(mask) > 0 else 0.0
|
| 805 |
|
| 806 |
+
# In class _MDR_DBPostProcess:
def __call__(self, outs_dict, shape_list):
    """Convert raw DB head output maps into per-batch text boxes.

    Args:
        outs_dict: dict holding the model output under key ``'maps'``;
            channel 0 is used as the probability map.
        shape_list: per-batch-item source shapes; first two entries of each
            row are interpreted as (height, width) — TODO confirm ordering
            against the preprocessing that builds shape_list.

    Returns:
        List with one ``{'points': boxes}`` dict per batch item.

    Raises:
        ValueError: if ``self.box_t`` is neither ``'poly'`` nor ``'quad'``.
    """
    pred = outs_dict['maps'][:, 0, :, :]
    seg = pred > self.thresh
    print(f" DEBUG OCR: _MDR_DBPostProcess: pred map shape: {pred.shape}, seg map shape: {seg.shape}, configured thresh: {self.thresh}")  # DEBUG
    print(f" DEBUG OCR: _MDR_DBPostProcess: Number of pixels in seg map above threshold (sum of all batches): {np.sum(seg)}")  # DEBUG

    boxes_batch = []
    for batch_idx in range(pred.shape[0]):
        sh, sw, _, _ = shape_list[batch_idx]
        current_pred_map = pred[batch_idx]
        current_seg_map = seg[batch_idx]

        # Optional dilation of the binary map widens thin text regions.
        mask = cv2.dilate(np.array(current_seg_map).astype(np.uint8), self.dila_k) if self.dila_k is not None else current_seg_map
        print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Input shape to postproc {sh}x{sw}. Sum of mask pixels: {np.sum(mask)}")  # DEBUG

        if self.box_t == 'poly':
            boxes, scores = self._polygons_from_bitmap(current_pred_map, mask, sw, sh)
        elif self.box_t == 'quad':
            boxes, scores = self._boxes_from_bitmap(current_pred_map, mask, sw, sh)
        else:
            raise ValueError("box_type must be 'quad' or 'poly'")
        print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Found {len(boxes)} boxes from bitmap processing (after score filtering within _boxes_from_bitmap).")  # DEBUG
        boxes_batch.append({'points': boxes})
    return boxes_batch
|
| 831 |
|
| 832 |
class _MDR_TextDetector(_MDR_PredictBase):
|
|
|
|
| 883 |
new_boxes.append(box)
|
| 884 |
return np.array(new_boxes)
|
| 885 |
|
| 886 |
+
# In class _MDR_TextDetector:
def __call__(self, img):
    """Run the full text-detection pipeline on one image.

    Steps: preprocess via ``mdr_ocr_transform`` → ONNX inference →
    ``_MDR_DBPostProcess`` → final poly/quad filtering against the
    original image shape.

    Args:
        img: input image array (copied before preprocessing so filtering
            can use the original shape).

    Returns:
        Filtered detection boxes, or ``None`` when preprocessing or ONNX
        inference fails.
    """
    ori_im = img.copy()
    data = {"image": img}
    print(f" DEBUG OCR: _MDR_TextDetector: Original image shape: {ori_im.shape}")  # DEBUG
    data = mdr_ocr_transform(data, self.pre_op)
    if data is None:
        print(" DEBUG OCR: _MDR_TextDetector: Preprocessing (mdr_ocr_transform) returned None. No text will be detected.")  # DEBUG
        return None

    processed_img, shape_list = data
    if processed_img is None:
        print(" DEBUG OCR: _MDR_TextDetector: Processed image after transform is None. No text will be detected.")  # DEBUG
        return None
    print(f" DEBUG OCR: _MDR_TextDetector: Processed image shape for ONNX: {processed_img.shape}, shape_list: {shape_list}")  # DEBUG

    # Add the batch dimension expected by the ONNX model.
    img_for_onnx = np.expand_dims(processed_img, axis=0)
    shape_list_for_onnx = np.expand_dims(shape_list, axis=0)
    img_for_onnx = img_for_onnx.copy()  # Ensure it's a contiguous array if ONNX runtime is sensitive

    inputs = self.get_input_feed(self.input_name, img_for_onnx)
    print(" DEBUG OCR: _MDR_TextDetector: Running ONNX inference for text detection...")  # DEBUG
    try:
        outputs = self.sess.run(self.output_name, input_feed=inputs)
    except Exception as e:
        print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference for detection failed: {e}")  # DEBUG
        import traceback
        traceback.print_exc()
        return None  # Stop if inference fails
    print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference done. Output map shape: {outputs[0].shape}")  # DEBUG

    preds = {"maps": outputs[0]}
    # post_op is _MDR_DBPostProcess
    post_res = self.post_op(preds, shape_list_for_onnx)

    boxes_from_post = post_res[0]['points']
    print(f" DEBUG OCR: _MDR_TextDetector: Boxes from DBPostProcess before final filtering: {len(boxes_from_post)}")  # DEBUG

    if self.args.det_box_type == 'poly':
        final_boxes = self._filter_poly(boxes_from_post, ori_im.shape)
    else:  # 'quad'
        final_boxes = self._filter_quad(boxes_from_post, ori_im.shape)
    print(f" DEBUG OCR: _MDR_TextDetector: Boxes after final poly/quad filtering: {len(final_boxes)}")  # DEBUG
    return final_boxes
|
| 930 |
|
| 931 |
class _MDR_ClsPostProcess:
|
| 932 |
|