"""Analyze bbox_deltas to understand their format and how they refine box positions."""
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import cv2
import numpy as np
import onnxruntime as ort
from scipy import ndimage

# Input image and detector model analysed by this script.
IMAGE_PATH = 'working_space/input/ocr_test (2).png'
MODEL_PATH = 'oneocr_extracted/onnx_models/model_00_ir6_0.1.0_11282KB.onnx'

# Per-channel mean subtracted from the detector input. The image is flipped
# back to BGR channel order before the subtraction (see main()).
BGR_MEAN = np.array([102.9801, 115.9465, 122.7717], dtype=np.float32)

# (output-name suffix, stride in detector pixels) for each level to inspect.
LEVELS = [("fpn3", 8)]

# Score above which a feature-map cell is treated as text.
TEXT_SCORE_THRESHOLD = 0.6

# Only the first few connected components are dumped, to keep output short.
MAX_COMPONENTS = 3

# Corner labels in the channel order this script hypothesises for bbox_deltas.
CORNER_LABELS = ("TL", "TR", "BR", "BL")


def compute_det_geometry(h, w, target=800, max_scale=6.0, multiple=32):
    """Return ``(scale, det_h, det_w)`` for an ``h`` x ``w`` image.

    The longer side is scaled towards ``target`` (the factor is capped at
    ``max_scale``), then each side is rounded up to a multiple of
    ``multiple``.
    """
    scale = min(target / max(h, w), max_scale)
    det_h = (int(h * scale) + multiple - 1) // multiple * multiple
    det_w = (int(w * scale) + multiple - 1) // multiple * multiple
    return scale, det_h, det_w


def decode_corners(deltas, ys, xs, stride):
    """Decode per-pixel quads as ``cell_origin + delta``.

    ``deltas`` is indexed ``[channel, row, col]``; channels ``2k`` / ``2k+1``
    are taken as the x / y offset of corner ``k``. ``ys`` / ``xs`` are the
    row / column indices of the cells to decode.

    Returns a float64 array of shape ``(len(ys), 4, 2)`` holding ``(x, y)``
    per corner, in detector-input pixels.
    """
    count = len(ys)
    # Gather all 8 channels for every selected cell at once: (8, N) -> (N, 4, 2).
    offsets = deltas[:8, ys, xs].astype(np.float64).T.reshape(count, 4, 2)
    origins = np.stack([xs, ys], axis=-1) * stride  # (N, 2) as (x, y)
    return offsets + origins[:, np.newaxis, :]


def format_quad(quad):
    """Format a ``(4, 2)`` array of corners as ``TL=(x,y) TR=(x,y) BR=(x,y) BL=(x,y)``."""
    return " ".join(
        f"{label}=({x:.0f},{y:.0f})" for label, (x, y) in zip(CORNER_LABELS, quad)
    )


def analyze_component(comp_id, comp_mask, deltas, stride, scale):
    """Print delta statistics and two decoding hypotheses for one component.

    ``comp_mask`` is a boolean map selecting the component's cells; it must
    select at least one cell.
    """
    ys, xs = np.nonzero(comp_mask)

    # Bounding box of the component's cells, in detector-input pixels.
    grid_x1 = xs.min() * stride
    grid_y1 = ys.min() * stride
    grid_x2 = (xs.max() + 1) * stride
    grid_y2 = (ys.max() + 1) * stride

    # NOTE(review): the detector input is resized to a size rounded up to a
    # multiple of 32, so dividing by `scale` only approximates the mapping
    # back to original-image pixels - confirm whether w/det_w and h/det_h
    # should be used instead.
    print(f"\n Component {comp_id}: grid box ({grid_x1},{grid_y1})-({grid_x2},{grid_y2}) in det coords")
    print(f" Original coords: ({grid_x1/scale:.0f},{grid_y1/scale:.0f})-({grid_x2/scale:.0f},{grid_y2/scale:.0f})")

    comp_deltas = deltas[:8, comp_mask]  # (8, N)
    for ch, vals in enumerate(comp_deltas):
        corner = ch // 2
        coord = 'x' if ch % 2 == 0 else 'y'
        print(f" delta[{ch}] (corner{corner}.{coord}): min={vals.min():.2f} max={vals.max():.2f} mean={vals.mean():.2f}")

    # H1: the mean delta of each channel is an offset from the matching
    # corner of the component's grid bounding box.
    avg_deltas = np.array([vals.mean() for vals in comp_deltas])
    grid_quad = np.array([
        [grid_x1, grid_y1],
        [grid_x2, grid_y1],
        [grid_x2, grid_y2],
        [grid_x1, grid_y2],
    ])
    print(f" H1 (offset from grid): {format_quad(grid_quad + avg_deltas.reshape(4, 2))}")

    # H2: every cell predicts a full quad relative to its own origin.
    all_corners = decode_corners(deltas, ys, xs, stride)
    avg_corners = all_corners.mean(axis=0)
    print(f" H2 (per-pixel avg): {format_quad(avg_corners)}")
    print(f" H2 original: {format_quad(avg_corners / scale)}")

    # Extreme top-left / bottom-right over all per-cell quads.
    min_tl = all_corners[:, 0, :].min(axis=0)
    max_br = all_corners[:, 2, :].max(axis=0)
    print(f" H2 (min TL, max BR): ({min_tl[0]/scale:.0f},{min_tl[1]/scale:.0f})-({max_br[0]/scale:.0f},{max_br[1]/scale:.0f})")


def analyze_level(level, stride, out_dict, scale):
    """Print bbox_deltas statistics for one horizontal output level.

    Reads ``scores_hori_<level>`` and ``bbox_deltas_hori_<level>`` from
    ``out_dict``. Returns early, with a message, when no cell exceeds
    ``TEXT_SCORE_THRESHOLD``, because min/max of an empty selection would
    raise.
    """
    scores = out_dict[f'scores_hori_{level}'][0, 0]
    deltas = out_dict[f'bbox_deltas_hori_{level}'][0]

    fh, fw = scores.shape
    print(f"\n{level} (stride={stride}): score map {fw}x{fh}")

    text_mask = scores > TEXT_SCORE_THRESHOLD
    print(f" Text pixels: {np.count_nonzero(text_mask)}")
    if not text_mask.any():
        print(" No text pixels above threshold; nothing to analyze.")
        return

    print(f"\n bbox_deltas stats at text pixels:")
    for ch, vals in enumerate(deltas[:8, text_mask]):
        print(f" ch{ch}: min={vals.min():.2f} max={vals.max():.2f} mean={vals.mean():.2f} std={vals.std():.2f}")

    labeled, n = ndimage.label(text_mask)
    print(f"\n Components: {n}")

    for comp_id in range(1, min(n, MAX_COMPONENTS) + 1):
        analyze_component(comp_id, labeled == comp_id, deltas, stride, scale)


def main():
    """Run the detector on ``IMAGE_PATH`` and dump bbox_deltas diagnostics."""
    img = cv2.imread(IMAGE_PATH)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w = img_rgb.shape[:2]
    print(f"Image: {w}x{h}")

    scale, dh, dw = compute_det_geometry(h, w)
    print(f"Scale={scale:.3f}, Det size: {dw}x{dh}")

    # Resize, flip channels back to BGR, subtract the mean, and lay out as NCHW.
    img_det = cv2.resize(img_rgb, (dw, dh)).astype(np.float32)
    det_data = (img_det[:, :, ::-1] - BGR_MEAN).transpose(2, 0, 1)[np.newaxis]
    im_info = np.array([[dh, dw, scale]], dtype=np.float32)

    sess = ort.InferenceSession(MODEL_PATH, providers=['CPUExecutionProvider'])
    outs = sess.run(None, {'data': det_data, 'im_info': im_info})
    out_names = [o.name for o in sess.get_outputs()]
    out_dict = dict(zip(out_names, outs))

    print(f"\nOutputs: {out_names}")
    for name in out_names:
        print(f" {name}: shape={out_dict[name].shape}, dtype={out_dict[name].dtype}")

    for level, stride in LEVELS:
        analyze_level(level, stride, out_dict, scale)


if __name__ == "__main__":
    main()