"""Analyze bbox_deltas to understand their format and how they refine box positions."""
import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import cv2
import numpy as np
import onnxruntime as ort

# Use a small image where we know the text positions
IMAGE_PATH = 'working_space/input/ocr_test (2).png'
img = cv2.imread(IMAGE_PATH)
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising; fail here with a clear message instead of letting
    # cvtColor die on an opaque OpenCV assertion.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
h, w = img_rgb.shape[:2]
print(f"Image: {w}x{h}")

# Per-channel mean subtracted from the detector input (applied below after the
# channels are flipped back to BGR order).
mean = np.array([102.9801, 115.9465, 122.7717], dtype=np.float32)

# Nominal scale: bring the longer side to 800 px, but never upscale by more
# than 6x.
scale = 800 / max(h, w)
scale = min(scale, 6.0)

# Detector input size: each scaled side is rounded up to a multiple of 32.
# The two sides are rounded independently, so the resize below is not
# guaranteed to be exactly uniform.
dh = (int(h * scale) + 31) // 32 * 32
dw = (int(w * scale) + 31) // 32 * 32
print(f"Scale={scale:.3f}, Det size: {dw}x{dh}")

img_det = cv2.resize(img_rgb, (dw, dh)).astype(np.float32)
# RGB -> BGR, subtract the mean, then HWC -> CHW with a leading batch axis.
det_data = (img_det[:,:,::-1] - mean).transpose(2,0,1)[np.newaxis]
# One row of (input height, input width, nominal scale) fed to the model.
im_info = np.array([[dh, dw, scale]], dtype=np.float32)

# Run the detector once on CPU and index every output tensor by its name.
sess = ort.InferenceSession(
    'oneocr_extracted/onnx_models/model_00_ir6_0.1.0_11282KB.onnx',
    providers=['CPUExecutionProvider'],
)
model_feeds = {'data': det_data, 'im_info': im_info}
outs = sess.run(None, model_feeds)  # None -> fetch all outputs
out_names = [node.name for node in sess.get_outputs()]
out_dict = {name: tensor for name, tensor in zip(out_names, outs)}

# Dump the name, shape and dtype of each output for orientation.
print(f"\nOutputs: {out_names}")
for name in out_names:
    tensor = out_dict[name]
    print(f"  {name}: shape={tensor.shape}, dtype={tensor.dtype}")

# Analyze bbox_deltas for FPN3
from scipy import ndimage  # imported once here instead of on every loop iteration

SCORE_THRESHOLD = 0.6  # a feature-map cell counts as text when its score exceeds this
MAX_COMPONENTS = 3     # only the first few connected components are dumped in detail
CORNER_NAMES = ("TL", "TR", "BR", "BL")

# The image was stretched to exactly (dw, dh) by cv2.resize, and dw/dh were each
# rounded up to a multiple of 32 independently. The true detector->original
# factor therefore differs per axis (w/dw for x, h/dh for y); dividing both axes
# by the nominal `scale` skews the back-projected coordinates.
det_to_orig = np.array([w / dw, h / dh], dtype=np.float64)


def _fmt_quad(quad):
    """Format a [4, 2] array of (x, y) corners as 'TL=(x,y) TR=(x,y) BR=(x,y) BL=(x,y)'."""
    return " ".join(
        f"{name}=({x:.0f},{y:.0f})" for name, (x, y) in zip(CORNER_NAMES, quad)
    )


for level, stride in [("fpn3", 8)]:
    scores = out_dict[f'scores_hori_{level}'][0, 0]  # [H, W]
    deltas = out_dict[f'bbox_deltas_hori_{level}'][0]  # [8, H, W]
    links = out_dict[f'link_scores_hori_{level}'][0]    # [8, H, W]; not used below

    fh, fw = scores.shape
    print(f"\n{level} (stride={stride}): score map {fw}x{fh}")

    # Find text pixels
    text_mask = scores > SCORE_THRESHOLD
    n_text = int(np.count_nonzero(text_mask))
    print(f"  Text pixels: {n_text}")
    if n_text == 0:
        # min()/max() on an empty selection raise ValueError, so there is
        # nothing meaningful to report for this level.
        print("  No text pixels above threshold; skipping delta analysis")
        continue

    # Show deltas at text positions
    print("\n  bbox_deltas stats at text pixels:")
    for ch in range(8):
        vals = deltas[ch][text_mask]
        print(f"    ch{ch}: min={vals.min():.2f} max={vals.max():.2f} mean={vals.mean():.2f} std={vals.std():.2f}")

    # Show deltas for specific text regions (connected components of the mask)
    labeled, n = ndimage.label(text_mask)
    print(f"\n  Components: {n}")

    for comp_id in range(1, min(n, MAX_COMPONENTS) + 1):
        ys, xs = np.where(labeled == comp_id)

        # Grid-based box (what we currently do): bounding cells scaled by stride
        grid_x1 = xs.min() * stride
        grid_y1 = ys.min() * stride
        grid_x2 = (xs.max() + 1) * stride
        grid_y2 = (ys.max() + 1) * stride

        print(f"\n  Component {comp_id}: grid box ({grid_x1},{grid_y1})-({grid_x2},{grid_y2}) in det coords")
        print(f"    Original coords: ({grid_x1 * det_to_orig[0]:.0f},{grid_y1 * det_to_orig[1]:.0f})"
              f"-({grid_x2 * det_to_orig[0]:.0f},{grid_y2 * det_to_orig[1]:.0f})")

        # Deltas at component pixels: one row per channel, one column per pixel.
        # Channel 2k is read as corner k's x, channel 2k+1 as corner k's y.
        comp_deltas = deltas[:8, ys, xs]  # [8, N]
        for ch, vals in enumerate(comp_deltas):
            corner, axis = divmod(ch, 2)
            coord = 'x' if axis == 0 else 'y'
            print(f"    delta[{ch}] (corner{corner}.{coord}): min={vals.min():.2f} max={vals.max():.2f} mean={vals.mean():.2f}")

        # Average each channel over all pixels in the component
        avg_deltas = comp_deltas.mean(axis=1)

        # Hypothesis 1: deltas are absolute offsets from grid box corners
        # TL = (grid_x1 + d0, grid_y1 + d1)
        # TR = (grid_x2 + d2, grid_y1 + d3)
        # BR = (grid_x2 + d4, grid_y2 + d5)
        # BL = (grid_x1 + d6, grid_y2 + d7)
        grid_quad = np.array(
            [
                [grid_x1, grid_y1],
                [grid_x2, grid_y1],
                [grid_x2, grid_y2],
                [grid_x1, grid_y2],
            ],
            dtype=np.float64,
        )
        h1_quad = grid_quad + avg_deltas.reshape(4, 2)
        print(f"    H1 (offset from grid): {_fmt_quad(h1_quad)}")

        # Hypothesis 2: each pixel predicts the 4 corners of its text region.
        # So for pixel (r, c), the predicted box is:
        #   TL = (c*stride + d0, r*stride + d1)
        #   TR = (c*stride + d2, r*stride + d3)
        #   BR = (c*stride + d4, r*stride + d5)
        #   BL = (c*stride + d6, r*stride + d7)
        # Computed for every component pixel at once instead of a Python loop.
        anchors = np.stack([xs, ys], axis=1) * stride  # [N, 2] as (x, y)
        offsets = comp_deltas.T.astype(np.float64).reshape(-1, 4, 2)  # [N, 4, 2]
        all_corners = anchors[:, np.newaxis, :] + offsets  # [N, 4, 2]

        # Average each corner across all pixels
        avg_corners = all_corners.mean(axis=0)
        print(f"    H2 (per-pixel avg): {_fmt_quad(avg_corners)}")
        print(f"    H2 original: {_fmt_quad(avg_corners * det_to_orig)}")

        # Also try: TL = min of all TL predictions, BR = max of all BR predictions
        min_tl = all_corners[:, 0, :].min(axis=0) * det_to_orig
        max_br = all_corners[:, 2, :].max(axis=0) * det_to_orig
        print(f"    H2 (min TL, max BR): ({min_tl[0]:.0f},{min_tl[1]:.0f})-({max_br[0]:.0f},{max_br[1]:.0f})")