bizcards-extractor / biz /segmentation.py
Tamanna Alam
Add Gradio app
6548988
import cv2, numpy as np
from typing import List, Tuple, TypedDict
import logging
# Module-level logger; segment_cards uses it to report the full-image fallback.
log = logging.getLogger(__name__)
class Box(TypedDict):
    """Axis-aligned bounding box of one detected card.

    segment_cards builds these as plain dicts, with coordinates expressed in
    pixels of the image that was passed to it.
    """

    # Identifier of the form "card-<n>".
    id: str
    # Top-left corner of the box.
    x: int
    y: int
    # Width and height of the box.
    w: int
    h: int
    # Rotation reported by cv2.minAreaRect for the card (0.0 for the fallback box).
    angle: float
# Longest side, in pixels, of the working copy that segmentation runs on.
LONG_SIDE = 1800

# A contour is kept only if its area, as a fraction of the working image
# area, lies between these two bounds.
MIN_AREA_FRAC = 0.006
MAX_AREA_FRAC = 0.98

# Accepted range for the long-side / short-side ratio of a candidate's
# minimum-area rectangle.
AR_MIN = 0.55
AR_MAX = 2.80

# Minimum mean gray level (0-255) inside a candidate rectangle.
WHITENESS_MIN = 120
def _resize_keep(img, long_side=LONG_SIDE):
    """Downscale *img* so that its longer side is at most *long_side* pixels.

    The aspect ratio is kept. Images that already fit are returned unchanged;
    this function never upscales.

    Args:
        img: Image array whose first two dimensions are (height, width).
        long_side: Maximum allowed length of the longer side, in pixels.

    Returns:
        A ``(image, scale)`` tuple. ``scale`` is the factor that was applied
        to the input size, or ``1.0`` when the image was not resized.
    """
    h, w = img.shape[:2]
    s = long_side / max(h, w)
    if s >= 1.0:
        return img, 1.0
    # int() truncation can reach 0 on the short side of an extremely elongated
    # image, and cv2.resize rejects an empty target size, so keep >= 1 pixel.
    new_w = max(1, int(w * s))
    new_h = max(1, int(h * s))
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
    return resized, s
def _iou(a,b):
xa1,ya1,xa2,ya2=a; xb1,yb1,xb2,yb2=b
inter = max(0,min(xa2,xb2)-max(xa1,xb1)) * max(0,min(ya2,yb2)-max(ya1,yb1))
if inter == 0: return 0.0
area_a=(xa2-xa1)*(ya2-ya1); area_b=(xb2-xb1)*(yb2-yb1)
return inter / (area_a+area_b-inter+1e-6)
def _clip_span(start, length, limit):
    """Clip the 1-D span ``[start, start + length)`` to ``[0, limit]``.

    Returns the clipped ``(start, length)``. The length may come back zero or
    negative when no part of the span lies inside the limit.
    """
    if start < 0:
        # Drop the part hanging off the low edge rather than shifting the
        # whole span inward, which would push its far edge too far out.
        length += start
        start = 0
    return start, min(limit - start, length)


def _card_mask(gray):
    """Return an Otsu-thresholded, morphologically cleaned mask of *gray*."""
    _, mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    # `iterations` must be passed by keyword: the fourth positional parameter
    # of cv2.morphologyEx is `dst`, not the iteration count.
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, open_kernel, iterations=1)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, close_kernel, iterations=1)
    return mask


def segment_cards(image_bgr) -> Tuple[List[Box], int, int]:
    """Find bright, card-shaped regions in a BGR image.

    The image is downscaled to at most LONG_SIDE pixels on its longer side,
    converted to gray, smoothed and Otsu-thresholded. Each external contour is
    then filtered by area, by the size and aspect ratio of its minimum-area
    rectangle, and by the mean gray level inside that rectangle. Surviving
    rectangles are converted to axis-aligned boxes in input-image pixels, and
    overlapping boxes are suppressed, larger boxes winning.

    Args:
        image_bgr: Image array in BGR channel order, as expected by
            ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        ``(boxes, width, height)`` where ``width`` and ``height`` are the
        dimensions of ``image_bgr`` and ``boxes`` is sorted by ``(y, x)``.
        When nothing is detected, a single box covering the image minus a
        3% margin is returned instead.
    """
    H0, W0 = image_bgr.shape[:2]
    work, s = _resize_keep(image_bgr, LONG_SIDE)
    H, W = work.shape[:2]
    back = 1.0 / s  # maps working-resolution coordinates back to the input

    gray = cv2.cvtColor(work, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 7, 50, 50)
    bin_ = _card_mask(gray)
    # findContours returns two values on OpenCV 2.x/4.x and three on 3.x; the
    # contour list is always the second-to-last element.
    cnts = cv2.findContours(bin_, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    min_area = MIN_AREA_FRAC * (W * H)
    max_area = MAX_AREA_FRAC * (W * H)
    # One scratch mask reused for every candidate instead of a fresh
    # full-frame allocation per contour.
    mask = np.zeros((H, W), np.uint8)
    cand = []
    for c in cnts:
        a = cv2.contourArea(c)
        if a < min_area or a > max_area:
            continue
        r = cv2.minAreaRect(c)
        _center, (rw, rh), ang = r
        if rw < 10 or rh < 10:
            continue
        # Long side over short side, so always >= 1 here; the AR_MIN bound
        # only takes effect if it is configured above 1.
        ar = max(rw, rh) / max(1.0, min(rw, rh))
        if ar < AR_MIN or ar > AR_MAX:
            continue
        pts = cv2.boxPoints(r).astype(np.int32)
        mask.fill(0)
        cv2.drawContours(mask, [pts], -1, 255, -1)
        # Reject rectangles whose interior is too dark on average.
        if cv2.mean(gray, mask=mask)[0] < WHITENESS_MIN:
            continue
        xs, ys = pts[:, 0], pts[:, 1]
        x1, y1, x2, y2 = xs.min(), ys.min(), xs.max(), ys.max()
        # Rotated-rectangle corners can fall outside the frame; clip the
        # box to the input image on all four sides.
        x, w = _clip_span(int(x1 * back), int((x2 - x1) * back), W0)
        y, h = _clip_span(int(y1 * back), int((y2 - y1) * back), H0)
        if w <= 0 or h <= 0:
            continue
        cand.append((x, y, w, h, float(ang)))

    # Greedy overlap suppression: visit boxes largest first and keep a box
    # only if its IoU with every box already kept is below 0.20.
    cand.sort(key=lambda z: z[2] * z[3], reverse=True)
    picked = []
    for x, y, w, h, ang in cand:
        rect = (x, y, x + w, y + h)
        if all(_iou(rect, (px, py, px + pw, py + ph)) < 0.20
               for px, py, pw, ph, _ in picked):
            picked.append((x, y, w, h, ang))

    if not picked:
        # Nothing detected: fall back to the whole image inset by a 3% margin.
        m = int(min(W0, H0) * 0.03)
        x, y = m, m
        w, h = max(1, W0 - 2 * m), max(1, H0 - 2 * m)
        log.info("No contours -> fallback to full-image box.")
        boxes = [{"id": "card-0", "x": x, "y": y, "w": w, "h": h, "angle": 0.0}]
    else:
        # Ids follow the area ranking (largest first); the list itself is
        # re-ordered by position just below.
        boxes = [
            {"id": f"card-{i}", "x": x, "y": y, "w": w, "h": h, "angle": ang}
            for i, (x, y, w, h, ang) in enumerate(picked)
        ]
    boxes.sort(key=lambda b: (b["y"], b["x"]))
    return boxes, W0, H0