# seeing.py
# Module that analyzes the current frame in INFO mode and builds a "summary sentence".
# Restructured to include all of the latest logic from test (1).py.
import os
import re
import math
import cv2
import numpy as np
from typing import List, Tuple, Dict, Any, Optional
# ==============================
# Module-level settings and constants
# ==============================
SIDE_LEFT = ["통살균", "원격제어", "예약", "내마음"]
SIDE_RIGHT = ["터보샷", "구김방지", "알림음", "빨래추가"]
SIDE_EUCLID_MAX_REL = 0.08
CATEGORY_OPTIONS = {
"세탁": ["불림", "애벌세탁", "강력", "표준", "적은때"],
"헹굼": ["5회", "4", "3", "2", "1"],
"탈수": ["건조맞춤", "강", "중", "약", "섬세"],
"물온도": ["95", "60", "40", "30", "냉수"],
}
READOUT_ORDER = ("세탁", "헹굼", "탈수", "물온도")
LABEL_SYNONYMS = {
r"\s+": "",
r"[**()\[\]]": "",
r"^이?터보\s*샷?$": "터보샷",
r"\*?터보\s*샷": "터보샷",
r"\*?알림\s*음(?:\(3초\))?": "알림음",
r"Wi[\-\s]?Fi": "WiFi",
r"일회": "1회", r"이회": "2회", r"삼회": "3회", r"사회": "4회", r"오회": "5회",
r"95\s*℃|95도": "95", r"60\s*℃|60도": "60",
r"40\s*℃|40도": "40", r"30\s*℃|30도": "30",
}
SIDE_SET = set(SIDE_LEFT + SIDE_RIGHT)
CAT2SET = {k:set(v) for k,v in CATEGORY_OPTIONS.items()}
ALL_ALLOWED = SIDE_SET.union(*CAT2SET.values())
# --- Center band settings ---
CENTER_BAND_PAD_REL = 0.06
CENTER_BAND_FALLBACK = (0.34, 0.66)
CENTER_RIGHT_MIN_PX = 6
CENTER_RIGHT_MIN_FRAC = 0.18
# --- Side-button matching settings ---
SIDE_COLW_REL = 0.08
SIDE_DMAX_REL = 0.25
SIDE_Y_GAP_MIN = 2
SIDE_Y_TOL_REL = 0.02
# === (ADD) Frame stabilizer ==========================================
class MotionStabilizer:
def __init__(self, downscale=0.5, ecc=True, homography=False,
max_iter=50, eps=1e-6):
self.ds = float(downscale)
self.warp_mode = (cv2.MOTION_HOMOGRAPHY if homography
else (cv2.MOTION_EUCLIDEAN if ecc else cv2.MOTION_TRANSLATION))
self.max_iter = int(max_iter)
self.eps = float(eps)
self.prev_gray_ds = None # float32 [0..1]
self.homography = bool(homography)
def reset(self):
self.prev_gray_ds = None
def _ds(self, img):
if self.ds and self.ds < 1.0:
h, w = img.shape[:2]
return cv2.resize(img, (int(w*self.ds), int(h*self.ds)), interpolation=cv2.INTER_AREA)
return img
def _undscale_warp(self, M):
s = self.ds
if self.homography:
S = np.array([[s,0,0],[0,s,0],[0,0,1]], np.float32)
Si = np.array([[1/s,0,0],[0,1/s,0],[0,0,1]], np.float32)
return Si @ M @ S
else:
A = np.eye(3, dtype=np.float32)
A[:2,:] = M
S = np.array([[s,0,0],[0,s,0],[0,0,1]], np.float32)
Si = np.array([[1/s,0,0],[0,1/s,0],[0,0,1]], np.float32)
A = Si @ A @ S
return A[:2,:]
def apply(self, frame_bgr):
g = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
g_ds = self._ds(g)
g_ds_f = g_ds.astype(np.float32) / 255.0
if self.prev_gray_ds is None:
self.prev_gray_ds = g_ds_f
return frame_bgr
warp = (np.eye(3, dtype=np.float32) if self.homography
else np.eye(2, 3, dtype=np.float32))
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
self.max_iter, self.eps)
try:
_, warp = cv2.findTransformECC(
templateImage=self.prev_gray_ds,
inputImage=g_ds_f,
warpMatrix=warp,
motionType=(cv2.MOTION_HOMOGRAPHY if self.homography else self.warp_mode),
criteria=criteria,
inputMask=None, gaussFiltSize=1
)
if self.homography:
Wf = self._undscale_warp(warp)
out = cv2.warpPerspective(frame_bgr, Wf, (frame_bgr.shape[1], frame_bgr.shape[0]),
flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
borderMode=cv2.BORDER_REPLICATE)
self.prev_gray_ds = cv2.warpPerspective(g_ds_f, warp, (g_ds_f.shape[1], g_ds_f.shape[0]),
flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
borderMode=cv2.BORDER_REPLICATE)
else:
Wf = self._undscale_warp(warp)
out = cv2.warpAffine(frame_bgr, Wf, (frame_bgr.shape[1], frame_bgr.shape[0]),
flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
borderMode=cv2.BORDER_REPLICATE)
self.prev_gray_ds = cv2.warpAffine(g_ds_f, warp, (g_ds_f.shape[1], g_ds_f.shape[0]),
flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
borderMode=cv2.BORDER_REPLICATE)
return out
except Exception:
self.prev_gray_ds = g_ds_f
return frame_bgr
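# A minimal standalone usage sketch for MotionStabilizer ("video.mp4" is a
# hypothetical path; summarize_scene() below wires the stabilizer in
# automatically via the module-level singleton):
#
#   stab = MotionStabilizer(downscale=0.5, ecc=True, homography=False)
#   cap = cv2.VideoCapture("video.mp4")
#   while True:
#       ok, frame = cap.read()
#       if not ok:
#           break
#       steady = stab.apply(frame)  # ECC-aligned against the previous frame
#   stab.reset()  # call on a scene cut so the template frame restarts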
# ===================================================================
# (ADD) Default toggle / singleton
STABILIZE_DEFAULT = True  # default ON; set to False to disable
_global_stabilizer = None
# ==============================
# Internal helper functions
# ==============================
def _canon_text(raw: str) -> str:
if not raw: return ""
s = str(raw)
for pat, rep in LABEL_SYNONYMS.items():
s = re.sub(pat, rep, s, flags=re.IGNORECASE)
m = re.fullmatch(r"([1-4])회", s)
if m:
s = m.group(1)
elif re.fullmatch(r"5", s):
s = "5회"
digits = re.sub(r"[^0-9]", "", s)
if digits and any(digits in v for v in CATEGORY_OPTIONS.values()):
s = digits if s != "5회" else "5회"
s = re.sub(r"[^0-9A-Za-z가-힣]", "", s)
return s
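# Examples of the normalization above (traceable through LABEL_SYNONYMS and
# the digit rules): "터보 샷" -> "터보샷", "알림음(3초)" -> "알림음",
# "삼회" -> "3", "오회" -> "5회", "95℃" -> "95", "냉 수" -> "냉수".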
def _is_side_button(tok: str) -> bool:
return tok in SIDE_SET
def _which_category(tok: str):
for cat, opts in CAT2SET.items():
if tok in opts: return cat
return None
def _order_pts(pts):
rect = np.zeros((4, 2), dtype=np.float32)
s = pts.sum(axis=1); d = np.diff(pts, axis=1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
rect[1] = pts[np.argmin(d)]
rect[3] = pts[np.argmax(d)]
return rect
def _warp_points(H, pts_xy):
pts = np.asarray(pts_xy, dtype=np.float32).reshape(-1,1,2)
return cv2.perspectiveTransform(pts, H).reshape(-1,2)
def _map_rect_from_rectified(Hinv, x, y, w, h, offset=(0,0)):
corners = np.float32([[x,y], [x+w,y], [x+w,y+h], [x,y+h]])
mapped = _warp_points(Hinv, corners)
x1,y1 = mapped.min(axis=0); x2,y2 = mapped.max(axis=0)
ox, oy = offset
return int(x1+ox), int(y1+oy), int(x2-x1), int(y2-y1)
def _easyocr_to_items(detections):
items = []
for bbox, text, conf in detections:
quad = np.array(bbox, dtype=float)
xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
cx, cy = float(sum(xs)/4), float(sum(ys)/4)
xyxy = np.array([min(xs), min(ys), max(xs), max(ys)], dtype=float)
items.append({"text": text.strip(), "conf": float(conf),
"box": quad, "center": (cx, cy), "xyxy": xyxy})
return items
def _detect_panel_roi(img_bgr, v_pctl=35, bh_kernel=31, min_area_frac=0.08,
                      ar_range=(1.1, 4.0), pad_frac=0.01):
h, w = img_bgr.shape[:2]
hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
V = hsv[:,:,2]
k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (bh_kernel, bh_kernel))
bh = cv2.morphologyEx(V, cv2.MORPH_BLACKHAT, k)
_, m_bh = cv2.threshold(bh, max(20, bh.mean() + 1.0*bh.std()), 255, cv2.THRESH_BINARY)
thr_dark = int(np.percentile(V, v_pctl))
m_dark = cv2.inRange(V, 0, thr_dark)
mask = cv2.bitwise_or(m_bh, m_dark)
mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(15,15)), 2)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7)), 1)
cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
H, W = h, w
best = None
for c in cnts:
area = cv2.contourArea(c)
if area < min_area_frac * (H*W): continue
hull = cv2.convexHull(c)
x,y,wid,hei = cv2.boundingRect(hull)
ar = max(wid,hei) / max(1, min(wid,hei))
if not (ar_range[0] <= ar <= ar_range[1]): continue
if (best is None) or (area > best[0]):
best = (area, (x,y,wid,hei))
if best is None:
return (0,0,W,H), mask
x,y,wid,hei = best[1]
pad = int(pad_frac * max(H, W))
x0 = max(0, x - pad); y0 = max(0, y - pad)
x1 = min(W, x + wid + pad); y1 = min(H, y + hei + pad)
return (x0,y0,x1,y1), mask
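# Returns ((x0, y0, x1, y1), mask): the padded panel bounding box in image
# coordinates (falling back to the full frame when no contour passes the
# area/aspect gates), plus the binary mask the box was derived from.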
def _deskew_panel_by_mask(panel_bgr, panel_mask_roi, min_quad_area_frac=0.05):
h, w = panel_bgr.shape[:2]
cnts, _ = cv2.findContours(panel_mask_roi, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if not cnts: return panel_bgr, None
c = max(cnts, key=cv2.contourArea)
if cv2.contourArea(c) < (min_quad_area_frac * h * w):
return panel_bgr, None
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
src = approx.reshape(4,2).astype(np.float32) if len(approx) == 4 else cv2.boxPoints(cv2.minAreaRect(c)).astype(np.float32)
src = _order_pts(src)
(tl, tr, br, bl) = src
Wt = int(max(np.linalg.norm(br-bl), np.linalg.norm(tr-tl))); Wt = max(100, Wt)
Ht = int(max(np.linalg.norm(tr-br), np.linalg.norm(tl-bl))); Ht = max(100, Ht)
dst = np.array([[0,0],[Wt-1,0],[Wt-1,Ht-1],[0,Ht-1]], dtype=np.float32)
H = cv2.getPerspectiveTransform(src, dst)
warped = cv2.warpPerspective(panel_bgr, H, (Wt, Ht), flags=cv2.INTER_CUBIC)
return warped, H
def _build_glare_mask(panel_bgr, v_thr=235, s_thr=45, lap_var_thr=25.0,
                      min_area_rel=1e-4, max_area_rel=2e-2, ar_min=3.0,
                      close_ks=5, open_ks=3, dil_ks=3):
h, w = panel_bgr.shape[:2]
hsv = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2HSV)
H, S, V = cv2.split(hsv)
m_hi = (V >= v_thr) & (S <= s_thr)
m = (m_hi.astype(np.uint8) * 255)
m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(close_ks,close_ks)), 1)
m = cv2.morphologyEx(m, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(open_ks,open_ks)), 1)
area_img = float(h*w)
out = np.zeros_like(m, dtype=np.uint8)
num, lab, stats, _ = cv2.connectedComponentsWithStats(m, 8)
gray = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2GRAY)
for i in range(1, num):
x,y,wid,hei,area = stats[i]
rel = area/area_img
if rel < min_area_rel or rel > max_area_rel: continue
ar = max(wid,hei)/max(1, min(wid,hei))
if ar < ar_min: continue
crop = gray[y:y+hei, x:x+wid]
if cv2.Laplacian(crop, cv2.CV_64F).var() > lap_var_thr: continue
out[lab==i] = 255
out = cv2.dilate(out, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(dil_ks,dil_ks)), 1)
ratio = out.sum() / 255.0 / area_img
return out, float(ratio)
def _apply_deglare_toneclip(panel_bgr, glare_mask, ring_px=3, add_v=18):
hsv = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2HSV)
H, S, V = cv2.split(hsv)
dil = cv2.dilate(glare_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ring_px*2+1, ring_px*2+1)), 1)
ring = cv2.bitwise_and(dil, cv2.bitwise_not(glare_mask))
if cv2.countNonZero(ring) == 0:
return panel_bgr
ring_med = int(np.median(V[ring.astype(bool)]))
cap = np.clip(ring_med + add_v, 0, 255).astype(np.uint8)
V2 = V.copy()
V2[glare_mask.astype(bool)] = np.minimum(V2[glare_mask.astype(bool)], cap)
return cv2.cvtColor(cv2.merge([H,S,V2]), cv2.COLOR_HSV2BGR)
def _ocr_with_deglare_when_needed(panel_rect_bgr, reader, area_gate=0.002):
det_orig = reader.readtext(panel_rect_bgr)
m_gl, ratio = _build_glare_mask(panel_rect_bgr)
if ratio < area_gate:
return det_orig
degl = _apply_deglare_toneclip(panel_rect_bgr, m_gl)
det_degl = reader.readtext(degl)
def _score(dets):
return sum(c for _,_,c in dets) + 0.3*sum(1 for _,t,_ in dets if len(re.sub(r"[^가-힣0-9]","",t))>0)
return det_degl if _score(det_degl) >= 0.85 * _score(det_orig) else det_orig
def _build_text_mask_from_easyocr(detections, shape_hw, dilate_px=2):
H, W = shape_hw[:2]
mask = np.zeros((H, W), np.uint8)
if not detections: return mask
polys = [np.array(bbox, dtype=np.int32).reshape(-1, 1, 2) for bbox, _, _ in detections]
if polys:
cv2.fillPoly(mask, polys, 255)
if dilate_px > 0:
k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_px*2+1, dilate_px*2+1))
mask = cv2.dilate(mask, k, 1)
return mask
def _auto_led_params_simple(shape, k_frac=0.015, area_lo_frac=1e-5, area_hi_frac=1.5e-3):
h, w = shape[:2]
long_side = max(h, w)
k_auto = int(round(long_side * k_frac))
if k_auto % 2 == 0: k_auto += 1
k_auto = max(5, min(k_auto, 31))
min_area = max(6, int(h * w * area_lo_frac))
max_area = max(min_area+1, int(h * w * area_hi_frac))
return k_auto, min_area, max_area
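# Worked example of the formulas above: for a 1920x1080 frame,
# long_side=1920 gives k_auto=29 (1920*0.015≈28.8, rounded, kept odd,
# clamped to [5, 31]); min_area=max(6, int(2073600*1e-5))=20 and
# max_area=int(2073600*1.5e-3)=3110.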
def _detect_leds_glare_core(img_bgr, k=None, sigma=2.3, ring_px=7, ring_v_thr=200,
                            core_s_thr_bg=78, dv_thr_bg=45, strict_aspect=(2.0, 4.2),
                            strict_extent=0.64, strict_solidity=0.80,
                            include_white=False, exclude_mask=None, dv_thr_any=35,
                            min_short_px=10, min_area_abs=40):
def _masked_mean_median(img_gray, mask_bool):
vals = img_gray[mask_bool]
return (float(vals.mean()), float(np.median(vals))) if vals.size > 0 else (0.0, 0.0)
k_auto, min_area, max_area = _auto_led_params_simple(img_bgr.shape)
if not k or k <= 0: k = k_auto
g = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
g_eq = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(g)
Hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
H,S,V = cv2.split(Hsv)
se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k))
tophat = cv2.morphologyEx(g_eq, cv2.MORPH_TOPHAT, se)
m, s = float(tophat.mean()), float(tophat.std())
_, seed_th = cv2.threshold(tophat, np.clip(m + sigma*s, 40, 240), 255, cv2.THRESH_BINARY)
_, seed_v = cv2.threshold(V, 210, 255, cv2.THRESH_BINARY)
seed = cv2.bitwise_or(seed_th, seed_v)
m_color = (cv2.inRange(H, 35, 85) | cv2.inRange(H, 90, 140)) & (cv2.inRange(S, 50, 255) & cv2.inRange(V, 160, 255))
if include_white: m_color |= (cv2.inRange(S, 0, 60) & cv2.inRange(V, 200, 255))
reinforced = cv2.bitwise_and(seed, cv2.dilate(m_color, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)), 1))
ratio = (cv2.countNonZero(reinforced) / float(max(1, cv2.countNonZero(seed)))) if cv2.countNonZero(seed)>0 else 0.0
core = reinforced if ratio >= 0.3 else seed
if exclude_mask is not None:
core = cv2.bitwise_and(core, cv2.bitwise_not(exclude_mask))
core = cv2.medianBlur(core, 3)
core = cv2.morphologyEx(core, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)), 1)
core = cv2.morphologyEx(core, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5)), 1)
num, lab, stats, cents = cv2.connectedComponentsWithStats(core, 8)
for i in range(1, num):
if (min_short_px and stats[i,3] < min_short_px) or not (max(min_area, min_area_abs) <= stats[i,4] <= max_area):
core[lab == i] = 0
num, lab, stats, cents = cv2.connectedComponentsWithStats(core, 8)
leds, ring_kernel = [], cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ring_px*2+1, ring_px*2+1))
for i in range(1, num):
x,y,wid,hei,area = stats[i]
aspect = max(wid,hei) / max(1, min(wid,hei))
if aspect > 6.5: continue
comp_mask = (lab == i)
dil = cv2.dilate(comp_mask.astype(np.uint8), ring_kernel, 1).astype(bool)
ring_mask = np.logical_and(dil, np.logical_not(comp_mask))
core_v_mean, _ = _masked_mean_median(V, comp_mask)
_, ring_med = _masked_mean_median(V, ring_mask)
if (core_v_mean - ring_med) < dv_thr_any: continue
if ring_med >= ring_v_thr:
cnts, _ = cv2.findContours((comp_mask.astype(np.uint8) * 255), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if cnts:
c = max(cnts, key=cv2.contourArea)
solidity = cv2.contourArea(c) / max(1.0, cv2.contourArea(cv2.convexHull(c)))
extent = area / float(max(1, wid*hei))
if not (strict_aspect[0] <= aspect <= strict_aspect[1]) or extent < strict_extent or solidity < strict_solidity:
continue
leds.append((int(x), int(y), int(wid), int(hei), (float(cents[i][0]), float(cents[i][1])), float(core_v_mean)))
return leds
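# Each detected LED is a tuple (x, y, w, h, (cx, cy), core_v_mean): the
# bounding box, the component centroid, and the mean V (brightness) of the
# core pixels; the brightness is later used to rank competing candidates.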
def _norm_ko(s: str) -> str:
return re.sub(r"\s+", "", s or "")
def _find_category_anchors(items):
anchors = {}
for it in items:
raw = it["text"]; norm = _norm_ko(raw)
for cat in CATEGORY_OPTIONS.keys():
if cat in norm:
x1,y1,x2,y2 = it["xyxy"]; h = (y2 - y1); area = (x2 - x1) * h
prev = anchors.get(cat)
if not prev or (h > prev.get("_h", -1)) or (h == prev.get("_h", -1) and area > prev.get("_a", -1)):
anchors[cat] = {"center": it["center"], "xyxy": it["xyxy"], "_h": h, "_a": area}
for cat in anchors:
anchors[cat].pop("_h", None); anchors[cat].pop("_a", None)
return anchors
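# Illustrative result shape (coordinates are made up):
#   {"세탁": {"center": (412.0, 188.5), "xyxy": array([380., 170., 445., 207.])}}
# The tallest OCR box containing the category name wins; ties go to the
# larger area.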
def _compute_center_band(items, img_shape):
H, W = img_shape[:2]
xs = [x for it in items if any(cat in _norm_ko(it["text"]) for cat in CATEGORY_OPTIONS.keys()) for x in (it["xyxy"][0], it["xyxy"][2])]
if len(xs) >= 2:
left = max(0.0, min(xs) - CENTER_BAND_PAD_REL * W)
right = min(float(W), max(xs) + CENTER_BAND_PAD_REL * W)
else:
left, right = CENTER_BAND_FALLBACK[0] * W, CENTER_BAND_FALLBACK[1] * W
return float(left), float(right)
def _match_leds_to_texts(items, leds, img_shape, dmax_px=None, rel_gate=1.1, x_orient_eps=4, y_orient_eps=0):
Hh, Ww = img_shape[:2]
dmax_px = dmax_px or max(50, int(0.065 * max(Hh, Ww)))
band_left, band_right = _compute_center_band(items, img_shape)
side_colw, side_dmax, side_y_tol, side_eucl_max = SIDE_COLW_REL*max(Hh,Ww), SIDE_DMAX_REL*max(Hh,Ww), SIDE_Y_TOL_REL*Hh, SIDE_EUCLID_MAX_REL*max(Hh,Ww)
choices = []
for li, (_x,_y,_w,_h,(cx, cy),bright) in enumerate(leds):
best_cand = None
for ti, it in enumerate(items):
            tx, ty = it["center"]
            tw = it["xyxy"][2] - it["xyxy"][0]
            th = it["xyxy"][3] - it["xyxy"][1]
            raw = it["text"]
            x1 = it["xyxy"][0]
tok = _canon_text(raw)
if not tok or tok not in ALL_ALLOWED: continue
dist = 0
if _is_side_button(tok):
                # The LED must lie outside the center band
if (cx < band_left or cx > band_right) and ty >= cy - side_y_tol and abs(tx - cx) <= max(side_colw, 0.5*tw):
dist = max(0.0, ty - cy) + 0.3 * abs(tx - cx)
if dist > side_dmax or math.hypot(tx - cx, ty - cy) > side_eucl_max:
continue
else:
if band_left <= cx <= band_right and band_left <= tx <= band_right and abs(ty-cy) <= max(y_orient_eps, 0.6*th) and x1 >= cx + max(CENTER_RIGHT_MIN_PX, CENTER_RIGHT_MIN_FRAC*tw):
dist = math.hypot(tx-cx, ty-cy)
if dist > dmax_px: continue
if dist > 0 and (not best_cand or dist < best_cand[0]):
best_cand = (dist, ti, tok)
if best_cand:
dist, ti, tok = best_cand
choices.append((dist, li, ti, tok, float(bright), tuple(items[ti]["center"]), (cx,cy)))
choices.sort(key=lambda x: x[0])
used_led, used_txt, pairs_led = set(), set(), []
for d, li, ti, tok, bri, ptxt, pled in choices:
if li not in used_led and ti not in used_txt:
used_led.add(li); used_txt.add(ti)
pairs_led.append((ptxt, pled, tok, li, bri))
pairs_led.sort(key=lambda p: (int(p[1][1] // 30), p[1][0]))
return [p[2] for p in pairs_led], pairs_led
def _choose_and_enforce_categories(pairs_led, items, leds, img_shape, cw_rel=0.06, dmax_rel=0.20, fill_default=None):
H, W = img_shape[:2]; L = max(H, W)
colw, dmax = cw_rel * L, dmax_rel * L
picked = {}
bucket = {cat: [] for cat in CATEGORY_OPTIONS.keys()}
for _, _, tok, li, bri in pairs_led:
cat = _which_category(tok)
if cat: bucket[cat].append((tok, bri, li))
for cat, arr in bucket.items():
if arr: picked[cat] = max(arr, key=lambda x: x[1])[0]
anchors = _find_category_anchors(items)
for cat in CATEGORY_OPTIONS:
if cat in picked: continue
a = anchors.get(cat)
if a:
ax, ay = a["center"]
cand_leds = sorted([ (bri, idx) for idx, (*_, (cx,cy), bri) in enumerate(leds) if abs(cx-ax)<=colw and cy>=ay-2 ], reverse=True)
if cand_leds:
led_center = leds[cand_leds[0][1]][4]
best_tok, best_d = None, dmax
for it in items:
tok = _canon_text(it["text"])
if tok in CAT2SET[cat]:
tx, ty = it["center"]
if abs(tx - ax) <= colw and ty >= ay - 2:
d = math.hypot(tx - led_center[0], ty - led_center[1])
if d < best_d: best_d, best_tok = d, tok
picked[cat] = best_tok or (fill_default.get(cat) if fill_default else "미확인")
return picked
# --- Category diagnostic status (ON / TXT_ONLY / NO_TXT) builder ---
STATE_KR = {"ON":"확인됨", "TXT_ONLY":"텍스트만", "NO_TXT":"텍스트없음"}
def _build_category_status(items, pairs_led):
"""
각 카테고리에 대해 OCR 인식/LED 매칭 상태를 진단.
반환: {cat: {"picked": 토큰 또는 "미확인",
"state": "ON" | "TXT_ONLY" | "NO_TXT"}}
"""
    # 1) Collect candidate tokens read by OCR
ocr_tokens_by_cat = {cat: set() for cat in CATEGORY_OPTIONS.keys()}
for it in items:
tok = _canon_text(it.get("text",""))
cat = _which_category(tok)
if cat:
ocr_tokens_by_cat[cat].add(tok)
    # 2) Collect tokens confirmed by LED-text matching
led_matched_by_cat = {cat: set() for cat in CATEGORY_OPTIONS.keys()}
for _,_,tok,_,_ in pairs_led:
cat = _which_category(tok)
if cat:
led_matched_by_cat[cat].add(tok)
    # 3) Build the status map
status = {}
for cat in CATEGORY_OPTIONS.keys():
if led_matched_by_cat[cat]:
picked = sorted(list(led_matched_by_cat[cat]))[0]
state = "ON"
elif ocr_tokens_by_cat[cat]:
picked = "미확인"
state = "TXT_ONLY"
else:
picked = "미확인"
state = "NO_TXT"
status[cat] = {"picked": picked, "state": state}
return status
def _compose_readout(cat_map, side_on, order=READOUT_ORDER,
diag_status: Dict[str, Dict[str,str]] = None,
state_labels: Dict[str,str] = STATE_KR):
"""
diag_status가 있으면 각 카테고리 뒤에 (상태)를 붙임.
상태: ON | TXT_ONLY | NO_TXT (한국어 꼬리표는 STATE_KR로 매핑)
"""
parts = []
for k in order:
val = cat_map.get(k, "미확인")
if diag_status and k in diag_status:
st = diag_status[k]["state"]
tail = state_labels.get(st, st) if state_labels else st
parts.append(f"{k} {val}({tail})")
else:
parts.append(f"{k} {val}")
cat_sentence = ", ".join(parts)
side_sentence = " / ".join(side_on) if side_on else ""
final_parts = [p for p in (cat_sentence, side_sentence) if p]
return ", ".join(final_parts) if final_parts else "켜진 표시 없음"
# ==============================
# Final summary entry point
# ==============================
def summarize_scene(frame_bgr: np.ndarray, reader,
do_pic=True, debug_font=None,
debug_dir: Optional[str]=None,
diagnostic: bool=False,
                    # === (ADD) stabilization options ===
stabilize: Optional[bool]=None,
stabilizer: Optional[MotionStabilizer]=None) -> str:
"""
현재 프레임(frame_bgr)을 분석하여, '조작 패널 상태'에 대한 한국어 요약 문장을 반환.
"""
try:
        # === (ADD) choose the (possibly stabilized) input frame ===
use_stab = STABILIZE_DEFAULT if (stabilize is None) else bool(stabilize)
frame_in = frame_bgr
if use_stab:
global _global_stabilizer
st = stabilizer or _global_stabilizer
if st is None:
st = MotionStabilizer(downscale=0.5, ecc=True, homography=False)
_global_stabilizer = st
frame_in = st.apply(frame_bgr)
        # 1. Detect the panel ROI and rectify it (using frame_in)
(x0,y0,x1,y1), panel_mask_full = _detect_panel_roi(frame_in)
panel_bgr = frame_in[y0:y1, x0:x1].copy()
panel_mask_roi = panel_mask_full[y0:y1, x0:x1].copy()
panel_rect, H = _deskew_panel_by_mask(panel_bgr, panel_mask_roi)
Hinv = np.linalg.inv(H) if H is not None else None
        # 2. OCR (with de-glare when needed)
result_panel = _ocr_with_deglare_when_needed(panel_rect, reader)
items_local = _easyocr_to_items(result_panel)
        # 3. Build a text mask and detect LEDs
text_mask_local = _build_text_mask_from_easyocr(result_panel, panel_rect.shape[:2])
leds_local = _detect_leds_glare_core(
panel_rect, k=15, sigma=2.0, include_white=True,
exclude_mask=text_mask_local, dv_thr_any=22, min_short_px=10, min_area_abs=40
)
        # 4. Map OCR/LED results back to the original coordinates (relative to frame_in)
items = []
if Hinv is not None:
for it in items_local:
mapped = _warp_points(Hinv, it["box"]) + np.array([x0, y0])
xs, ys = mapped[:,0], mapped[:,1]
items.append({"text": it["text"], "conf": it["conf"], "box": mapped.tolist(),
"center": (xs.mean(), ys.mean()),
"xyxy": np.array([xs.min(), ys.min(), xs.max(), ys.max()])})
else:
for it in items_local:
bx = np.array(it["box"]) + np.array([x0, y0])
xs, ys = bx[:,0], bx[:,1]
items.append({"text": it["text"], "conf": it["conf"], "box": bx.tolist(),
"center": (xs.mean(), ys.mean()),
"xyxy": np.array([xs.min(), ys.min(), xs.max(), ys.max()])})
leds = []
if Hinv is not None:
for (x,y,w,h,c,b) in leds_local:
gx,gy,gw,gh = _map_rect_from_rectified(Hinv, x,y,w,h, offset=(x0,y0))
gcx, gcy = (_warp_points(Hinv, [c]) + np.array([x0, y0]))[0]
leds.append((gx,gy,gw,gh, (gcx, gcy), b))
else:
for (x,y,w,h,c,b) in leds_local:
leds.append((x+x0, y+y0, w,h, (c[0]+x0, c[1]+y0), b))
        # 5. Match LEDs to texts (using frame_in.shape)
led_tokens, pairs_led = _match_leds_to_texts(items, leds, frame_in.shape)
        # 6. Final per-category selection and sentence composition
cat_map = _choose_and_enforce_categories(pairs_led, items, leds, frame_in.shape)
        # === Added: category status (ON/TXT_ONLY/NO_TXT) ===
diag_status = _build_category_status(items, pairs_led) if diagnostic else None
side_on = sorted(list(set(tok for _,_,tok,_,_ in pairs_led if _is_side_button(tok))))
final_text = _compose_readout(cat_map, side_on, diag_status=diag_status)
if do_pic:
try:
import time as _time
from PIL import Image, ImageDraw, ImageFont
out_dir = debug_dir or os.path.join(os.getcwd(), "debug_summaries")
os.makedirs(out_dir, exist_ok=True)
                vis = frame_in.copy()  # (CHANGE) visualize on the stabilized frame
for it in items:
poly = np.array(it["box"], dtype=np.int32)
cv2.polylines(vis, [poly], True, (0, 255, 0), 2, cv2.LINE_AA)
for (x, y, w, h, (cx, cy), bri) in leds:
cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 165, 0), 2)
cv2.circle(vis, (int(cx), int(cy)), 3, (255, 165, 0), -1)
def _pick_kr_font(size=20, font_path=None):
cands = [
font_path,
r"C:\Windows\Fonts\malgun.ttf",
r"C:\Windows\Fonts\malgunbd.ttf",
"/System/Library/Fonts/AppleSDGothicNeo.ttc",
"/usr/share/fonts/truetype/nanum/NanumGothic.ttf",
"/usr/share/fonts/truetype/noto/NotoSansKR-Regular.ttf",
"/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
]
for p in cands:
if p and os.path.exists(p):
try:
return ImageFont.truetype(p, size)
except Exception:
pass
return ImageFont.load_default()
pil = Image.fromarray(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(pil)
font = _pick_kr_font(size=20, font_path=debug_font)
for it in items:
x1, y1 = int(it["xyxy"][0]), int(it["xyxy"][1])
label = f"{it.get('text','')} ({it.get('conf',0.0):.2f})"
bbox = draw.textbbox((0, 0), label, font=font)
tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
draw.rectangle([x1, y1 - th - 6, x1 + tw + 8, y1 + 2], fill=(0, 0, 0))
draw.text((x1 + 4, y1 - th - 4), label, font=font, fill=(255, 255, 255))
vis = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
                ts = _time.strftime("%Y%m%d_%H%M%S"); ms = int((_time.time() % 1) * 1000)
                # Sanitize the summary before using it as a filename (it may
                # contain "/" from the side-button join, which breaks paths).
                safe_text = re.sub(r'[\\/:*?"<>|]', "_", final_text or "미확인 상태")
                out_name = f"{safe_text} ({ts}_{ms:03d}).png"
                out_path = os.path.join(out_dir, out_name)
                cv2.imwrite(out_path, vis)
except Exception as _e:
print(f"[seeing.summarize_scene] pic save failed: {_e})")
return final_text or "켜진 표시 없음"
except Exception as e:
print(f"[seeing.summarize_scene] error: {e}")
return "현재 상태를 파악하는데 실패했습니다."