# seeing.py
# Builds a one-line Korean summary sentence describing the washing-machine
# control-panel state visible in the current frame (used in INFO mode).
# Reconstructed to include the latest logic from "test (1).py".
import os
import re
import math

import cv2
import numpy as np
from typing import List, Tuple, Dict, Any, Optional

# ==============================
# Module-level configuration
# ==============================
# Side-button labels (left / right columns of the panel).
SIDE_LEFT = ["통살균", "원격제어", "예약", "내마음"]
SIDE_RIGHT = ["터보샷", "구김방지", "알림음", "빨래추가"]
SIDE_EUCLID_MAX_REL = 0.08  # max LED-to-label euclidean distance, relative to long side

# Option tokens per category column (top-to-bottom order on the panel).
CATEGORY_OPTIONS = {
    "세탁": ["불림", "애벌세탁", "강력", "표준", "적은때"],
    "헹굼": ["5회", "4", "3", "2", "1"],
    "탈수": ["건조맞춤", "강", "중", "약", "섬세"],
    "물온도": ["95", "60", "40", "30", "냉수"],
}
READOUT_ORDER = ("세탁", "헹굼", "탈수", "물온도")

# Regex -> replacement map used to canonicalize noisy OCR text.
LABEL_SYNONYMS = {
    r"\s+": "",
    r"[**()\[\]]": "",
    r"^이?터보\s*샷?$": "터보샷",
    r"\*?터보\s*샷": "터보샷",
    r"\*?알림\s*음(?:\(3초\))?": "알림음",
    r"Wi[\-\s]?Fi": "WiFi",
    r"일회": "1회",
    r"이회": "2회",
    r"삼회": "3회",
    r"사회": "4회",
    r"오회": "5회",
    r"95\s*℃|95도": "95",
    r"60\s*℃|60도": "60",
    r"40\s*℃|40도": "40",
    r"30\s*℃|30도": "30",
}

SIDE_SET = set(SIDE_LEFT + SIDE_RIGHT)
CAT2SET = {k: set(v) for k, v in CATEGORY_OPTIONS.items()}
ALL_ALLOWED = SIDE_SET.union(*CAT2SET.values())

# --- center-band settings (the vertical strip holding the category columns) ---
CENTER_BAND_PAD_REL = 0.06
CENTER_BAND_FALLBACK = (0.34, 0.66)
CENTER_RIGHT_MIN_PX = 6
CENTER_RIGHT_MIN_FRAC = 0.18

# --- side-button matching settings ---
SIDE_COLW_REL = 0.08
SIDE_DMAX_REL = 0.25
SIDE_Y_GAP_MIN = 2
SIDE_Y_TOL_REL = 0.02


# === (ADD) frame stabilizer =========================================
class MotionStabilizer:
    """ECC-based inter-frame stabilizer.

    Aligns each incoming frame to the previous one using
    cv2.findTransformECC on a downscaled grayscale image, then warps the
    full-resolution frame. Falls back to the raw frame on any failure.
    """

    def __init__(self, downscale=0.5, ecc=True, homography=False, max_iter=50, eps=1e-6):
        self.ds = float(downscale)
        self.warp_mode = (cv2.MOTION_HOMOGRAPHY if homography
                          else (cv2.MOTION_EUCLIDEAN if ecc else cv2.MOTION_TRANSLATION))
        self.max_iter = int(max_iter)
        self.eps = float(eps)
        self.prev_gray_ds = None  # previous downscaled gray frame, float32 in [0..1]
        self.homography = bool(homography)

    def reset(self):
        """Forget the reference frame (next apply() becomes a no-op)."""
        self.prev_gray_ds = None

    def _ds(self, img):
        """Downscale img by self.ds (no-op when ds >= 1.0)."""
        if self.ds and self.ds < 1.0:
            h, w = img.shape[:2]
            return cv2.resize(img, (int(w * self.ds), int(h * self.ds)),
                              interpolation=cv2.INTER_AREA)
        return img

    def _undscale_warp(self, M):
        """Rescale a warp estimated at downscaled resolution to full resolution."""
        s = self.ds
        if self.homography:
            S = np.array([[s, 0, 0], [0, s, 0], [0, 0, 1]], np.float32)
            Si = np.array([[1 / s, 0, 0], [0, 1 / s, 0], [0, 0, 1]], np.float32)
            return Si @ M @ S
        else:
            # Promote the 2x3 affine to 3x3 so it can be conjugated by the scale.
            A = np.eye(3, dtype=np.float32)
            A[:2, :] = M
            S = np.array([[s, 0, 0], [0, s, 0], [0, 0, 1]], np.float32)
            Si = np.array([[1 / s, 0, 0], [0, 1 / s, 0], [0, 0, 1]], np.float32)
            A = Si @ A @ S
            return A[:2, :]

    def apply(self, frame_bgr):
        """Return frame_bgr warped onto the previous frame (or as-is on first call/failure)."""
        g = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
        g_ds = self._ds(g)
        g_ds_f = g_ds.astype(np.float32) / 255.0
        if self.prev_gray_ds is None:
            self.prev_gray_ds = g_ds_f
            return frame_bgr
        warp = (np.eye(3, dtype=np.float32) if self.homography
                else np.eye(2, 3, dtype=np.float32))
        criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, self.max_iter, self.eps)
        try:
            _, warp = cv2.findTransformECC(
                templateImage=self.prev_gray_ds, inputImage=g_ds_f, warpMatrix=warp,
                motionType=(cv2.MOTION_HOMOGRAPHY if self.homography else self.warp_mode),
                criteria=criteria, inputMask=None, gaussFiltSize=1
            )
            if self.homography:
                Wf = self._undscale_warp(warp)
                out = cv2.warpPerspective(frame_bgr, Wf,
                                          (frame_bgr.shape[1], frame_bgr.shape[0]),
                                          flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
                                          borderMode=cv2.BORDER_REPLICATE)
                self.prev_gray_ds = cv2.warpPerspective(
                    g_ds_f, warp, (g_ds_f.shape[1], g_ds_f.shape[0]),
                    flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
                    borderMode=cv2.BORDER_REPLICATE)
            else:
                Wf = self._undscale_warp(warp)
                out = cv2.warpAffine(frame_bgr, Wf,
                                     (frame_bgr.shape[1], frame_bgr.shape[0]),
                                     flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
                                     borderMode=cv2.BORDER_REPLICATE)
                self.prev_gray_ds = cv2.warpAffine(
                    g_ds_f, warp, (g_ds_f.shape[1], g_ds_f.shape[0]),
                    flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
                    borderMode=cv2.BORDER_REPLICATE)
            return out
        except Exception:
            # ECC failed to converge — re-seed the reference and pass through.
            self.prev_gray_ds = g_ds_f
            return frame_bgr
# ===================================================================


# (ADD) default toggle / singleton
STABILIZE_DEFAULT = True  # default ON; set False to disable stabilization
_global_stabilizer = None


# ==============================
# Internal helpers
# ==============================
def _canon_text(raw: str) -> str:
    """Canonicalize an OCR string to a known panel token (or '' if unusable)."""
    if not raw:
        return ""
    s = str(raw)
    for pat, rep in LABEL_SYNONYMS.items():
        s = re.sub(pat, rep, s, flags=re.IGNORECASE)
    # "N회" (1-4) collapses to the bare digit; a lone "5" becomes "5회".
    m = re.fullmatch(r"([1-4])회", s)
    if m:
        s = m.group(1)
    elif re.fullmatch(r"5", s):
        s = "5회"
    digits = re.sub(r"[^0-9]", "", s)
    if digits and any(digits in v for v in CATEGORY_OPTIONS.values()):
        s = digits if s != "5회" else "5회"
    s = re.sub(r"[^0-9A-Za-z가-힣]", "", s)
    return s


def _is_side_button(tok: str) -> bool:
    """True if tok is one of the side-button labels."""
    return tok in SIDE_SET


def _which_category(tok: str):
    """Return the category name owning tok, or None."""
    for cat, opts in CAT2SET.items():
        if tok in opts:
            return cat
    return None


def _order_pts(pts):
    """Order 4 quad points as (top-left, top-right, bottom-right, bottom-left)."""
    rect = np.zeros((4, 2), dtype=np.float32)
    s = pts.sum(axis=1)
    d = np.diff(pts, axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    rect[1] = pts[np.argmin(d)]
    rect[3] = pts[np.argmax(d)]
    return rect


def _warp_points(H, pts_xy):
    """Apply a 3x3 perspective transform H to Nx2 points."""
    pts = np.asarray(pts_xy, dtype=np.float32).reshape(-1, 1, 2)
    return cv2.perspectiveTransform(pts, H).reshape(-1, 2)


def _map_rect_from_rectified(Hinv, x, y, w, h, offset=(0, 0)):
    """Map an axis-aligned rect from rectified coords back to the original frame."""
    corners = np.float32([[x, y], [x + w, y], [x + w, y + h], [x, y + h]])
    mapped = _warp_points(Hinv, corners)
    x1, y1 = mapped.min(axis=0)
    x2, y2 = mapped.max(axis=0)
    ox, oy = offset
    return int(x1 + ox), int(y1 + oy), int(x2 - x1), int(y2 - y1)


def _easyocr_to_items(detections):
    """Convert EasyOCR (bbox, text, conf) tuples into item dicts with center/xyxy."""
    items = []
    for bbox, text, conf in detections:
        quad = np.array(bbox, dtype=float)
        xs = [p[0] for p in quad]
        ys = [p[1] for p in quad]
        cx, cy = float(sum(xs) / 4), float(sum(ys) / 4)
        xyxy = np.array([min(xs), min(ys), max(xs), max(ys)], dtype=float)
        items.append({"text": text.strip(), "conf": float(conf), "box": quad,
                      "center": (cx, cy), "xyxy": xyxy})
    return items


def _detect_panel_roi(img_bgr, v_pctl=35, bh_kernel=31, min_area_frac=0.08,
                      ar_range=(1.1, 4.0), pad_frac=0.01):
    """Find the dark control-panel region; return ((x0,y0,x1,y1), mask).

    Falls back to the whole frame when no plausible contour is found.
    """
    h, w = img_bgr.shape[:2]
    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    V = hsv[:, :, 2]
    k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (bh_kernel, bh_kernel))
    bh = cv2.morphologyEx(V, cv2.MORPH_BLACKHAT, k)
    _, m_bh = cv2.threshold(bh, max(20, bh.mean() + 1.0 * bh.std()), 255, cv2.THRESH_BINARY)
    thr_dark = int(np.percentile(V, v_pctl))
    m_dark = cv2.inRange(V, 0, thr_dark)
    mask = cv2.bitwise_or(m_bh, m_dark)
    # NOTE(fix): iterations must be passed by keyword — the 3rd positional
    # argument of morphologyEx/dilate is `dst`, and a scalar there is
    # silently ignored (iterations would stay 1).
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE,
                            cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15)),
                            iterations=2)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN,
                            cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7)),
                            iterations=1)
    cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    H, W = h, w
    best = None
    for c in cnts:
        area = cv2.contourArea(c)
        if area < min_area_frac * (H * W):
            continue
        hull = cv2.convexHull(c)
        x, y, wid, hei = cv2.boundingRect(hull)
        ar = max(wid, hei) / max(1, min(wid, hei))
        if not (ar_range[0] <= ar <= ar_range[1]):
            continue
        if (best is None) or (area > best[0]):
            best = (area, (x, y, wid, hei))
    if best is None:
        return (0, 0, W, H), mask
    x, y, wid, hei = best[1]
    pad = int(pad_frac * max(H, W))
    x0 = max(0, x - pad)
    y0 = max(0, y - pad)
    x1 = min(W, x + wid + pad)
    y1 = min(H, y + hei + pad)
    return (x0, y0, x1, y1), mask


def _deskew_panel_by_mask(panel_bgr, panel_mask_roi, min_quad_area_frac=0.05):
    """Perspective-rectify the panel crop using its mask contour.

    Returns (warped, H) or (panel_bgr, None) when no usable quad exists.
    """
    h, w = panel_bgr.shape[:2]
    cnts, _ = cv2.findContours(panel_mask_roi, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not cnts:
        return panel_bgr, None
    c = max(cnts, key=cv2.contourArea)
    if cv2.contourArea(c) < (min_quad_area_frac * h * w):
        return panel_bgr, None
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    src = (approx.reshape(4, 2).astype(np.float32) if len(approx) == 4
           else cv2.boxPoints(cv2.minAreaRect(c)).astype(np.float32))
    src = _order_pts(src)
    (tl, tr, br, bl) = src
    Wt = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    Wt = max(100, Wt)
    Ht = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))
    Ht = max(100, Ht)
    dst = np.array([[0, 0], [Wt - 1, 0], [Wt - 1, Ht - 1], [0, Ht - 1]], dtype=np.float32)
    H = cv2.getPerspectiveTransform(src, dst)
    warped = cv2.warpPerspective(panel_bgr, H, (Wt, Ht), flags=cv2.INTER_CUBIC)
    return warped, H


def _build_glare_mask(panel_bgr, v_thr=235, s_thr=45, lap_var_thr=25.0,
                      min_area_rel=1e-4, max_area_rel=2e-2, ar_min=3.0,
                      close_ks=5, open_ks=3, dil_ks=3):
    """Detect elongated low-texture specular-glare blobs.

    Returns (mask, glare_area_ratio).
    """
    h, w = panel_bgr.shape[:2]
    hsv = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(hsv)
    m_hi = (V >= v_thr) & (S <= s_thr)  # bright & desaturated
    m = (m_hi.astype(np.uint8) * 255)
    m = cv2.morphologyEx(m, cv2.MORPH_CLOSE,
                         cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (close_ks, close_ks)),
                         iterations=1)
    m = cv2.morphologyEx(m, cv2.MORPH_OPEN,
                         cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (open_ks, open_ks)),
                         iterations=1)
    area_img = float(h * w)
    out = np.zeros_like(m, dtype=np.uint8)
    num, lab, stats, _ = cv2.connectedComponentsWithStats(m, 8)
    gray = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2GRAY)
    for i in range(1, num):
        x, y, wid, hei, area = stats[i]
        rel = area / area_img
        if rel < min_area_rel or rel > max_area_rel:
            continue
        ar = max(wid, hei) / max(1, min(wid, hei))
        if ar < ar_min:
            continue  # glare streaks are elongated
        crop = gray[y:y + hei, x:x + wid]
        if cv2.Laplacian(crop, cv2.CV_64F).var() > lap_var_thr:
            continue  # textured region — likely text, not glare
        out[lab == i] = 255
    out = cv2.dilate(out, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dil_ks, dil_ks)),
                     iterations=1)
    ratio = out.sum() / 255.0 / area_img
    return out, float(ratio)


def _apply_deglare_toneclip(panel_bgr, glare_mask, ring_px=3, add_v=18):
    """Clip V inside glare regions to (ring median + add_v) to suppress glare."""
    hsv = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(hsv)
    dil = cv2.dilate(glare_mask,
                     cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                               (ring_px * 2 + 1, ring_px * 2 + 1)),
                     iterations=1)
    ring = cv2.bitwise_and(dil, cv2.bitwise_not(glare_mask))
    if cv2.countNonZero(ring) == 0:
        return panel_bgr
    ring_med = int(np.median(V[ring.astype(bool)]))
    cap = np.clip(ring_med + add_v, 0, 255).astype(np.uint8)
    V2 = V.copy()
    V2[glare_mask.astype(bool)] = np.minimum(V2[glare_mask.astype(bool)], cap)
    return cv2.cvtColor(cv2.merge([H, S, V2]), cv2.COLOR_HSV2BGR)


def _ocr_with_deglare_when_needed(panel_rect_bgr, reader, area_gate=0.002):
    """Run OCR; if significant glare is present, retry on a deglared image
    and keep whichever detection set scores better."""
    det_orig = reader.readtext(panel_rect_bgr)
    m_gl, ratio = _build_glare_mask(panel_rect_bgr)
    if ratio < area_gate:
        return det_orig
    degl = _apply_deglare_toneclip(panel_rect_bgr, m_gl)
    det_degl = reader.readtext(degl)

    def _score(dets):
        # Total confidence plus a small bonus per detection containing Hangul/digits.
        return (sum(c for _, _, c in dets)
                + 0.3 * sum(1 for _, t, _ in dets
                            if len(re.sub(r"[^가-힣0-9]", "", t)) > 0))

    return det_degl if _score(det_degl) >= 0.85 * _score(det_orig) else det_orig


def _build_text_mask_from_easyocr(detections, shape_hw, dilate_px=2):
    """Rasterize OCR boxes into a binary mask (used to exclude text from LED search)."""
    H, W = shape_hw[:2]
    mask = np.zeros((H, W), np.uint8)
    if not detections:
        return mask
    polys = [np.array(bbox, dtype=np.int32).reshape(-1, 1, 2)
             for bbox, _, _ in detections]
    if polys:
        cv2.fillPoly(mask, polys, 255)
    if dilate_px > 0:
        k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                      (dilate_px * 2 + 1, dilate_px * 2 + 1))
        mask = cv2.dilate(mask, k, iterations=1)
    return mask


def _auto_led_params_simple(shape, k_frac=0.015, area_lo_frac=1e-5, area_hi_frac=1.5e-3):
    """Derive (odd top-hat kernel size, min_area, max_area) from the image size."""
    h, w = shape[:2]
    long_side = max(h, w)
    k_auto = int(round(long_side * k_frac))
    if k_auto % 2 == 0:
        k_auto += 1  # structuring-element size must be odd
    k_auto = max(5, min(k_auto, 31))
    min_area = max(6, int(h * w * area_lo_frac))
    max_area = max(min_area + 1, int(h * w * area_hi_frac))
    return k_auto, min_area, max_area


def _detect_leds_glare_core(img_bgr, k=None, sigma=2.3, ring_px=7, ring_v_thr=200,
                            core_s_thr_bg=78, dv_thr_bg=45,
                            strict_aspect=(2.0, 4.2), strict_extent=0.64,
                            strict_solidity=0.80, include_white=False,
                            exclude_mask=None, dv_thr_any=35,
                            min_short_px=10, min_area_abs=40):
    """Detect lit LED indicators, rejecting glare via ring-brightness tests.

    Returns a list of (x, y, w, h, (cx, cy), core_v_mean) tuples.
    """
    def _masked_mean_median(img_gray, mask_bool):
        vals = img_gray[mask_bool]
        return (float(vals.mean()), float(np.median(vals))) if vals.size > 0 else (0.0, 0.0)

    k_auto, min_area, max_area = _auto_led_params_simple(img_bgr.shape)
    if not k or k <= 0:
        k = k_auto
    g = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    g_eq = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(g)
    Hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(Hsv)
    se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k))
    tophat = cv2.morphologyEx(g_eq, cv2.MORPH_TOPHAT, se)
    m, s = float(tophat.mean()), float(tophat.std())
    _, seed_th = cv2.threshold(tophat, np.clip(m + sigma * s, 40, 240), 255,
                               cv2.THRESH_BINARY)
    _, seed_v = cv2.threshold(V, 210, 255, cv2.THRESH_BINARY)
    seed = cv2.bitwise_or(seed_th, seed_v)
    # Color gate: green/blue hues with enough saturation & brightness.
    m_color = ((cv2.inRange(H, 35, 85) | cv2.inRange(H, 90, 140))
               & (cv2.inRange(S, 50, 255) & cv2.inRange(V, 160, 255)))
    if include_white:
        m_color |= (cv2.inRange(S, 0, 60) & cv2.inRange(V, 200, 255))
    reinforced = cv2.bitwise_and(
        seed, cv2.dilate(m_color,
                         cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)),
                         iterations=1))
    ratio = ((cv2.countNonZero(reinforced) / float(max(1, cv2.countNonZero(seed))))
             if cv2.countNonZero(seed) > 0 else 0.0)
    # Prefer the color-reinforced seed only if it retains enough of the seed.
    core = reinforced if ratio >= 0.3 else seed
    if exclude_mask is not None:
        core = cv2.bitwise_and(core, cv2.bitwise_not(exclude_mask))
    core = cv2.medianBlur(core, 3)
    core = cv2.morphologyEx(core, cv2.MORPH_OPEN,
                            cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)),
                            iterations=1)
    core = cv2.morphologyEx(core, cv2.MORPH_CLOSE,
                            cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)),
                            iterations=1)
    num, lab, stats, cents = cv2.connectedComponentsWithStats(core, 8)
    for i in range(1, num):
        # stats[i,3] = height, stats[i,4] = area.
        if ((min_short_px and stats[i, 3] < min_short_px)
                or not (max(min_area, min_area_abs) <= stats[i, 4] <= max_area)):
            core[lab == i] = 0
    num, lab, stats, cents = cv2.connectedComponentsWithStats(core, 8)
    leds = []
    ring_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                            (ring_px * 2 + 1, ring_px * 2 + 1))
    for i in range(1, num):
        x, y, wid, hei, area = stats[i]
        aspect = max(wid, hei) / max(1, min(wid, hei))
        if aspect > 6.5:
            continue
        comp_mask = (lab == i)
        dil = cv2.dilate(comp_mask.astype(np.uint8), ring_kernel, iterations=1).astype(bool)
        ring_mask = np.logical_and(dil, np.logical_not(comp_mask))
        core_v_mean, _ = _masked_mean_median(V, comp_mask)
        _, ring_med = _masked_mean_median(V, ring_mask)
        # A real LED must be clearly brighter than its surround.
        if (core_v_mean - ring_med) < dv_thr_any:
            continue
        if ring_med >= ring_v_thr:
            # Bright surround -> possible glare: apply strict shape criteria.
            cnts, _ = cv2.findContours((comp_mask.astype(np.uint8) * 255),
                                       cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if cnts:
                c = max(cnts, key=cv2.contourArea)
                solidity = cv2.contourArea(c) / max(1.0, cv2.contourArea(cv2.convexHull(c)))
                extent = area / float(max(1, wid * hei))
                if (not (strict_aspect[0] <= aspect <= strict_aspect[1])
                        or extent < strict_extent or solidity < strict_solidity):
                    continue
        leds.append((int(x), int(y), int(wid), int(hei),
                     (float(cents[i][0]), float(cents[i][1])), float(core_v_mean)))
    return leds


def _norm_ko(s: str) -> str:
    """Strip all whitespace from a (possibly None) string."""
    return re.sub(r"\s+", "", s or "")


def _find_category_anchors(items):
    """Locate the category header texts; keep the tallest (then largest) per category."""
    anchors = {}
    for it in items:
        raw = it["text"]
        norm = _norm_ko(raw)
        for cat in CATEGORY_OPTIONS.keys():
            if cat in norm:
                x1, y1, x2, y2 = it["xyxy"]
                h = (y2 - y1)
                area = (x2 - x1) * h
                prev = anchors.get(cat)
                if (not prev or (h > prev.get("_h", -1))
                        or (h == prev.get("_h", -1) and area > prev.get("_a", -1))):
                    anchors[cat] = {"center": it["center"], "xyxy": it["xyxy"],
                                    "_h": h, "_a": area}
    for cat in anchors:
        anchors[cat].pop("_h", None)
        anchors[cat].pop("_a", None)
    return anchors


def _compute_center_band(items, img_shape):
    """Compute the (left, right) x-extent of the category columns, with padding."""
    H, W = img_shape[:2]
    xs = [x for it in items
          if any(cat in _norm_ko(it["text"]) for cat in CATEGORY_OPTIONS.keys())
          for x in (it["xyxy"][0], it["xyxy"][2])]
    if len(xs) >= 2:
        left = max(0.0, min(xs) - CENTER_BAND_PAD_REL * W)
        right = min(float(W), max(xs) + CENTER_BAND_PAD_REL * W)
    else:
        left, right = CENTER_BAND_FALLBACK[0] * W, CENTER_BAND_FALLBACK[1] * W
    return float(left), float(right)


def _match_leds_to_texts(items, leds, img_shape, dmax_px=None, rel_gate=1.1,
                         x_orient_eps=4, y_orient_eps=0):
    """Greedily pair each LED with its nearest eligible label text.

    Side buttons: LED sits outside the center band, above the label in the
    same column. Category options: LED sits inside the band, left of the text
    at roughly the same height. Returns (tokens, pairs) where each pair is
    (text_center, led_center, token, led_index, brightness).
    """
    Hh, Ww = img_shape[:2]
    dmax_px = dmax_px or max(50, int(0.065 * max(Hh, Ww)))
    band_left, band_right = _compute_center_band(items, img_shape)
    side_colw = SIDE_COLW_REL * max(Hh, Ww)
    side_dmax = SIDE_DMAX_REL * max(Hh, Ww)
    side_y_tol = SIDE_Y_TOL_REL * Hh
    side_eucl_max = SIDE_EUCLID_MAX_REL * max(Hh, Ww)
    choices = []
    for li, (_x, _y, _w, _h, (cx, cy), bright) in enumerate(leds):
        best_cand = None
        for ti, it in enumerate(items):
            tx, ty = it["center"][0], it["center"][1]
            tw = it["xyxy"][2] - it["xyxy"][0]
            th = it["xyxy"][3] - it["xyxy"][1]
            raw = it["text"]
            x1 = it["xyxy"][0]
            tok = _canon_text(raw)
            if not tok or tok not in ALL_ALLOWED:
                continue
            dist = 0
            if _is_side_button(tok):
                # LED must be OUTSIDE the center band, with the label below it.
                if ((cx < band_left or cx > band_right)
                        and ty >= cy - side_y_tol
                        and abs(tx - cx) <= max(side_colw, 0.5 * tw)):
                    dist = max(0.0, ty - cy) + 0.3 * abs(tx - cx)
                    if (dist > side_dmax
                            or math.hypot(tx - cx, ty - cy) > side_eucl_max):
                        continue
            else:
                # Category option: LED and text both in the band, text to the right.
                if (band_left <= cx <= band_right
                        and band_left <= tx <= band_right
                        and abs(ty - cy) <= max(y_orient_eps, 0.6 * th)
                        and x1 >= cx + max(CENTER_RIGHT_MIN_PX,
                                           CENTER_RIGHT_MIN_FRAC * tw)):
                    dist = math.hypot(tx - cx, ty - cy)
                    if dist > dmax_px:
                        continue
            if dist > 0 and (not best_cand or dist < best_cand[0]):
                best_cand = (dist, ti, tok)
        if best_cand:
            dist, ti, tok = best_cand
            choices.append((dist, li, ti, tok, float(bright),
                            tuple(items[ti]["center"]), (cx, cy)))
    choices.sort(key=lambda x: x[0])
    used_led, used_txt, pairs_led = set(), set(), []
    for d, li, ti, tok, bri, ptxt, pled in choices:
        if li not in used_led and ti not in used_txt:
            used_led.add(li)
            used_txt.add(ti)
            pairs_led.append((ptxt, pled, tok, li, bri))
    # Stable reading order: row bands of 30px, then x of the LED.
    pairs_led.sort(key=lambda p: (int(p[1][1] // 30), p[1][0]))
    return [p[2] for p in pairs_led], pairs_led


def _choose_and_enforce_categories(pairs_led, items, leds, img_shape,
                                   cw_rel=0.06, dmax_rel=0.20, fill_default=None):
    """Pick one option token per category, filling gaps via anchor columns.

    Primary: brightest LED-matched token per category. Fallback: for a
    category with an anchor header, take the brightest LED in that column
    and the nearest option text to it.
    """
    H, W = img_shape[:2]
    L = max(H, W)
    colw, dmax = cw_rel * L, dmax_rel * L
    picked = {}
    bucket = {cat: [] for cat in CATEGORY_OPTIONS.keys()}
    for _, _, tok, li, bri in pairs_led:
        cat = _which_category(tok)
        if cat:
            bucket[cat].append((tok, bri, li))
    for cat, arr in bucket.items():
        if arr:
            picked[cat] = max(arr, key=lambda x: x[1])[0]
    anchors = _find_category_anchors(items)
    for cat in CATEGORY_OPTIONS:
        if cat in picked:
            continue
        a = anchors.get(cat)
        if a:
            ax, ay = a["center"]
            cand_leds = sorted(
                [(bri, idx) for idx, (*_, (cx, cy), bri) in enumerate(leds)
                 if abs(cx - ax) <= colw and cy >= ay - 2],
                reverse=True)
            if cand_leds:
                led_center = leds[cand_leds[0][1]][4]
                best_tok, best_d = None, dmax
                for it in items:
                    tok = _canon_text(it["text"])
                    if tok in CAT2SET[cat]:
                        tx, ty = it["center"]
                        if abs(tx - ax) <= colw and ty >= ay - 2:
                            d = math.hypot(tx - led_center[0], ty - led_center[1])
                            if d < best_d:
                                best_d, best_tok = d, tok
                picked[cat] = best_tok or (fill_default.get(cat)
                                           if fill_default else "미확인")
    return picked


# --- per-category diagnostic state (ON / TXT_ONLY / NO_TXT) ---
STATE_KR = {"ON": "확인됨", "TXT_ONLY": "텍스트만", "NO_TXT": "텍스트없음"}


def _build_category_status(items, pairs_led):
    """Diagnose OCR/LED matching per category.

    Returns {cat: {"picked": token or "미확인",
                   "state": "ON" | "TXT_ONLY" | "NO_TXT"}}.
    """
    # 1) Tokens seen by OCR, grouped by category.
    ocr_tokens_by_cat = {cat: set() for cat in CATEGORY_OPTIONS.keys()}
    for it in items:
        tok = _canon_text(it.get("text", ""))
        cat = _which_category(tok)
        if cat:
            ocr_tokens_by_cat[cat].add(tok)
    # 2) Tokens confirmed by LED-text matching.
    led_matched_by_cat = {cat: set() for cat in CATEGORY_OPTIONS.keys()}
    for _, _, tok, _, _ in pairs_led:
        cat = _which_category(tok)
        if cat:
            led_matched_by_cat[cat].add(tok)
    # 3) Compose states.
    status = {}
    for cat in CATEGORY_OPTIONS.keys():
        if led_matched_by_cat[cat]:
            picked = sorted(list(led_matched_by_cat[cat]))[0]
            state = "ON"
        elif ocr_tokens_by_cat[cat]:
            picked = "미확인"
            state = "TXT_ONLY"
        else:
            picked = "미확인"
            state = "NO_TXT"
        status[cat] = {"picked": picked, "state": state}
    return status


def _compose_readout(cat_map, side_on, order=READOUT_ORDER,
                     diag_status: Optional[Dict[str, Dict[str, str]]] = None,
                     state_labels: Dict[str, str] = STATE_KR):
    """Build the final Korean summary sentence.

    When diag_status is given, each category value is suffixed with its
    state tag (ON | TXT_ONLY | NO_TXT, rendered via state_labels).
    """
    parts = []
    for k in order:
        val = cat_map.get(k, "미확인")
        if diag_status and k in diag_status:
            st = diag_status[k]["state"]
            tail = state_labels.get(st, st) if state_labels else st
            parts.append(f"{k} {val}({tail})")
        else:
            parts.append(f"{k} {val}")
    cat_sentence = ", ".join(parts)
    side_sentence = " / ".join(side_on) if side_on else ""
    final_parts = [p for p in (cat_sentence, side_sentence) if p]
    return ", ".join(final_parts) if final_parts else "켜진 표시 없음"


# ==============================
# Public entry point
# ==============================
def summarize_scene(frame_bgr: np.ndarray, reader, do_pic=True, debug_font=None,
                    debug_dir: Optional[str] = None, diagnostic: bool = False,
                    # === (ADD) stabilization options ===
                    stabilize: Optional[bool] = None,
                    stabilizer: Optional[MotionStabilizer] = None) -> str:
    """Analyze frame_bgr and return a Korean summary of the panel state.

    reader is an EasyOCR-style object exposing readtext(). On any error a
    fixed Korean failure message is returned (never raises).
    """
    try:
        # === (ADD) choose the (optionally stabilized) input frame ===
        use_stab = STABILIZE_DEFAULT if (stabilize is None) else bool(stabilize)
        frame_in = frame_bgr
        if use_stab:
            global _global_stabilizer
            st = stabilizer or _global_stabilizer
            if st is None:
                st = MotionStabilizer(downscale=0.5, ecc=True, homography=False)
                _global_stabilizer = st
            frame_in = st.apply(frame_bgr)

        # 1. Detect the panel ROI and rectify it (uses frame_in).
        (x0, y0, x1, y1), panel_mask_full = _detect_panel_roi(frame_in)
        panel_bgr = frame_in[y0:y1, x0:x1].copy()
        panel_mask_roi = panel_mask_full[y0:y1, x0:x1].copy()
        panel_rect, H = _deskew_panel_by_mask(panel_bgr, panel_mask_roi)
        Hinv = np.linalg.inv(H) if H is not None else None

        # 2. OCR (with deglare retry when needed).
        result_panel = _ocr_with_deglare_when_needed(panel_rect, reader)
        items_local = _easyocr_to_items(result_panel)

        # 3. Text mask + LED detection on the rectified panel.
        text_mask_local = _build_text_mask_from_easyocr(result_panel, panel_rect.shape[:2])
        leds_local = _detect_leds_glare_core(
            panel_rect, k=15, sigma=2.0, include_white=True,
            exclude_mask=text_mask_local, dv_thr_any=22,
            min_short_px=10, min_area_abs=40
        )

        # 4. Map OCR/LED results back to frame_in coordinates.
        items = []
        if Hinv is not None:
            for it in items_local:
                mapped = _warp_points(Hinv, it["box"]) + np.array([x0, y0])
                xs, ys = mapped[:, 0], mapped[:, 1]
                items.append({"text": it["text"], "conf": it["conf"],
                              "box": mapped.tolist(),
                              "center": (xs.mean(), ys.mean()),
                              "xyxy": np.array([xs.min(), ys.min(),
                                                xs.max(), ys.max()])})
        else:
            for it in items_local:
                bx = np.array(it["box"]) + np.array([x0, y0])
                xs, ys = bx[:, 0], bx[:, 1]
                items.append({"text": it["text"], "conf": it["conf"],
                              "box": bx.tolist(),
                              "center": (xs.mean(), ys.mean()),
                              "xyxy": np.array([xs.min(), ys.min(),
                                                xs.max(), ys.max()])})

        leds = []
        if Hinv is not None:
            for (x, y, w, h, c, b) in leds_local:
                gx, gy, gw, gh = _map_rect_from_rectified(Hinv, x, y, w, h, offset=(x0, y0))
                gcx, gcy = (_warp_points(Hinv, [c]) + np.array([x0, y0]))[0]
                leds.append((gx, gy, gw, gh, (gcx, gcy), b))
        else:
            for (x, y, w, h, c, b) in leds_local:
                leds.append((x + x0, y + y0, w, h, (c[0] + x0, c[1] + y0), b))

        # 5. LED-to-text matching (uses frame_in.shape).
        led_tokens, pairs_led = _match_leds_to_texts(items, leds, frame_in.shape)

        # 6. Final per-category selection and sentence composition.
        cat_map = _choose_and_enforce_categories(pairs_led, items, leds, frame_in.shape)
        # Added: per-category diagnostic state (ON/TXT_ONLY/NO_TXT).
        diag_status = _build_category_status(items, pairs_led) if diagnostic else None
        side_on = sorted(list(set(tok for _, _, tok, _, _ in pairs_led
                                  if _is_side_button(tok))))
        final_text = _compose_readout(cat_map, side_on, diag_status=diag_status)

        if do_pic:
            try:
                import time as _time
                from PIL import Image, ImageDraw, ImageFont
                out_dir = debug_dir or os.path.join(os.getcwd(), "debug_summaries")
                os.makedirs(out_dir, exist_ok=True)
                vis = frame_in.copy()  # (CHANGE) visualize on the stabilized frame
                for it in items:
                    poly = np.array(it["box"], dtype=np.int32)
                    cv2.polylines(vis, [poly], True, (0, 255, 0), 2, cv2.LINE_AA)
                for (x, y, w, h, (cx, cy), bri) in leds:
                    cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)),
                                  (255, 165, 0), 2)
                    cv2.circle(vis, (int(cx), int(cy)), 3, (255, 165, 0), -1)

                def _pick_kr_font(size=20, font_path=None):
                    # First available Korean-capable font; PIL default as last resort.
                    cands = [
                        font_path,
                        r"C:\Windows\Fonts\malgun.ttf",
                        r"C:\Windows\Fonts\malgunbd.ttf",
                        "/System/Library/Fonts/AppleSDGothicNeo.ttc",
                        "/usr/share/fonts/truetype/nanum/NanumGothic.ttf",
                        "/usr/share/fonts/truetype/noto/NotoSansKR-Regular.ttf",
                        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
                    ]
                    for p in cands:
                        if p and os.path.exists(p):
                            try:
                                return ImageFont.truetype(p, size)
                            except Exception:
                                pass
                    return ImageFont.load_default()

                pil = Image.fromarray(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(pil)
                font = _pick_kr_font(size=20, font_path=debug_font)
                for it in items:
                    lx, ly = int(it["xyxy"][0]), int(it["xyxy"][1])
                    label = f"{it.get('text','')} ({it.get('conf',0.0):.2f})"
                    bbox = draw.textbbox((0, 0), label, font=font)
                    tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
                    draw.rectangle([lx, ly - th - 6, lx + tw + 8, ly + 2], fill=(0, 0, 0))
                    draw.text((lx + 4, ly - th - 4), label, font=font, fill=(255, 255, 255))
                vis = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
                ts = _time.strftime("%Y%m%d_%H%M%S")
                ms = int((_time.time() % 1) * 1000)
                # (FIX) final_text may contain path-hostile characters (" / "
                # between side buttons) — sanitize before using as a filename.
                safe_name = re.sub(r'[\\/:*?"<>|]', '_', final_text or '미확인 상태')
                out_path = os.path.join(out_dir, f"{safe_name} ({ts}_{ms:03d}).png")
                cv2.imwrite(out_path, vis)
            except Exception as _e:
                print(f"[seeing.summarize_scene] pic save failed: {_e}")

        return final_text or "켜진 표시 없음"
    except Exception as e:
        print(f"[seeing.summarize_scene] error: {e}")
        return "현재 상태를 파악하는데 실패했습니다."