Upload 4 files
Browse files- requirements.txt +0 -0
- seeing.py +671 -0
- total_mode.py +1885 -0
- tts_reader.py +197 -0
requirements.txt
ADDED
|
Binary file (6.29 kB). View file
|
|
|
seeing.py
ADDED
|
@@ -0,0 +1,671 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# seeing.py
|
| 2 |
+
# INFO 모드에서 현재 프레임을 분석해 "요약 문장"을 만들어주는 모듈.
|
| 3 |
+
# test (1).py의 최신 로직을 모두 포함하여 재구성되었습니다.
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import re
|
| 7 |
+
import math
|
| 8 |
+
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
+
from typing import List, Tuple, Dict, Any, Optional
|
| 11 |
+
|
| 12 |
+
# ==============================
|
| 13 |
+
# 모듈 레벨 설정 및 상수
|
| 14 |
+
# ==============================
|
| 15 |
+
# Labels of the side (non-category) buttons in the left/right panel columns.
SIDE_LEFT = ["통살균", "원격제어", "예약", "내마음"]
SIDE_RIGHT = ["터보샷", "구김방지", "알림음", "빨래추가"]
# Max Euclidean LED-to-text distance for side buttons, relative to the longer image side.
SIDE_EUCLID_MAX_REL = 0.08

# Category name -> option labels displayed under each category column.
CATEGORY_OPTIONS = {
    "세탁": ["불림", "애벌세탁", "강력", "표준", "적은때"],
    "헹굼": ["5회", "4", "3", "2", "1"],
    "탈수": ["건조맞춤", "강", "중", "약", "섬세"],
    "물온도": ["95", "60", "40", "30", "냉수"],
}
# Order in which categories appear in the final spoken/printed readout.
READOUT_ORDER = ("세탁", "헹굼", "탈수", "물온도")

# Regex pattern -> replacement, applied in declaration order to raw OCR
# text to canonicalize labels (whitespace/markup removal, OCR confusions,
# Korean numerals, temperature suffixes).
LABEL_SYNONYMS = {
    r"\s+": "",
    r"[**()\[\]]": "",
    r"^이?터보\s*샷?$": "터보샷",
    r"\*?터보\s*샷": "터보샷",
    r"\*?알림\s*음(?:\(3초\))?": "알림음",
    r"Wi[\-\s]?Fi": "WiFi",
    r"일회": "1회", r"이회": "2회", r"삼회": "3회", r"사회": "4회", r"오회": "5회",
    r"95\s*℃|95도": "95", r"60\s*℃|60도": "60",
    r"40\s*℃|40도": "40", r"30\s*℃|30도": "30",
}

# Fast-lookup sets derived from the tables above.
SIDE_SET = set(SIDE_LEFT + SIDE_RIGHT)
CAT2SET = {k:set(v) for k,v in CATEGORY_OPTIONS.items()}
ALL_ALLOWED = SIDE_SET.union(*CAT2SET.values())

# --- Center band settings (horizontal band containing the category columns) ---
CENTER_BAND_PAD_REL = 0.06           # pad around detected category x-extents, rel. to width
CENTER_BAND_FALLBACK = (0.34, 0.66)  # fallback band (fractions of width) when no anchors found
CENTER_RIGHT_MIN_PX = 6              # min px the option text must sit right of its LED
CENTER_RIGHT_MIN_FRAC = 0.18         # same minimum, as a fraction of the text width

# --- Side-button matching settings ---
SIDE_COLW_REL = 0.08   # column-width tolerance, rel. to longer image side
SIDE_DMAX_REL = 0.25   # max weighted distance, rel. to longer image side
SIDE_Y_GAP_MIN = 2     # minimum vertical gap (px)  # NOTE(review): not referenced in this chunk
SIDE_Y_TOL_REL = 0.02  # vertical tolerance, rel. to image height
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# === (ADD) 프레임 안정화기 ==========================================
|
| 57 |
+
class MotionStabilizer:
    """ECC-based inter-frame stabilizer.

    Aligns each incoming frame to the previous frame with
    cv2.findTransformECC and warps the new frame back onto the previous
    frame's coordinate system. On ECC failure the frame is returned
    unchanged and the reference is reset to the current frame.
    """

    def __init__(self, downscale=0.5, ecc=True, homography=False,
                 max_iter=50, eps=1e-6):
        import cv2  # local import kept from original; module also imports cv2
        self.ds = float(downscale)
        # Motion model priority: homography > euclidean > translation.
        self.warp_mode = (cv2.MOTION_HOMOGRAPHY if homography
                          else (cv2.MOTION_EUCLIDEAN if ecc else cv2.MOTION_TRANSLATION))
        self.max_iter = int(max_iter)
        self.eps = float(eps)
        self.prev_gray_ds = None  # previous downscaled gray frame, float32 [0..1]
        self.homography = bool(homography)

    def reset(self):
        # Drop the reference frame; the next apply() becomes a pass-through.
        self.prev_gray_ds = None

    def _ds(self, img):
        # Downscale for ECC speed when a factor < 1.0 is configured.
        if self.ds and self.ds < 1.0:
            h, w = img.shape[:2]
            return cv2.resize(img, (int(w*self.ds), int(h*self.ds)), interpolation=cv2.INTER_AREA)
        return img

    def _undscale_warp(self, M):
        # Rescale a warp estimated at downscaled resolution to full
        # resolution via similarity conjugation: Si @ M @ S.
        s = self.ds
        if self.homography:
            S = np.array([[s,0,0],[0,s,0],[0,0,1]], np.float32)
            Si = np.array([[1/s,0,0],[0,1/s,0],[0,0,1]], np.float32)
            return Si @ M @ S
        else:
            # Lift the 2x3 affine to 3x3, conjugate, then drop back to 2x3.
            A = np.eye(3, dtype=np.float32)
            A[:2,:] = M
            S = np.array([[s,0,0],[0,s,0],[0,0,1]], np.float32)
            Si = np.array([[1/s,0,0],[0,1/s,0],[0,0,1]], np.float32)
            A = Si @ A @ S
            return A[:2,:]

    def apply(self, frame_bgr):
        """Warp *frame_bgr* onto the previous frame's coordinates.

        The first call (or a call after reset()) just records the
        reference and returns the frame unchanged.
        """
        import cv2
        g = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
        g_ds = self._ds(g)
        g_ds_f = g_ds.astype(np.float32) / 255.0

        if self.prev_gray_ds is None:
            self.prev_gray_ds = g_ds_f
            return frame_bgr

        # Identity warp as the ECC starting point (3x3 for homography, 2x3 otherwise).
        warp = (np.eye(3, dtype=np.float32) if self.homography
                else np.eye(2, 3, dtype=np.float32))
        criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                    self.max_iter, self.eps)
        try:
            _, warp = cv2.findTransformECC(
                templateImage=self.prev_gray_ds,
                inputImage=g_ds_f,
                warpMatrix=warp,
                motionType=(cv2.MOTION_HOMOGRAPHY if self.homography else self.warp_mode),
                criteria=criteria,
                inputMask=None, gaussFiltSize=1
            )
            if self.homography:
                Wf = self._undscale_warp(warp)
                # WARP_INVERSE_MAP: map the current frame back onto the template.
                out = cv2.warpPerspective(frame_bgr, Wf, (frame_bgr.shape[1], frame_bgr.shape[0]),
                                          flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
                                          borderMode=cv2.BORDER_REPLICATE)
                # The stabilized gray image becomes the next reference.
                self.prev_gray_ds = cv2.warpPerspective(g_ds_f, warp, (g_ds_f.shape[1], g_ds_f.shape[0]),
                                                        flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
                                                        borderMode=cv2.BORDER_REPLICATE)
            else:
                Wf = self._undscale_warp(warp)
                out = cv2.warpAffine(frame_bgr, Wf, (frame_bgr.shape[1], frame_bgr.shape[0]),
                                     flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
                                     borderMode=cv2.BORDER_REPLICATE)
                self.prev_gray_ds = cv2.warpAffine(g_ds_f, warp, (g_ds_f.shape[1], g_ds_f.shape[0]),
                                                   flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
                                                   borderMode=cv2.BORDER_REPLICATE)
            return out
        except Exception:
            # ECC failed to converge: re-seed the reference and pass through.
            self.prev_gray_ds = g_ds_f
            return frame_bgr
|
| 135 |
+
# ===================================================================
|
| 136 |
+
|
| 137 |
+
# (ADD) default toggle / singleton
STABILIZE_DEFAULT = True   # stabilization ON by default; set False to disable
_global_stabilizer = None  # lazily-created shared MotionStabilizer used by summarize_scene
|
| 140 |
+
|
| 141 |
+
# ==============================
|
| 142 |
+
# 내부 헬퍼 함수들
|
| 143 |
+
# ==============================
|
| 144 |
+
|
| 145 |
+
def _canon_text(raw: str) -> str:
    """Canonicalize a raw OCR string into one of the known panel tokens."""
    if not raw:
        return ""
    text = str(raw)
    # Apply the synonym substitutions in declaration order.
    for pattern, replacement in LABEL_SYNONYMS.items():
        text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
    # "N회" (1..4) collapses to the bare digit; a lone "5" becomes "5회".
    count_match = re.fullmatch(r"([1-4])회", text)
    if count_match is not None:
        text = count_match.group(1)
    elif re.fullmatch(r"5", text):
        text = "5회"
    # If the digit part alone is a known option label, keep just the
    # digits ("5회" is preserved verbatim).
    digits = re.sub(r"[^0-9]", "", text)
    if digits and any(digits in options for options in CATEGORY_OPTIONS.values()):
        text = "5회" if text == "5회" else digits
    # Strip anything that is not alphanumeric or Hangul.
    return re.sub(r"[^0-9A-Za-z가-힣]", "", text)
|
| 160 |
+
|
| 161 |
+
def _is_side_button(tok: str) -> bool:
    """Return True when *tok* is one of the left/right side-button labels."""
    return tok in SIDE_SET
|
| 163 |
+
|
| 164 |
+
def _which_category(tok: str):
    """Return the category whose option set contains *tok*, else None."""
    return next((cat for cat, opts in CAT2SET.items() if tok in opts), None)
|
| 168 |
+
|
| 169 |
+
def _order_pts(pts):
|
| 170 |
+
rect = np.zeros((4, 2), dtype=np.float32)
|
| 171 |
+
s = pts.sum(axis=1); d = np.diff(pts, axis=1)
|
| 172 |
+
rect[0] = pts[np.argmin(s)]
|
| 173 |
+
rect[2] = pts[np.argmax(s)]
|
| 174 |
+
rect[1] = pts[np.argmin(d)]
|
| 175 |
+
rect[3] = pts[np.argmax(d)]
|
| 176 |
+
return rect
|
| 177 |
+
|
| 178 |
+
def _warp_points(H, pts_xy):
    # Apply homography H to a list of (x, y) points; returns an (N, 2) float32 array.
    pts = np.asarray(pts_xy, dtype=np.float32).reshape(-1,1,2)
    return cv2.perspectiveTransform(pts, H).reshape(-1,2)
|
| 181 |
+
|
| 182 |
+
def _map_rect_from_rectified(Hinv, x, y, w, h, offset=(0,0)):
    """Map an axis-aligned rect from rectified space back through Hinv.

    Returns the integer axis-aligned bounding box (x, y, w, h) of the
    warped corners, shifted by *offset*.
    """
    corners = np.float32([[x, y], [x + w, y], [x + w, y + h], [x, y + h]])
    warped = _warp_points(Hinv, corners)
    min_xy = warped.min(axis=0)
    max_xy = warped.max(axis=0)
    off_x, off_y = offset
    return (int(min_xy[0] + off_x), int(min_xy[1] + off_y),
            int(max_xy[0] - min_xy[0]), int(max_xy[1] - min_xy[1]))
|
| 188 |
+
|
| 189 |
+
def _easyocr_to_items(detections):
|
| 190 |
+
items = []
|
| 191 |
+
for bbox, text, conf in detections:
|
| 192 |
+
quad = np.array(bbox, dtype=float)
|
| 193 |
+
xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
|
| 194 |
+
cx, cy = float(sum(xs)/4), float(sum(ys)/4)
|
| 195 |
+
xyxy = np.array([min(xs), min(ys), max(xs), max(ys)], dtype=float)
|
| 196 |
+
items.append({"text": text.strip(), "conf": float(conf),
|
| 197 |
+
"box": quad, "center": (cx, cy), "xyxy": xyxy})
|
| 198 |
+
return items
|
| 199 |
+
|
| 200 |
+
def _detect_panel_roi(img_bgr, v_pctl=35, bh_kernel=31, min_area_frac=0.08, ar_range=(1.1, 4.0), pad_frac=0.01):
    """Find the control-panel region in a BGR frame.

    Combines a black-hat response (dark markings on a bright surface)
    with a dark-value mask, cleans it morphologically, and keeps the
    largest contour whose convex-hull aspect ratio fits *ar_range*.

    Returns ((x0, y0, x1, y1), mask): padded ROI corners plus the binary
    mask used; falls back to the whole image when no contour qualifies.
    """
    h, w = img_bgr.shape[:2]
    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    V = hsv[:,:,2]
    # Black-hat highlights dark details on a bright background.
    k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (bh_kernel, bh_kernel))
    bh = cv2.morphologyEx(V, cv2.MORPH_BLACKHAT, k)
    _, m_bh = cv2.threshold(bh, max(20, bh.mean() + 1.0*bh.std()), 255, cv2.THRESH_BINARY)
    # Dark-pixel mask from the lower v_pctl percentile of brightness.
    thr_dark = int(np.percentile(V, v_pctl))
    m_dark = cv2.inRange(V, 0, thr_dark)
    mask = cv2.bitwise_or(m_bh, m_dark)
    # Close gaps, then open to drop small specks.
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(15,15)), 2)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7)), 1)
    cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    H, W = h, w
    best = None
    for c in cnts:
        area = cv2.contourArea(c)
        if area < min_area_frac * (H*W): continue  # too small to be the panel
        hull = cv2.convexHull(c)
        x,y,wid,hei = cv2.boundingRect(hull)
        ar = max(wid,hei) / max(1, min(wid,hei))
        if not (ar_range[0] <= ar <= ar_range[1]): continue  # wrong aspect ratio
        if (best is None) or (area > best[0]):
            best = (area, (x,y,wid,hei))
    if best is None:
        return (0,0,W,H), mask  # fallback: whole frame
    x,y,wid,hei = best[1]
    # Pad the winning box, clamped to the image bounds.
    pad = int(pad_frac * max(H, W))
    x0 = max(0, x - pad); y0 = max(0, y - pad)
    x1 = min(W, x + wid + pad); y1 = min(H, y + hei + pad)
    return (x0,y0,x1,y1), mask
|
| 231 |
+
|
| 232 |
+
def _deskew_panel_by_mask(panel_bgr, panel_mask_roi, min_quad_area_frac=0.05):
    """Perspective-rectify the panel using its mask's largest contour.

    Returns (warped, H) where H is the 3x3 perspective transform, or
    (panel_bgr, None) when no sufficiently large contour exists.
    """
    h, w = panel_bgr.shape[:2]
    cnts, _ = cv2.findContours(panel_mask_roi, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not cnts: return panel_bgr, None
    c = max(cnts, key=cv2.contourArea)
    if cv2.contourArea(c) < (min_quad_area_frac * h * w):
        return panel_bgr, None  # contour too small to trust
    # Prefer a clean 4-point approximation; otherwise use the min-area rect.
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    src = approx.reshape(4,2).astype(np.float32) if len(approx) == 4 else cv2.boxPoints(cv2.minAreaRect(c)).astype(np.float32)
    src = _order_pts(src)
    (tl, tr, br, bl) = src
    # Target size from the longer of each opposing edge pair, min 100 px.
    Wt = int(max(np.linalg.norm(br-bl), np.linalg.norm(tr-tl))); Wt = max(100, Wt)
    Ht = int(max(np.linalg.norm(tr-br), np.linalg.norm(tl-bl))); Ht = max(100, Ht)
    dst = np.array([[0,0],[Wt-1,0],[Wt-1,Ht-1],[0,Ht-1]], dtype=np.float32)
    H = cv2.getPerspectiveTransform(src, dst)
    warped = cv2.warpPerspective(panel_bgr, H, (Wt, Ht), flags=cv2.INTER_CUBIC)
    return warped, H
|
| 250 |
+
|
| 251 |
+
def _build_glare_mask(panel_bgr, v_thr=235, s_thr=45, lap_var_thr=25.0, min_area_rel=1e-4, max_area_rel=2e-2, ar_min=3.0, close_ks=5, open_ks=3, dil_ks=3):
    """Detect elongated, featureless specular-glare streaks.

    Candidates are very bright, low-saturation pixels; components are
    kept only when their relative area is within bounds, aspect ratio
    is at least *ar_min*, and the local Laplacian variance is low
    (i.e. no texture — glare, not printed content).

    Returns (mask, ratio): dilated uint8 mask and its area fraction.
    """
    h, w = panel_bgr.shape[:2]
    hsv = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(hsv)
    # Bright and desaturated -> glare candidate.
    m_hi = (V >= v_thr) & (S <= s_thr)
    m = (m_hi.astype(np.uint8) * 255)
    m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(close_ks,close_ks)), 1)
    m = cv2.morphologyEx(m, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(open_ks,open_ks)), 1)
    area_img = float(h*w)
    out = np.zeros_like(m, dtype=np.uint8)
    num, lab, stats, _ = cv2.connectedComponentsWithStats(m, 8)
    gray = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2GRAY)
    for i in range(1, num):
        x,y,wid,hei,area = stats[i]
        rel = area/area_img
        if rel < min_area_rel or rel > max_area_rel: continue  # wrong size
        ar = max(wid,hei)/max(1, min(wid,hei))
        if ar < ar_min: continue  # glare streaks are elongated
        crop = gray[y:y+hei, x:x+wid]
        # Textured regions (high Laplacian variance) are not glare.
        if cv2.Laplacian(crop, cv2.CV_64F).var() > lap_var_thr: continue
        out[lab==i] = 255
    out = cv2.dilate(out, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(dil_ks,dil_ks)), 1)
    ratio = out.sum() / 255.0 / area_img
    return out, float(ratio)
|
| 275 |
+
|
| 276 |
+
def _apply_deglare_toneclip(panel_bgr, glare_mask, ring_px=3, add_v=18):
    """Attenuate glare by clamping its brightness toward the surround.

    Samples a ring of pixels just outside the glare mask, then clips
    the V channel inside the mask to (ring median + add_v). Returns a
    new BGR image; the input is unchanged when the ring is empty.
    """
    hsv = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(hsv)
    # Ring = dilated mask minus the mask itself.
    dil = cv2.dilate(glare_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ring_px*2+1, ring_px*2+1)), 1)
    ring = cv2.bitwise_and(dil, cv2.bitwise_not(glare_mask))
    if cv2.countNonZero(ring) == 0:
        return panel_bgr  # nothing to sample the surround from
    ring_med = int(np.median(V[ring.astype(bool)]))
    cap = np.clip(ring_med + add_v, 0, 255).astype(np.uint8)
    V2 = V.copy()
    # Clamp only the glare pixels; everything else keeps its brightness.
    V2[glare_mask.astype(bool)] = np.minimum(V2[glare_mask.astype(bool)], cap)
    return cv2.cvtColor(cv2.merge([H,S,V2]), cv2.COLOR_HSV2BGR)
|
| 288 |
+
|
| 289 |
+
def _ocr_with_deglare_when_needed(panel_rect_bgr, reader, area_gate=0.002):
    """Run OCR; retry on a de-glared image when enough glare is present.

    *reader* is an EasyOCR-style object exposing readtext(). The
    de-glared result is kept only when its score reaches 85% of the
    original's (score = confidence sum + 0.3 per detection that
    contains Hangul or digits).
    """
    det_orig = reader.readtext(panel_rect_bgr)
    m_gl, ratio = _build_glare_mask(panel_rect_bgr)
    if ratio < area_gate:
        return det_orig  # negligible glare: keep the first pass
    degl = _apply_deglare_toneclip(panel_rect_bgr, m_gl)
    det_degl = reader.readtext(degl)
    def _score(dets):
        # Total confidence plus a bonus per detection with real content.
        return sum(c for _,_,c in dets) + 0.3*sum(1 for _,t,_ in dets if len(re.sub(r"[^가-힣0-9]","",t))>0)
    return det_degl if _score(det_degl) >= 0.85 * _score(det_orig) else det_orig
|
| 299 |
+
|
| 300 |
+
def _build_text_mask_from_easyocr(detections, shape_hw, dilate_px=2):
    """Rasterize EasyOCR quads into a binary mask (255 = text region).

    The mask is dilated by *dilate_px* so downstream LED detection can
    exclude a safety margin around recognized text.
    """
    H, W = shape_hw[:2]
    mask = np.zeros((H, W), np.uint8)
    if not detections: return mask
    polys = [np.array(bbox, dtype=np.int32).reshape(-1, 1, 2) for bbox, _, _ in detections]
    if polys:
        cv2.fillPoly(mask, polys, 255)
    if dilate_px > 0:
        k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_px*2+1, dilate_px*2+1))
        mask = cv2.dilate(mask, k, 1)
    return mask
|
| 311 |
+
|
| 312 |
+
def _auto_led_params_simple(shape, k_frac=0.015, area_lo_frac=1e-5, area_hi_frac=1.5e-3):
|
| 313 |
+
h, w = shape[:2]
|
| 314 |
+
long_side = max(h, w)
|
| 315 |
+
k_auto = int(round(long_side * k_frac))
|
| 316 |
+
if k_auto % 2 == 0: k_auto += 1
|
| 317 |
+
k_auto = max(5, min(k_auto, 31))
|
| 318 |
+
min_area = max(6, int(h * w * area_lo_frac))
|
| 319 |
+
max_area = max(min_area+1, int(h * w * area_hi_frac))
|
| 320 |
+
return k_auto, min_area, max_area
|
| 321 |
+
|
| 322 |
+
def _detect_leds_glare_core(img_bgr, k=None, sigma=2.3, ring_px=7, ring_v_thr=200, core_s_thr_bg=78, dv_thr_bg=45, strict_aspect=(2.0, 4.2), strict_extent=0.64, strict_solidity=0.80, include_white=False, exclude_mask=None, dv_thr_any=35, min_short_px=10, min_area_abs=40):
    """Detect lit LED indicators, with extra checks near glare.

    Pipeline: CLAHE-equalized top-hat and high-V thresholds produce seed
    pixels, optionally reinforced by hue/saturation color gates; after
    size filtering, each component must be brighter than its surrounding
    ring by *dv_thr_any*; components whose ring is itself bright
    (>= ring_v_thr, i.e. glare-like background) must additionally pass
    strict aspect/extent/solidity shape checks.

    Returns a list of (x, y, w, h, (cx, cy), core_v_mean) tuples.
    NOTE(review): core_s_thr_bg and dv_thr_bg are accepted but not used
    in this implementation — confirm against the source revision.
    """
    def _masked_mean_median(img_gray, mask_bool):
        # (mean, median) of selected pixels; (0.0, 0.0) when mask is empty.
        vals = img_gray[mask_bool]
        return (float(vals.mean()), float(np.median(vals))) if vals.size > 0 else (0.0, 0.0)
    k_auto, min_area, max_area = _auto_led_params_simple(img_bgr.shape)
    if not k or k <= 0: k = k_auto  # auto-pick the kernel size when not given
    g = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    g_eq = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(g)
    Hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    H,S,V = cv2.split(Hsv)
    # Top-hat picks out small bright blobs; threshold at mean + sigma*std.
    se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k))
    tophat = cv2.morphologyEx(g_eq, cv2.MORPH_TOPHAT, se)
    m, s = float(tophat.mean()), float(tophat.std())
    _, seed_th = cv2.threshold(tophat, np.clip(m + sigma*s, 40, 240), 255, cv2.THRESH_BINARY)
    _, seed_v = cv2.threshold(V, 210, 255, cv2.THRESH_BINARY)
    seed = cv2.bitwise_or(seed_th, seed_v)
    # Saturated bright pixels in two hue bands (optionally white) reinforce seeds.
    m_color = (cv2.inRange(H, 35, 85) | cv2.inRange(H, 90, 140)) & (cv2.inRange(S, 50, 255) & cv2.inRange(V, 160, 255))
    if include_white: m_color |= (cv2.inRange(S, 0, 60) & cv2.inRange(V, 200, 255))
    reinforced = cv2.bitwise_and(seed, cv2.dilate(m_color, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)), 1))
    ratio = (cv2.countNonZero(reinforced) / float(max(1, cv2.countNonZero(seed)))) if cv2.countNonZero(seed)>0 else 0.0
    # Use color-reinforced seeds only when they retain enough of the seed mass.
    core = reinforced if ratio >= 0.3 else seed
    if exclude_mask is not None:
        # e.g. mask out OCR text regions so glyphs are not mistaken for LEDs.
        core = cv2.bitwise_and(core, cv2.bitwise_not(exclude_mask))
    core = cv2.medianBlur(core, 3)
    core = cv2.morphologyEx(core, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)), 1)
    core = cv2.morphologyEx(core, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5)), 1)
    # First pass: remove components outside the size limits
    # (stats columns: x, y, w, h, area; stats[i,3] is the height).
    num, lab, stats, cents = cv2.connectedComponentsWithStats(core, 8)
    for i in range(1, num):
        if (min_short_px and stats[i,3] < min_short_px) or not (max(min_area, min_area_abs) <= stats[i,4] <= max_area):
            core[lab == i] = 0
    num, lab, stats, cents = cv2.connectedComponentsWithStats(core, 8)
    leds, ring_kernel = [], cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ring_px*2+1, ring_px*2+1))
    for i in range(1, num):
        x,y,wid,hei,area = stats[i]
        aspect = max(wid,hei) / max(1, min(wid,hei))
        if aspect > 6.5: continue  # too elongated to be an LED
        comp_mask = (lab == i)
        # Ring = dilated component minus the component itself.
        dil = cv2.dilate(comp_mask.astype(np.uint8), ring_kernel, 1).astype(bool)
        ring_mask = np.logical_and(dil, np.logical_not(comp_mask))
        core_v_mean, _ = _masked_mean_median(V, comp_mask)
        _, ring_med = _masked_mean_median(V, ring_mask)
        if (core_v_mean - ring_med) < dv_thr_any: continue  # not brighter than its surround
        if ring_med >= ring_v_thr:
            # Bright (glare-like) background: demand stricter shape quality.
            cnts, _ = cv2.findContours((comp_mask.astype(np.uint8) * 255), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if cnts:
                c = max(cnts, key=cv2.contourArea)
                solidity = cv2.contourArea(c) / max(1.0, cv2.contourArea(cv2.convexHull(c)))
                extent = area / float(max(1, wid*hei))
                if not (strict_aspect[0] <= aspect <= strict_aspect[1]) or extent < strict_extent or solidity < strict_solidity:
                    continue
        leds.append((int(x), int(y), int(wid), int(hei), (float(cents[i][0]), float(cents[i][1])), float(core_v_mean)))
    return leds
|
| 374 |
+
|
| 375 |
+
def _norm_ko(s: str) -> str:
|
| 376 |
+
return re.sub(r"\s+", "", s or "")
|
| 377 |
+
|
| 378 |
+
def _find_category_anchors(items):
    """Locate the header text of each category among OCR items.

    For every category whose name occurs in an item's whitespace-
    normalized text, keep the tallest such item (ties broken by larger
    area). Returns {category: {"center": (cx, cy), "xyxy": box}}.
    """
    best = {}  # category -> (height, area, item)
    for item in items:
        normalized = _norm_ko(item["text"])
        for cat in CATEGORY_OPTIONS:
            if cat not in normalized:
                continue
            x1, y1, x2, y2 = item["xyxy"]
            height = y2 - y1
            area = (x2 - x1) * height
            current = best.get(cat)
            # Strictly-better wins: taller, or same height and larger area.
            if current is None or (height, area) > (current[0], current[1]):
                best[cat] = (height, area, item)
    return {cat: {"center": entry[2]["center"], "xyxy": entry[2]["xyxy"]}
            for cat, entry in best.items()}
|
| 391 |
+
|
| 392 |
+
def _compute_center_band(items, img_shape):
    """Return (left, right) x-bounds of the central category band.

    Derived from the x-extents of OCR items containing a category name,
    padded by CENTER_BAND_PAD_REL; falls back to CENTER_BAND_FALLBACK
    fractions of the width when fewer than two edges are collected.
    """
    width = float(img_shape[1])
    edges = []
    for item in items:
        normalized = _norm_ko(item["text"])
        if any(cat in normalized for cat in CATEGORY_OPTIONS):
            edges.append(item["xyxy"][0])
            edges.append(item["xyxy"][2])
    if len(edges) >= 2:
        left = max(0.0, min(edges) - CENTER_BAND_PAD_REL * width)
        right = min(width, max(edges) + CENTER_BAND_PAD_REL * width)
    else:
        left = CENTER_BAND_FALLBACK[0] * width
        right = CENTER_BAND_FALLBACK[1] * width
    return float(left), float(right)
|
| 401 |
+
|
| 402 |
+
def _match_leds_to_texts(items, leds, img_shape, dmax_px=None, rel_gate=1.1, x_orient_eps=4, y_orient_eps=0):
    """Greedily match detected LEDs to canonical OCR tokens.

    Side-button tokens pair with LEDs outside the center band (text at
    or below the LED, same column); category options pair inside the
    band (text to the right of the LED on the same row). Pairs are
    assigned one-to-one in ascending distance order.

    Returns (tokens, pairs) where each pair is
    (text_center, led_center, token, led_index, brightness), sorted by
    30-px row bucket then by the LED's x coordinate.
    NOTE(review): rel_gate and x_orient_eps are accepted but unused.
    """
    Hh, Ww = img_shape[:2]
    dmax_px = dmax_px or max(50, int(0.065 * max(Hh, Ww)))
    band_left, band_right = _compute_center_band(items, img_shape)
    side_colw, side_dmax, side_y_tol, side_eucl_max = SIDE_COLW_REL*max(Hh,Ww), SIDE_DMAX_REL*max(Hh,Ww), SIDE_Y_TOL_REL*Hh, SIDE_EUCLID_MAX_REL*max(Hh,Ww)
    choices = []
    for li, (_x,_y,_w,_h,(cx, cy),bright) in enumerate(leds):
        best_cand = None
        for ti, it in enumerate(items):
            # Unpack text center, size, raw text, and left edge in one shot.
            tx, ty, tw, th, raw, x1, *_ = it["center"][0], it["center"][1], it["xyxy"][2]-it["xyxy"][0], it["xyxy"][3]-it["xyxy"][1], it["text"], it["xyxy"][0]
            tok = _canon_text(raw)
            if not tok or tok not in ALL_ALLOWED: continue  # only known labels participate
            dist = 0
            if _is_side_button(tok):
                # The LED must lie OUTSIDE the center band.
                if (cx < band_left or cx > band_right) and ty >= cy - side_y_tol and abs(tx - cx) <= max(side_colw, 0.5*tw):
                    # Weighted distance: vertical drop + 0.3x horizontal offset.
                    dist = max(0.0, ty - cy) + 0.3 * abs(tx - cx)
                    if dist > side_dmax or math.hypot(tx - cx, ty - cy) > side_eucl_max:
                        continue
            else:
                # Category option: LED and text both inside the band,
                # same row, and the text's left edge right of the LED.
                if band_left <= cx <= band_right and band_left <= tx <= band_right and abs(ty-cy) <= max(y_orient_eps, 0.6*th) and x1 >= cx + max(CENTER_RIGHT_MIN_PX, CENTER_RIGHT_MIN_FRAC*tw):
                    dist = math.hypot(tx-cx, ty-cy)
                    if dist > dmax_px: continue
            # dist stays 0 when no geometric gate matched -> not a candidate.
            if dist > 0 and (not best_cand or dist < best_cand[0]):
                best_cand = (dist, ti, tok)
        if best_cand:
            dist, ti, tok = best_cand
            choices.append((dist, li, ti, tok, float(bright), tuple(items[ti]["center"]), (cx,cy)))
    # Greedy one-to-one assignment by increasing distance.
    choices.sort(key=lambda x: x[0])
    used_led, used_txt, pairs_led = set(), set(), []
    for d, li, ti, tok, bri, ptxt, pled in choices:
        if li not in used_led and ti not in used_txt:
            used_led.add(li); used_txt.add(ti)
            pairs_led.append((ptxt, pled, tok, li, bri))
    # Order roughly top-to-bottom (30-px rows), then left-to-right.
    pairs_led.sort(key=lambda p: (int(p[1][1] // 30), p[1][0]))
    return [p[2] for p in pairs_led], pairs_led
|
| 439 |
+
|
| 440 |
+
def _choose_and_enforce_categories(pairs_led, items, leds, img_shape, cw_rel=0.06, dmax_rel=0.20, fill_default=None):
    """Pick one option per category, then try to fill missing categories.

    First takes the brightest LED-matched token per category. For each
    still-missing category it finds the category's header anchor, picks
    the brightest LED in that column at/below the anchor, and chooses
    the nearest option text in the same column, falling back to
    *fill_default* or "미확인".

    Returns {category: chosen_token}.
    NOTE(review): a category with no LED match and no usable
    anchor/column LED remains absent from the result — confirm callers
    tolerate missing keys.
    """
    H, W = img_shape[:2]; L = max(H, W)
    colw, dmax = cw_rel * L, dmax_rel * L
    picked = {}
    # Bucket LED-confirmed tokens per category, keeping brightness.
    bucket = {cat: [] for cat in CATEGORY_OPTIONS.keys()}
    for _, _, tok, li, bri in pairs_led:
        cat = _which_category(tok)
        if cat: bucket[cat].append((tok, bri, li))
    for cat, arr in bucket.items():
        if arr: picked[cat] = max(arr, key=lambda x: x[1])[0]
    # Anchor-based fallback for the categories that are still missing.
    anchors = _find_category_anchors(items)
    for cat in CATEGORY_OPTIONS:
        if cat in picked: continue
        a = anchors.get(cat)
        if a:
            ax, ay = a["center"]
            # Brightest LED in the anchor's column, at or below the anchor.
            cand_leds = sorted([ (bri, idx) for idx, (*_, (cx,cy), bri) in enumerate(leds) if abs(cx-ax)<=colw and cy>=ay-2 ], reverse=True)
            if cand_leds:
                led_center = leds[cand_leds[0][1]][4]
                best_tok, best_d = None, dmax
                # Nearest option text of this category in the same column.
                for it in items:
                    tok = _canon_text(it["text"])
                    if tok in CAT2SET[cat]:
                        tx, ty = it["center"]
                        if abs(tx - ax) <= colw and ty >= ay - 2:
                            d = math.hypot(tx - led_center[0], ty - led_center[1])
                            if d < best_d: best_d, best_tok = d, tok
                picked[cat] = best_tok or (fill_default.get(cat) if fill_default else "미확인")
    return picked
|
| 469 |
+
|
| 470 |
+
# --- Category diagnostic states (ON / TXT_ONLY / NO_TXT) ---
# Korean display labels for each diagnostic state.
STATE_KR = {"ON":"확인됨", "TXT_ONLY":"텍스트만", "NO_TXT":"텍스트없음"}
|
| 472 |
+
|
| 473 |
+
def _build_category_status(items, pairs_led):
    """Diagnose the OCR/LED matching state of every category.

    Returns {cat: {"picked": token or "미확인",
                   "state": "ON" | "TXT_ONLY" | "NO_TXT"}} where
    ON       = an LED was matched to one of the category's options,
    TXT_ONLY = option text was read but no LED matched,
    NO_TXT   = no option text for the category was read at all.
    """
    # 1) Option tokens recognized by OCR, grouped per category.
    seen_text = {cat: set() for cat in CATEGORY_OPTIONS}
    for item in items:
        token = _canon_text(item.get("text", ""))
        category = _which_category(token)
        if category:
            seen_text[category].add(token)

    # 2) Tokens confirmed by an LED-to-text match, grouped per category.
    confirmed = {cat: set() for cat in CATEGORY_OPTIONS}
    for _, _, token, _, _ in pairs_led:
        category = _which_category(token)
        if category:
            confirmed[category].add(token)

    # 3) Fold both views into the per-category status dict.
    status = {}
    for cat in CATEGORY_OPTIONS:
        if confirmed[cat]:
            status[cat] = {"picked": min(confirmed[cat]), "state": "ON"}
        elif seen_text[cat]:
            status[cat] = {"picked": "미확인", "state": "TXT_ONLY"}
        else:
            status[cat] = {"picked": "미확인", "state": "NO_TXT"}
    return status
|
| 508 |
+
|
| 509 |
+
def _compose_readout(cat_map, side_on, order=READOUT_ORDER,
                     diag_status: Dict[str, Dict[str,str]] = None,
                     state_labels: Dict[str,str] = STATE_KR):
    """Build the final Korean readout sentence.

    Walks the categories in *order*, listing each picked value; when
    *diag_status* is supplied, a "(state)" tail is attached, localized
    via *state_labels* (ON | TXT_ONLY | NO_TXT). Side-button labels are
    joined with " / " and appended. Returns "켜진 표시 없음" when there
    is nothing at all to report.
    """
    segments = []
    for category in order:
        value = cat_map.get(category, "미확인")
        if diag_status and category in diag_status:
            state = diag_status[category]["state"]
            label = state_labels.get(state, state) if state_labels else state
            segments.append(f"{category} {value}({label})")
        else:
            segments.append(f"{category} {value}")
    sentences = []
    if segments:
        sentences.append(", ".join(segments))
    if side_on:
        sentences.append(" / ".join(side_on))
    return ", ".join(sentences) if sentences else "켜진 표시 없음"
|
| 529 |
+
|
| 530 |
+
# ==============================
|
| 531 |
+
# 최종 요약 진입점
|
| 532 |
+
# ==============================
|
| 533 |
+
def summarize_scene(frame_bgr: np.ndarray, reader,
                    do_pic=True, debug_font=None,
                    debug_dir: Optional[str]=None,
                    diagnostic: bool=False,
                    # === (ADD) stabilization options ===
                    stabilize: Optional[bool]=None,
                    stabilizer: Optional[MotionStabilizer]=None) -> str:
    """
    Analyze the current frame (frame_bgr) and return a Korean summary
    sentence describing the control-panel state.

    Args:
        frame_bgr: input frame in BGR order (OpenCV convention).
        reader: EasyOCR reader instance used for text recognition.
        do_pic: when True, save an annotated debug image of the result.
        debug_font: optional TTF path for Korean labels in the debug image.
        debug_dir: directory for debug images (default: ./debug_summaries).
        diagnostic: when True, attach per-category ON/TXT_ONLY/NO_TXT tails.
        stabilize: override for STABILIZE_DEFAULT; None keeps the default.
        stabilizer: optional MotionStabilizer to use; a module-level one is
            lazily created and reused when omitted.

    Returns:
        Summary sentence, or a fallback error sentence on any exception
        (this function never raises).
    """
    try:
        # === (ADD) pick the (optionally) stabilized frame ===
        use_stab = STABILIZE_DEFAULT if (stabilize is None) else bool(stabilize)
        frame_in = frame_bgr
        if use_stab:
            global _global_stabilizer
            # Reuse the caller's stabilizer or the lazily-created global one.
            st = stabilizer or _global_stabilizer
            if st is None:
                st = MotionStabilizer(downscale=0.5, ecc=True, homography=False)
                _global_stabilizer = st
            frame_in = st.apply(frame_bgr)

        # 1. Detect the panel ROI and rectify it (uses frame_in).
        (x0,y0,x1,y1), panel_mask_full = _detect_panel_roi(frame_in)
        panel_bgr = frame_in[y0:y1, x0:x1].copy()
        panel_mask_roi = panel_mask_full[y0:y1, x0:x1].copy()
        panel_rect, H = _deskew_panel_by_mask(panel_bgr, panel_mask_roi)
        # Hinv maps rectified coordinates back to the ROI coordinate frame.
        Hinv = np.linalg.inv(H) if H is not None else None

        # 2. OCR (with de-glare preprocessing when needed).
        result_panel = _ocr_with_deglare_when_needed(panel_rect, reader)
        items_local = _easyocr_to_items(result_panel)

        # 3. Build a text mask and detect LEDs outside the text regions.
        text_mask_local = _build_text_mask_from_easyocr(result_panel, panel_rect.shape[:2])
        leds_local = _detect_leds_glare_core(
            panel_rect, k=15, sigma=2.0, include_white=True,
            exclude_mask=text_mask_local, dv_thr_any=22, min_short_px=10, min_area_abs=40
        )

        # 4. Map OCR/LED results back to frame_in coordinates
        #    (unwarp through Hinv when the panel was rectified, then offset
        #    by the ROI origin (x0, y0)).
        items = []
        if Hinv is not None:
            for it in items_local:
                mapped = _warp_points(Hinv, it["box"]) + np.array([x0, y0])
                xs, ys = mapped[:,0], mapped[:,1]
                items.append({"text": it["text"], "conf": it["conf"], "box": mapped.tolist(),
                              "center": (xs.mean(), ys.mean()),
                              "xyxy": np.array([xs.min(), ys.min(), xs.max(), ys.max()])})
        else:
            for it in items_local:
                bx = np.array(it["box"]) + np.array([x0, y0])
                xs, ys = bx[:,0], bx[:,1]
                items.append({"text": it["text"], "conf": it["conf"], "box": bx.tolist(),
                              "center": (xs.mean(), ys.mean()),
                              "xyxy": np.array([xs.min(), ys.min(), xs.max(), ys.max()])})

        leds = []
        if Hinv is not None:
            for (x,y,w,h,c,b) in leds_local:
                gx,gy,gw,gh = _map_rect_from_rectified(Hinv, x,y,w,h, offset=(x0,y0))
                gcx, gcy = (_warp_points(Hinv, [c]) + np.array([x0, y0]))[0]
                leds.append((gx,gy,gw,gh, (gcx, gcy), b))
        else:
            for (x,y,w,h,c,b) in leds_local:
                leds.append((x+x0, y+y0, w,h, (c[0]+x0, c[1]+y0), b))

        # 5. Match LEDs to nearby texts (uses frame_in.shape for scaling).
        led_tokens, pairs_led = _match_leds_to_texts(items, leds, frame_in.shape)

        # 6. Pick the final token per category and compose the sentence.
        cat_map = _choose_and_enforce_categories(pairs_led, items, leds, frame_in.shape)

        # === Added: per-category state (ON/TXT_ONLY/NO_TXT) for diagnostics ===
        diag_status = _build_category_status(items, pairs_led) if diagnostic else None

        side_on = sorted(list(set(tok for _,_,tok,_,_ in pairs_led if _is_side_button(tok))))
        final_text = _compose_readout(cat_map, side_on, diag_status=diag_status)

        if do_pic:
            # Best-effort debug visualization; failures never affect the
            # returned summary.
            try:
                import time as _time
                from PIL import Image, ImageDraw, ImageFont
                out_dir = debug_dir or os.path.join(os.getcwd(), "debug_summaries")
                os.makedirs(out_dir, exist_ok=True)

                vis = frame_in.copy()  # (CHANGE) visualize on the stabilized frame

                # OCR boxes in green, LED boxes/centers in orange.
                for it in items:
                    poly = np.array(it["box"], dtype=np.int32)
                    cv2.polylines(vis, [poly], True, (0, 255, 0), 2, cv2.LINE_AA)
                for (x, y, w, h, (cx, cy), bri) in leds:
                    cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 165, 0), 2)
                    cv2.circle(vis, (int(cx), int(cy)), 3, (255, 165, 0), -1)

                def _pick_kr_font(size=20, font_path=None):
                    # Try the caller-supplied font first, then common Korean
                    # fonts per platform; fall back to PIL's default font.
                    cands = [
                        font_path,
                        r"C:\Windows\Fonts\malgun.ttf",
                        r"C:\Windows\Fonts\malgunbd.ttf",
                        "/System/Library/Fonts/AppleSDGothicNeo.ttc",
                        "/usr/share/fonts/truetype/nanum/NanumGothic.ttf",
                        "/usr/share/fonts/truetype/noto/NotoSansKR-Regular.ttf",
                        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
                    ]
                    for p in cands:
                        if p and os.path.exists(p):
                            try:
                                return ImageFont.truetype(p, size)
                            except Exception:
                                pass
                    return ImageFont.load_default()

                # Draw Korean text labels via PIL (cv2.putText cannot render Hangul).
                pil = Image.fromarray(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(pil)
                font = _pick_kr_font(size=20, font_path=debug_font)

                for it in items:
                    # NOTE(review): x1, y1 shadow the panel-ROI corner bound
                    # earlier in this function; harmless here but confusing.
                    x1, y1 = int(it["xyxy"][0]), int(it["xyxy"][1])
                    label = f"{it.get('text','')} ({it.get('conf',0.0):.2f})"
                    bbox = draw.textbbox((0, 0), label, font=font)
                    tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
                    draw.rectangle([x1, y1 - th - 6, x1 + tw + 8, y1 + 2], fill=(0, 0, 0))
                    draw.text((x1 + 4, y1 - th - 4), label, font=font, fill=(255, 255, 255))

                vis = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)

                # NOTE(review): the summary sentence (Korean, with commas and
                # spaces) is embedded in the file name — long summaries may
                # exceed filesystem name limits; consider a sanitized name.
                ts = _time.strftime("%Y%m%d_%H%M%S"); ms = int((_time.time() % 1) * 1000)
                out_put_text = f"{(final_text or '미확인 상태')} ({ts}_{ms:03d}).png"
                out_path = os.path.join(out_dir, out_put_text)
                cv2.imwrite(out_path, vis)
            except Exception as _e:
                # NOTE(review): stray ')' inside the message f-string below.
                print(f"[seeing.summarize_scene] pic save failed: {_e})")

        return final_text or "켜진 표시 없음"

    except Exception as e:
        # Catch-all: this function must never raise into the caller.
        print(f"[seeing.summarize_scene] error: {e}")
        return "현재 상태를 파악하는데 실패했습니다."
|
total_mode.py
ADDED
|
@@ -0,0 +1,1885 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Fingertip-Only OCR — EasyOCR-only + SIM-history warp
|
| 2 |
+
# (OP/INFO 모드 스케줄 재구성 & HUD 개선)
|
| 3 |
+
# --------------------------------------------------------------
|
| 4 |
+
|
| 5 |
+
import cv2, time, numpy as np, threading, queue, os, math, re, traceback
|
| 6 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 7 |
+
from tts_reader import TTSReader
|
| 8 |
+
import seeing
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
from rapidfuzz import process, fuzz
|
| 12 |
+
from jamo import h2j, j2hcj
|
| 13 |
+
_SPELLFIX_OK = True
|
| 14 |
+
except Exception as _e:
|
| 15 |
+
print(f"[SPELLFIX] disabled: { _e }")
|
| 16 |
+
_SPELLFIX_OK = False
|
| 17 |
+
|
| 18 |
+
# ========= User / Display =========
|
| 19 |
+
CAMERA_ID = 0
|
| 20 |
+
CAPTURE_TARGET_W = 1920
|
| 21 |
+
CAPTURE_TARGET_H = 1080
|
| 22 |
+
WORK_WIDTH_TARGET = 1280
|
| 23 |
+
DISPLAY_MAX_W = 1280
|
| 24 |
+
WINDOW_NAME = 'Assistive Fingertip OCR (fast)'
|
| 25 |
+
|
| 26 |
+
# ========= OCR / Scheduling =========
|
| 27 |
+
OCR_ENABLED = True
|
| 28 |
+
BASE_OCR_PERIOD = 1.5
|
| 29 |
+
EXTRA_OCR_PERIOD = 0.6
|
| 30 |
+
STALE_AGE_SEC = 7.0
|
| 31 |
+
LOW_CONF_TH = 0.55
|
| 32 |
+
|
| 33 |
+
# ========= ROI (work-space) =========
|
| 34 |
+
# <실험1> 문제 상황: 서연 세탁기 글씨가 작아서 detection 못함
|
| 35 |
+
# [CASE 1] ROI_W, ROI_H = 420, 420 -> detection 성능 향상 (부족함)
|
| 36 |
+
# [CASE 2] 서연 세탁기 사진 잘라서 글씨 더 크게 보이도록 조정 (안 해봄)
|
| 37 |
+
# [CASE 3] MAX_OCR_LONG 을 420으로 제한하지 않고 원본을 넣기 (속도 느려짐)
|
| 38 |
+
# [CASE 4] 실제 사이즈로 인쇄
|
| 39 |
+
|
| 40 |
+
ROI_W, ROI_H = 420,420
|
| 41 |
+
MIN_ROI_W, MIN_ROI_H = 200, 120
|
| 42 |
+
BLUR_VAR_THRESH = 80.0
|
| 43 |
+
|
| 44 |
+
# ROI 유지 유예(손가락 잠깐 끊겨도 ROI 내부 TTL 갱신)
|
| 45 |
+
ROI_KEEPALIVE_GRACE_SEC = 1.2
|
| 46 |
+
last_roi_active_until = 0.0
|
| 47 |
+
# ========= No masking =========
|
| 48 |
+
#USE_MASKED_FULL_ROI = True 삭제(8.20)
|
| 49 |
+
# EXCLUDE_PAD = 8
|
| 50 |
+
# MASK_FILL_VAL = (127,127,127)
|
| 51 |
+
|
| 52 |
+
# ========= Donut OCR (unused) =========
|
| 53 |
+
# DONUT_PAD = 3 삭제(8.20)
|
| 54 |
+
# SUBROI_MIN_AREA = 1200
|
| 55 |
+
# MAX_SUBROIS = 1
|
| 56 |
+
|
| 57 |
+
# ========= TTL / Pruning =========
|
| 58 |
+
BASE_TTL = 3.0 # 연장시간 조정(8.20)
|
| 59 |
+
PIN_GRACE_SEC = 1.2
|
| 60 |
+
MAX_OVERLAYS = 300
|
| 61 |
+
#ONSCREEN_KEEPALIVE = 0.8 삭제(8.20)
|
| 62 |
+
HARD_MAX_LIFETIME = 9.0
|
| 63 |
+
no_repeat_until_ts = 2.0 # 같은 문장 재발화 금지 시간 (8.21)
|
| 64 |
+
|
| 65 |
+
IGNORE_HARD_CAP_WHILE_FINGER_IN_ROI = True
|
| 66 |
+
PRUNE_TIMEOUT_SEC =0.5 # prune 주기 변수화 (8.20)
|
| 67 |
+
|
| 68 |
+
# ========= Merge criteria =========
|
| 69 |
+
MERGE_IOU_TH = 0.50
|
| 70 |
+
MERGE_CENTER_DIST = 28.0
|
| 71 |
+
|
| 72 |
+
# ========= TTS =========
|
| 73 |
+
TTS_ENABLE = True
|
| 74 |
+
TTS_CONF = 0.0 # 발화 기준 임계치 필요할듯. 지금은 다 말함 (8.20)
|
| 75 |
+
TTS_REPEAT_SEC = 1.0
|
| 76 |
+
# TTS_QUEUE_MAX = 1 삭제(8.20)
|
| 77 |
+
TTS_TARGET_STICKY_SEC = 0.6
|
| 78 |
+
# TTS_DEBUG = False 삭제 (8.20)
|
| 79 |
+
# TTS_STRICT_LATEST = True 삭제(8.20)
|
| 80 |
+
|
| 81 |
+
STRICT_DICT_ONLY = True
|
| 82 |
+
TTS_CONF_FALLBACK = 0.35
|
| 83 |
+
|
| 84 |
+
SHOW_TTS_HINT = True
|
| 85 |
+
tts_current_display = ""
|
| 86 |
+
tts_current_note = ""
|
| 87 |
+
tts_last_spoken_text = "" # <<< CHANGED: 마지막 발화 문구를 HUD에 유지
|
| 88 |
+
|
| 89 |
+
# ★ 추가: 모드 전환 멘트 직후 1회 즉시 요약 트리거 + 선점 락
|
| 90 |
+
INFO_FORCE_IMMEDIATE = False
|
| 91 |
+
tts_force_lock = threading.Lock()
|
| 92 |
+
|
| 93 |
+
# ========= Speed knobs =========
|
| 94 |
+
# 수정1: MAX_OCR_LONG 416 -> 420
|
| 95 |
+
MAX_OCR_LONG = 420
|
| 96 |
+
ENHANCE_MODE = "off"
|
| 97 |
+
MOTION_GATE_PX = 2.0
|
| 98 |
+
MAX_TEXT_DRAW = 30
|
| 99 |
+
|
| 100 |
+
# ========= Global tracking (SIM) =========
|
| 101 |
+
FLOW_DS = 0.45
|
| 102 |
+
FLOW_MAX_CORNERS=240; FLOW_QUALITY=0.01; FLOW_MIN_DISTANCE=7
|
| 103 |
+
FLOW_WINSIZE=(21,21); FLOW_LEVELS=3
|
| 104 |
+
RESEED_INTERVAL_FRAMES=8
|
| 105 |
+
|
| 106 |
+
MAX_TRANS_PX = 90
|
| 107 |
+
MAX_SCALE_STEP = 0.18
|
| 108 |
+
MAX_ROT_STEP_DEG = 10.0
|
| 109 |
+
EMA_ALPHA_SIM = 0.28
|
| 110 |
+
|
| 111 |
+
USE_ORB_FALLBACK = True
|
| 112 |
+
ORB_NFEATURES=600; ORB_MIN_GOOD=45
|
| 113 |
+
|
| 114 |
+
# ========= Finger =========
|
| 115 |
+
EMA_ALPHA_FINGER=0.35
|
| 116 |
+
FINGER_STALE_MS = 800
|
| 117 |
+
finger_last_seen = 0.0
|
| 118 |
+
# had_finger = False 삭제(8.20)
|
| 119 |
+
last_finger_xy = None
|
| 120 |
+
|
| 121 |
+
# ========= YOLO =========
|
| 122 |
+
YOLO_DEBUG = True
|
| 123 |
+
YOLO_DRAW_ALL = True
|
| 124 |
+
YOLO_IMG_SIZE = 640
|
| 125 |
+
YOLO_CONF_TH = 0.25
|
| 126 |
+
YOLO_IOU_TH = 0.50
|
| 127 |
+
YOLO_CLASS_NAME = None
|
| 128 |
+
YOLO_CLASS_ID = 0
|
| 129 |
+
# fingerip_o.pt 사용 가능
|
| 130 |
+
YOLO_WEIGHTS = r'weights/fingertip.pt'
|
| 131 |
+
|
| 132 |
+
YOLO_SHOW_INPUT = False
|
| 133 |
+
YOLO_INPUT_WIN = 'YOLO_INPUT'
|
| 134 |
+
|
| 135 |
+
DO_PIC=True # 보기모드 입력 출력(8.21)
|
| 136 |
+
# # ========= speed change parameters =========
|
| 137 |
+
# # 해상도/스케일
|
| 138 |
+
# WORK_WIDTH_TARGET = 960
|
| 139 |
+
# YOLO_IMG_SIZE = 448
|
| 140 |
+
# MAX_OCR_LONG = 360
|
| 141 |
+
# FLOW_DS = 0.35
|
| 142 |
+
|
| 143 |
+
# # ROI 크기
|
| 144 |
+
# ROI_W, ROI_H = 270,270
|
| 145 |
+
|
| 146 |
+
# # 빈도/주기
|
| 147 |
+
# BASE_OCR_PERIOD = 2.0
|
| 148 |
+
# EXTRA_OCR_PERIOD = 0.9
|
| 149 |
+
# RESEED_INTERVAL_FRAMES = 12
|
| 150 |
+
# PRUNE_TIMEOUT_SEC = 1.0
|
| 151 |
+
|
| 152 |
+
# # 전역 SIM
|
| 153 |
+
# FLOW_MAX_CORNERS = 150
|
| 154 |
+
# FLOW_WINSIZE = (17,17)
|
| 155 |
+
# FLOW_LEVELS = 2
|
| 156 |
+
# # estimateAffinePartial2D maxIters ~800로 하향
|
| 157 |
+
|
| 158 |
+
# # KLT
|
| 159 |
+
# KLT_LEVELS = 2
|
| 160 |
+
# KLT_WIN = (25,25)
|
| 161 |
+
# KLT_TERM = (cv2.TERM_CRITERIA_EPS|cv2.TERM_CRITERIA_COUNT, 12, 0.03)
|
| 162 |
+
# KLT_N_SAMPLES = 6
|
| 163 |
+
# KLT_USE_CLAHE = False
|
| 164 |
+
|
| 165 |
+
# # YOLO
|
| 166 |
+
# YOLO_CONF_TH = 0.3 # 잡음↓
|
| 167 |
+
# # yolo_model.predict(..., half=True) # (GPU일 때)
|
| 168 |
+
|
| 169 |
+
# # OCR
|
| 170 |
+
# # rotation_info=[0] 로 축소
|
| 171 |
+
# # canvas_size=1280, mag_ratio=1.1
|
| 172 |
+
# MAX_TEXT_DRAW = 30
|
| 173 |
+
# MAX_OVERLAYS = 150
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
# ========= KLT fallback =========
|
| 177 |
+
# 수정2: False -> True
|
| 178 |
+
USE_KLT_FALLBACK = True
|
| 179 |
+
KLT_WIN=(31,31); KLT_LEVELS=4
|
| 180 |
+
KLT_TERM=(cv2.TERM_CRITERIA_EPS|cv2.TERM_CRITERIA_COUNT, 20, 0.03)
|
| 181 |
+
KLT_FB_MAX=4.0; KLT_ERR_MAX=100.0; KLT_STEP_MAX=30.0
|
| 182 |
+
KLT_OUT_MARGIN=4; KLT_N_SAMPLES=12; KLT_RING_R=10
|
| 183 |
+
# KLT_RESEED_EVERY=6; 삭제(8.20)
|
| 184 |
+
KLT_MIN_GOOD=5; KLT_LOSS_GRACE=3
|
| 185 |
+
KLT_USE_CLAHE=True
|
| 186 |
+
|
| 187 |
+
klt_pts_prev=None; klt_lost_frames=0; #frames_since_reseed=0 삭제 (8.20)
|
| 188 |
+
|
| 189 |
+
# ========= OCR time meter =========
|
| 190 |
+
OCR_EMA=None; OCR_EMA_ALPHA=0.25
|
| 191 |
+
|
| 192 |
+
# ==== Dict-based merge parameters ====
|
| 193 |
+
# 수정3: 80 -> 60
|
| 194 |
+
DICT_MERGE_SCORE = 70 # 사전 변환 수정(8.21)
|
| 195 |
+
DICT_TIE_DELTA = 3
|
| 196 |
+
DICT_ONLY =True # 사전 단어만 표기(8.21)
|
| 197 |
+
|
| 198 |
+
# ========= GUIDE MODE =========
|
| 199 |
+
GUIDE_MODE = False
|
| 200 |
+
GUIDE_TARGET = None
|
| 201 |
+
GUIDE_TOL_PX = 40
|
| 202 |
+
GUIDE_REPEAT_SEC = 1.0
|
| 203 |
+
GUIDE_LAST_TS = 0.0
|
| 204 |
+
GUIDE_LAST_SENT = ""
|
| 205 |
+
GUIDE_TARGET_ITEM = None
|
| 206 |
+
GUIDE_REQUIRE_FINGER = True
|
| 207 |
+
|
| 208 |
+
# ========= INFO/OP 모드 =========
|
| 209 |
+
MODE_OP = 1 # 조작 모드
|
| 210 |
+
MODE_INFO = 2 # 보기 모드
|
| 211 |
+
MODE_GUIDE = 3 # 안내 모드
|
| 212 |
+
|
| 213 |
+
mode_lock = threading.Lock()
|
| 214 |
+
mode_state = MODE_OP
|
| 215 |
+
|
| 216 |
+
# 보기 주기(초)
|
| 217 |
+
INFO_PERIOD_SEC = 5.0 # <<< CHANGED: 8s → 5s
|
| 218 |
+
|
| 219 |
+
# 즉시 실행/주기 스케줄용
|
| 220 |
+
_next_info_due = 0.0
|
| 221 |
+
|
| 222 |
+
# 보기용 최신 프레임 공유
|
| 223 |
+
_latest_frame_for_info = None
|
| 224 |
+
_latest_frame_lock = threading.Lock()
|
| 225 |
+
|
| 226 |
+
# 보기 스레드 제어
|
| 227 |
+
_info_stop = threading.Event()
|
| 228 |
+
|
| 229 |
+
def _is_speaker_busy() -> bool:
|
| 230 |
+
try:
|
| 231 |
+
import pygame
|
| 232 |
+
return pygame.mixer.music.get_busy()
|
| 233 |
+
except Exception:
|
| 234 |
+
return False
|
| 235 |
+
|
| 236 |
+
def _say_once(text: str):
    """Play exactly one sentence safely (async TTS) and keep it on the HUD.

    - Briefly wait for playback to start (up to ~2 s of attempts).
    - Poll until playback ends (up to 30 s), then clear only the target so
      the sentence is not repeated.
    - The HUD keeps showing the last utterance via tts_last_spoken_text.
    """
    global no_repeat_until_ts

    t_start = time.time()  # reference point for start-detection below
    set_tts_target(text)
    # Block re-enqueueing of the same sentence (generous safety margin).
    no_repeat_until_ts = time.time() + 60.0

    # Detect playback start (max ~2 s).
    while not _info_stop.is_set():
        if _is_speaker_busy():
            break
        # An enqueue timestamp newer than t_start also counts as "started".
        # NOTE(review): relies on a module global that may not exist yet,
        # hence the `in globals()` guard.
        if '_last_spoken_enqueue_ts' in globals() and _last_spoken_enqueue_ts >= t_start:
            time.sleep(0.1)
            break
        if (time.time() - t_start) > 2.0:
            break
        time.sleep(0.02)

    # Wait for playback to finish (max 30 s).
    t0 = time.time()
    while _is_speaker_busy() and not _info_stop.is_set():
        if (time.time() - t0) > 30.0:
            break
        time.sleep(0.05)

    # Clear only the target to prevent repetition (HUD display persists
    # via tts_last_spoken_text).
    set_tts_target(None)
    no_repeat_until_ts = 0.0
|
| 269 |
+
|
| 270 |
+
def announce_force_async(text: str, after=None):
    """Mode-switch only: interrupt any ongoing speech and speak `text` first.

    Runs in a daemon thread. Holds tts_force_lock so concurrent forced
    announcements are serialized. `after`, if callable, is invoked once the
    utterance completes (exceptions from it are swallowed).
    """
    def _runner():
        with tts_force_lock:
            if TTS_ENABLE and tts is not None:
                # Drop anything queued, then try every known stop method —
                # the tts object's API is duck-typed, so probe defensively.
                try: tts.clear_queue()
                except Exception: pass
                for m in ("stop","cancel","flush"):
                    if hasattr(tts, m):
                        try: getattr(tts, m)()
                        except Exception: pass
            _say_once(text)  # keeps last utterance on the HUD, blocks repeats
            if callable(after):
                try: after()
                except Exception: pass
    threading.Thread(target=_runner, daemon=True).start()
|
| 286 |
+
|
| 287 |
+
def _enter_op_mode():
    """Switch to operation (OP) mode.

    Sequence: 1) cut off any current TTS, 2) finish speaking the
    announcement, 3) only then flip the mode flags.
    """
    def _apply_mode():
        global mode_state
        with mode_lock:
            mode_state = MODE_OP
        globals().update(GUIDE_MODE=False)

    announce_force_async("조작 모드로 전환합니다.", after=_apply_mode)
|
| 295 |
+
|
| 296 |
+
def _enter_info_mode():
    """Switch to INFO ("viewing") mode.

    Sequence: 1) preempting announcement, 2) after it finishes, set the
    INFO flags and allow the first summary to run immediately.
    """
    def _apply_mode():
        global mode_state, _next_info_due, INFO_FORCE_IMMEDIATE
        with mode_lock:
            mode_state = MODE_INFO
            _next_info_due = 0.0          # run once right after entry
            INFO_FORCE_IMMEDIATE = True   # summarize on the next loop pass
        globals().update(GUIDE_MODE=False)

    announce_force_async("보기 모드로 전환합니다. 지금부터 상황을 설명합니다.", after=_apply_mode)
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def _enter_guide_mode():
    """Switch to GUIDE mode.

    Sequence: 1) preempting announcement, 2) apply GUIDE mode once the
    announcement has finished playing.
    """
    def _apply_mode():
        global mode_state, GUIDE_MODE
        with mode_lock:
            mode_state = MODE_GUIDE
            GUIDE_MODE = True

    announce_force_async("안내 모드로 전환합니다. 목표를 지정해 주세요.", after=_apply_mode)
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
# def _finger_present_now() -> bool: #손가락 탐지 제거 (8.21)
|
| 319 |
+
# try:
|
| 320 |
+
# if last_finger_xy is None:
|
| 321 |
+
# return False
|
| 322 |
+
# return (time.time() - finger_last_seen) * 1000.0 <= FINGER_STALE_MS
|
| 323 |
+
# except NameError:
|
| 324 |
+
# return False
|
| 325 |
+
|
| 326 |
+
# def _wait_till_no_finger(max_wait_sec: float = 8.0):
|
| 327 |
+
# t0 = time.time()
|
| 328 |
+
# while _finger_present_now() and not _info_stop.is_set():
|
| 329 |
+
# if time.time() - t0 > max_wait_sec:
|
| 330 |
+
# break
|
| 331 |
+
# time.sleep(0.05)
|
| 332 |
+
|
| 333 |
+
def _info_worker():
    """INFO ("viewing") mode worker: one summary on entry, then every 5 s.

    If speech is in progress the run is deferred to "speech end + 2 s" —
    except for the single summary right after the mode-entry announcement,
    which runs with no extra delay (INFO_FORCE_IMMEDIATE).
    Runs as a daemon thread until _info_stop is set.
    """
    global _next_info_due, INFO_FORCE_IMMEDIATE

    while not _info_stop.is_set():
        time.sleep(0.05)  # coarse polling keeps this thread cheap

        # Only act while INFO mode is active; reset the schedule otherwise.
        with mode_lock:
            info_on = (mode_state == MODE_INFO)
        if not info_on:
            _next_info_due = 0.0
            continue

        now = time.time()
        if now < _next_info_due:
            continue

        # 1) If the speaker is busy, wait until it finishes.
        was_busy = False
        while _is_speaker_busy() and not _info_stop.is_set():
            was_busy = True
            time.sleep(0.05)

        # 1-1) Normal case: once speech ends, defer 2 s before summarizing.
        #      Exception: right after the mode-entry announcement, run now.
        if was_busy:
            if INFO_FORCE_IMMEDIATE:
                # The entry announcement just finished → run immediately once.
                INFO_FORCE_IMMEDIATE = False
            else:
                _next_info_due = time.time() + 2.0
                continue

        # 2) (Removed) finger-presence handling used to live here.

        # 3) Summarize the latest shared frame (copied under its lock).
        with _latest_frame_lock:
            frame = None if _latest_frame_for_info is None else _latest_frame_for_info.copy()

        if frame is not None:
            try:
                summary = seeing.summarize_scene(frame, easy_reader,do_pic=DO_PIC, debug_dir=r"logs/ocr_bbox",debug_font=r"C:\Windows\Fonts\malgun.ttf")
            except Exception as e:
                print("[INFO] summarize failed:", e)
                summary = None

            if summary:
                _say_once(summary)

        # 4) Schedule the next run (now + 5 s).
        _next_info_due = time.time() + INFO_PERIOD_SEC
|
| 386 |
+
|
| 387 |
+
# ===== STT =====
|
| 388 |
+
USE_STT = True
|
| 389 |
+
try:
|
| 390 |
+
import speech_recognition as sr
|
| 391 |
+
_STT_OK = True
|
| 392 |
+
except Exception as _e:
|
| 393 |
+
print(f"[STT] disabled: {_e}")
|
| 394 |
+
_STT_OK = False
|
| 395 |
+
|
| 396 |
+
# ========= GPU / OCR / YOLO 로드 =========
|
| 397 |
+
def torch_cuda_ok():
    """Probe whether PyTorch reports a usable CUDA device.

    Never raises: import errors and probe errors both yield False (with a
    log line), so callers can use the result to pick CPU/GPU paths.
    """
    try:
        import torch
        ok = bool(torch.cuda.is_available())
    except Exception as e:
        print(f"[GPU] torch check failed: {e}")
        return False
    print(f"[GPU] torch CUDA available: {ok}")
    return ok
|
| 406 |
+
|
| 407 |
+
gpu_ok = torch_cuda_ok()
|
| 408 |
+
|
| 409 |
+
OCR_ENGINE=None; easy_reader=None
|
| 410 |
+
import easyocr
|
| 411 |
+
try:
|
| 412 |
+
easy_reader = easyocr.Reader(['ko'], gpu=gpu_ok,
|
| 413 |
+
model_storage_directory='models',
|
| 414 |
+
user_network_directory='user_network',
|
| 415 |
+
recog_network='best_accuracy1',
|
| 416 |
+
download_enabled=False)
|
| 417 |
+
OCR_ENGINE = 'easyocr_gpu' if gpu_ok else 'easyocr_cpu'
|
| 418 |
+
print(f"[OCR] EasyOCR (GPU={gpu_ok})")
|
| 419 |
+
except Exception as e:
|
| 420 |
+
traceback.print_exc()
|
| 421 |
+
raise SystemExit("No OCR engine available")
|
| 422 |
+
|
| 423 |
+
# === 보기(상황 설명) 스레드 기동 ===
|
| 424 |
+
def _start_info_thread_once():
    """Start the scene-description (info) worker thread exactly once.

    Idempotent: a function attribute records that the daemon thread has
    already been launched, so repeated calls are no-ops.
    """
    if getattr(_start_info_thread_once, "_started", False):
        return
    worker = threading.Thread(target=_info_worker, daemon=True)
    worker.start()
    _start_info_thread_once._started = True
|
| 428 |
+
_start_info_thread_once()
|
| 429 |
+
|
| 430 |
+
# ========= YOLO =========
|
| 431 |
+
try:
|
| 432 |
+
from ultralytics import YOLO
|
| 433 |
+
yolo_device = 0 if gpu_ok else 'cpu'
|
| 434 |
+
yolo_model = YOLO(YOLO_WEIGHTS)
|
| 435 |
+
print(f"[YOLO] Loaded: {YOLO_WEIGHTS} (device={yolo_device})")
|
| 436 |
+
class_names = yolo_model.names
|
| 437 |
+
if YOLO_CLASS_NAME:
|
| 438 |
+
inv = {str(v).lower(): int(k) for k, v in class_names.items()}
|
| 439 |
+
if YOLO_CLASS_NAME.lower() in inv:
|
| 440 |
+
YOLO_CLASS_ID = inv[YOLO_CLASS_NAME.lower()]
|
| 441 |
+
except Exception as e:
|
| 442 |
+
traceback.print_exc()
|
| 443 |
+
raise SystemExit("[YOLO] 모델 로드 실패. YOLO_WEIGHTS 경로/파일 확인")
|
| 444 |
+
|
| 445 |
+
def _pick_best_tip(cands, last_xy):
    """Select the best fingertip candidate.

    Each candidate is (cx, cy, conf, box). With no previous position the
    highest-confidence candidate wins; otherwise confidence is penalized
    by 0.0005 * squared-distance to `last_xy`, favouring continuity.
    Returns None for an empty candidate list.
    """
    if not cands:
        return None
    if last_xy is None:
        return max(cands, key=lambda c: c[2])
    px, py = last_xy
    best = None
    best_score = None
    for cand in cands:
        cx, cy, conf, _box = cand
        score = conf - 0.0005 * ((cx - px) ** 2 + (cy - py) ** 2)
        if best_score is None or score > best_score:
            best, best_score = cand, score
    return best
|
| 455 |
+
|
| 456 |
+
# --- YOLO 비동기 워커 ---
|
| 457 |
+
yolo_in_q=queue.Queue(maxsize=1); yolo_out_q=queue.Queue(maxsize=1); yolo_stop=threading.Event()
|
| 458 |
+
def _yolo_worker():
    """Background YOLO inference loop (daemon thread).

    Pulls frames from `yolo_in_q`, detects fingertip candidates, and
    publishes the newest result dict to `yolo_out_q` (depth-1 queue:
    older results are drained so consumers always see the latest).
    """
    while not yolo_stop.is_set():
        try:
            frame = yolo_in_q.get(timeout=0.2)
        except queue.Empty:
            continue  # no new frame yet; re-check the stop flag
        # Letterboxed copy is kept only for debug visualisation ('yolo_in').
        yolo_in_vis, _r, _off = _yolo_letterbox_bgr(frame, YOLO_IMG_SIZE)
        res = yolo_model.predict(source=frame, imgsz=YOLO_IMG_SIZE,
                                 conf=YOLO_CONF_TH, iou=YOLO_IOU_TH,
                                 device=yolo_device, verbose=False)
        det=None; raw_boxes=[]
        if res and res[0].boxes is not None and len(res[0].boxes) > 0:
            cands=[]
            for b in res[0].boxes:
                x1,y1,x2,y2 = b.xyxy[0].tolist()
                conf = float(b.conf[0]) if b.conf is not None else 0.0
                cls_id = int(b.cls[0]) if b.cls is not None else 0
                raw_boxes.append((x1,y1,x2,y2,conf,cls_id))
                # Optional single-class filter (e.g. only the fingertip class).
                if YOLO_CLASS_ID is not None and cls_id != YOLO_CLASS_ID: continue
                cx, cy = (x1+x2)/2.0, (y1+y2)/2.0
                cands.append((cx, cy, conf, (x1, y1, x2-x1, y2-y1)))
            # Prefer the candidate closest to the previous fingertip position.
            best=_pick_best_tip(cands, last_finger_xy)
            if best is not None:
                cx, cy, conf, (x,y,w,h) = best
                det={'xy':(int(round(cx)), int(round(cy))),
                     'box':(int(x), int(y), int(w), int(h)),
                     'conf':conf, 'ts':time.time(),
                     'raw_boxes':raw_boxes, 'yolo_in':yolo_in_vis}
            else:
                # NOTE(review): flattened source makes this else's pairing
                # ambiguous; paired with `if best is not None` so `det` is
                # always a dict — confirm against the original file.
                det={'xy':None, 'raw_boxes':[], 'yolo_in':yolo_in_vis}
        # Drain stale results so only the freshest detection is queued.
        try:
            while True: yolo_out_q.get_nowait()
        except queue.Empty:
            pass
        try: yolo_out_q.put_nowait(det)
        except queue.Full: pass
|
| 494 |
+
threading.Thread(target=_yolo_worker, daemon=True).start()
|
| 495 |
+
|
| 496 |
+
def _yolo_letterbox_bgr(img, new_size=YOLO_IMG_SIZE, pad_val=114):
    """Letterbox a BGR image into a new_size x new_size square.

    Scales the image to fit while preserving aspect ratio and pads the
    remainder with `pad_val` gray. Returns (padded_image, scale_ratio,
    (left_pad, top_pad)) so detections can be mapped back to the frame.
    """
    h, w = img.shape[:2]
    ratio = min(new_size / float(h), new_size / float(w))
    new_w = int(round(w * ratio))
    new_h = int(round(h * ratio))
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    dw = (new_size - new_w) / 2.0
    dh = (new_size - new_h) / 2.0
    left, right = int(math.floor(dw)), int(math.ceil(dw))
    top, bottom = int(math.floor(dh)), int(math.ceil(dh))
    padded = cv2.copyMakeBorder(resized, top, bottom, left, right,
                                borderType=cv2.BORDER_CONSTANT,
                                value=(pad_val, pad_val, pad_val))
    return padded, ratio, (left, top)
|
| 508 |
+
|
| 509 |
+
# ========= TTS =========
|
| 510 |
+
# tts_q=queue.Queue(maxsize=TTS_QUEUE_MAX) 삭제(8.20)
|
| 511 |
+
# tts_is_speaking=threading.Event() 삭제(8.20)
|
| 512 |
+
tts_stop=threading.Event()
|
| 513 |
+
tts_target_lock=threading.Lock()
|
| 514 |
+
tts_target_text=None
|
| 515 |
+
_last_spoken_enqueue_ts=0.0
|
| 516 |
+
# tts_last_done_ts=0.0 삭제(8.20)
|
| 517 |
+
tts_last_seen_target_ts=0.0
|
| 518 |
+
|
| 519 |
+
SPELLFIX_ENABLE=True
|
| 520 |
+
JAMO_THRESHOLD=85; JAMO_THRESHOLD_LOWCONF=80
|
| 521 |
+
# 수정4: 딕셔너리 확장 (서연 세탁기 포함하도록)
|
| 522 |
+
#"동작","일시정지",
|
| 523 |
+
DICT_WORDS=["통살균","원격제어","예약","내마음","세탁","헹굼","탈수","물온도","빨래추가","알림음","구김방지","터보샷", "강력물살","온수세탁","냉수세탁","물높이","코스","동작/일시정지","전원"]
|
| 524 |
+
CANON={"표준세탁":"세탁","손세탁":"세탁"}
|
| 525 |
+
|
| 526 |
+
def _to_jamo(s: str) -> str:
    """Decompose Hangul text into compatibility jamo for fuzzy matching.

    Falls back to the input (or "" for falsy input) if the jamo helpers
    are unavailable or raise.
    """
    try:
        decomposed = h2j(s)
        return j2hcj(decomposed)
    except Exception:
        return s or ""
|
| 531 |
+
|
| 532 |
+
if _SPELLFIX_OK:
|
| 533 |
+
_DICT_JAMO=[_to_jamo(w) for w in DICT_WORDS]
|
| 534 |
+
else:
|
| 535 |
+
_DICT_JAMO=[]
|
| 536 |
+
|
| 537 |
+
_TOKENIZER=re.compile(r"[가-힣A-Za-z0-9]+|[^\s가-힣A-Za-z0-9]")
|
| 538 |
+
|
| 539 |
+
def correct_token(tok: str, threshold: int):
    """Fuzzy-correct a single token against the jamo dictionary.

    Returns (corrected_token, match_score). The token is returned
    unchanged with score 0.0 when spell-fix is unavailable/disabled,
    the token is empty, or no dictionary entry clears `threshold`.
    """
    if not (_SPELLFIX_OK and SPELLFIX_ENABLE and _DICT_JAMO and tok):
        return tok, 0.0
    query = _to_jamo(tok)
    hit = process.extractOne(query, _DICT_JAMO, scorer=fuzz.ratio,
                             score_cutoff=threshold)
    if not hit:
        return tok, 0.0
    _matched, score, idx = hit
    word = DICT_WORDS[idx]
    word = CANON.get(word, word)  # map aliases to their canonical form
    return word, float(score)
|
| 548 |
+
|
| 549 |
+
def correct_text(text: str, threshold: int):
    """Spell-correct every word-like token of `text`.

    Returns (corrected_text, changed_flag). Punctuation and other
    non-word tokens pass through untouched; token boundaries come from
    the shared _TOKENIZER so the string is rebuilt without extra spaces.
    """
    if not (_SPELLFIX_OK and SPELLFIX_ENABLE and _DICT_JAMO and text):
        return text, False
    pieces = []
    changed = False
    for tok in _TOKENIZER.findall(text):
        if re.match(r"^[가-힣A-Za-z0-9]+$", tok):
            fixed, _score = correct_token(tok, threshold=threshold)
            changed = changed or (fixed != tok)
            pieces.append(fixed)
        else:
            pieces.append(tok)
    return "".join(pieces), changed
|
| 561 |
+
|
| 562 |
+
DICT_SPEAK_ENABLE=True
|
| 563 |
+
DICT_THRESHOLD=80; DICT_THRESHOLD_LOWCONF=80
|
| 564 |
+
def _build_dict_index(words, canon_map):
    """Build parallel lookup lists (keys, jamo_keys, canonical_values).

    Every dictionary word maps to its canonical form (itself unless an
    alias); every alias in `canon_map` is added as an extra key pointing
    at its canonical word. Keys are whitespace-stripped and decomposed
    to jamo for fuzzy matching.
    """
    keys = list(words)
    vals = [canon_map.get(w, w) for w in words]
    for alias, canon in canon_map.items():
        keys.append(alias)
        vals.append(canon)
    keys_j = [_to_jamo(re.sub(r"\s+", "", k)) for k in keys]
    return keys, keys_j, vals
|
| 570 |
+
_DICT_KEYS, _DICT_KEYS_J, _DICT_VALS=_build_dict_index(DICT_WORDS, CANON)
|
| 571 |
+
# _DICT_KEYS_PLAIN=[re.sub(r"\s+","",k).casefold() for k in _DICT_KEYS] 삭제(8.20)
|
| 572 |
+
def _normalize_plain(s:str)->str: return re.sub(r"\s+","",(s or "")).casefold()
|
| 573 |
+
def map_to_dict_canon(text: str, threshold: int): # in use
    """Map OCR text to its canonical dictionary word.

    Builds jamo-decomposed queries from the whitespace-stripped whole
    string and from each word-like token, fuzzy-matches them against the
    dictionary index, and returns (canonical_word, best_score) when the
    best hit reaches `threshold`; otherwise (None, 0.0).
    """
    if not DICT_SPEAK_ENABLE or not text: return None, 0.0
    if _SPELLFIX_OK:
        queries=[]
        s=re.sub(r"\s+","",text)
        if s: queries.append(_to_jamo(s))
        for tok in _TOKENIZER.findall(text):
            if re.match(r"^[가-힣A-Za-z0-9]+$", tok): queries.append(_to_jamo(tok))
        best_idx, best_sc=-1, 0.0
        for q in queries:
            res=process.extractOne(q, _DICT_KEYS_J, scorer=fuzz.ratio, score_cutoff=threshold)
            if res:
                _, sc, idx=res
                if sc>best_sc:
                    best_sc=float(sc); best_idx=int(idx)
        if best_idx>=0: return _DICT_VALS[best_idx], best_sc

    # Fix 6: decide by dictionary match score only. The plain-substring
    # fallback below caused option misreads (subset matches such as
    # "강" -> "강력세탁"), so it stays disabled.
    # q_full=_normalize_plain(text)
    # q_tokens=[_normalize_plain(tok) for tok in _TOKENIZER.findall(text) if re.match(r"^[가-힣A-Za-z0-9]+$", tok)]
    # for q in [q_full]+q_tokens:
    #     if not q: continue
    #     for i,k in enumerate(_DICT_KEYS_PLAIN):
    #         if q==k: return _DICT_VALS[i], 100.0
    # for q in [q_full]+q_tokens:
    #     if not q: continue
    #     for i,k in enumerate(_DICT_KEYS_PLAIN):
    #         if (k and k in q) or (q and q in k): return _DICT_VALS[i], 90.0
    return None, 0.0
|
| 602 |
+
|
| 603 |
+
def enrich_with_dict(text: str, conf: float):
    """Attach the canonical dictionary form (if any) to an OCR result.

    Returns (display_text, canonical_or_None, dict_score, confidence);
    the display text is the canonical word when one matched, otherwise
    the raw OCR text.
    """
    canon, score = map_to_dict_canon(text, threshold=DICT_MERGE_SCORE)
    shown = canon if canon else text
    return shown, canon, float(score or 0.0), float(conf or 0.0)
|
| 607 |
+
|
| 608 |
+
def _has_korean(s: str) -> bool:
    """Return True if the string contains at least one Hangul syllable."""
    for ch in s or "":
        if '가' <= ch <= '힣':
            return True
    return False
|
| 610 |
+
|
| 611 |
+
# (tts / 안내 스레드 근처 아무 곳에 추가)
|
| 612 |
+
# def announce_async(text: str): 삭제(8.20)
|
| 613 |
+
# threading.Thread(target=_say_once, args=(text,), daemon=True).start()
|
| 614 |
+
|
| 615 |
+
|
| 616 |
+
# TTS 초기화
|
| 617 |
+
try:
|
| 618 |
+
import tempfile, os
|
| 619 |
+
try:
|
| 620 |
+
tts=TTSReader(cooldown_sec=TTS_REPEAT_SEC, speaking_rate=1.05, pitch=0.0,
|
| 621 |
+
min_len=2, credentials_path=r"yugpae-4f8335e15ba0.json",
|
| 622 |
+
cache_dir=None, persist_cache=False)
|
| 623 |
+
except TypeError:
|
| 624 |
+
tts=TTSReader(cooldown_sec=TTS_REPEAT_SEC, speaking_rate=1.05, pitch=0.0,
|
| 625 |
+
min_len=2, credentials_path=r"yugpae-4f8335e15ba0.json")
|
| 626 |
+
for attr in ("set_cache","disable_cache"):
|
| 627 |
+
if hasattr(tts, attr):
|
| 628 |
+
try: getattr(tts, attr)(persist=False, dir=None)
|
| 629 |
+
except Exception: pass
|
| 630 |
+
try:
|
| 631 |
+
if not (hasattr(tts,"cache_dir") and getattr(tts,"cache_dir") is None):
|
| 632 |
+
tmp_cache=os.path.join(tempfile.gettempdir(),"tts_runtime_cache")
|
| 633 |
+
os.makedirs(tmp_cache, exist_ok=True)
|
| 634 |
+
if hasattr(tts,"cache_dir"): tts.cache_dir=tmp_cache
|
| 635 |
+
except Exception: pass
|
| 636 |
+
except Exception as e:
|
| 637 |
+
print(f"[TTS] init failed: {e}")
|
| 638 |
+
TTS_ENABLE=False
|
| 639 |
+
tts=None
|
| 640 |
+
|
| 641 |
+
try:
|
| 642 |
+
import pygame
|
| 643 |
+
if not pygame.mixer.get_init(): pygame.mixer.init()
|
| 644 |
+
pygame.mixer.music.set_volume(1.0)
|
| 645 |
+
except Exception: pass
|
| 646 |
+
|
| 647 |
+
def tts_scheduler():
    """Polling loop (daemon thread) that speaks the current TTS target.

    Reads the shared target under `tts_target_lock` every 50 ms. Never
    preempts ongoing speech, and suppresses re-speaking the same sentence
    while its no-repeat window is active.
    """
    global _last_spoken_enqueue_ts, tts_last_spoken_text, no_repeat_until_ts  # tts_last_done_ts removed (8.20)
    last_sent_text = None
    while not tts_stop.is_set():
        time.sleep(0.05)
        if not TTS_ENABLE or tts is None:
            continue

        with tts_target_lock:
            tgt = (tts_target_text or "").strip()

        # No target: do nothing and move on (never interrupt/stop playback).
        if not tgt:
            last_sent_text = None
            continue

        now = time.time()

        # While something is already being spoken, ordinary TTS must never
        # preempt or interrupt it.
        if _is_speaker_busy():
            continue

        # Suppress rapid repeats of the same sentence.
        if tgt == last_sent_text and now < no_repeat_until_ts:
            continue

        # Play. clear_queue/stop/flush are strictly forbidden here unless a
        # mode switch performs them elsewhere.
        try:
            tts.say(tgt)
            tts_last_spoken_text = tgt
            _last_spoken_enqueue_ts = now
            # tts_last_done_ts = now  -- removed (8.20)
            last_sent_text = tgt
        except Exception as e:
            print(f"[TTS] error: {e}")
|
| 682 |
+
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
if TTS_ENABLE:
|
| 686 |
+
threading.Thread(target=tts_scheduler, daemon=True).start()
|
| 687 |
+
|
| 688 |
+
# def set_tts_target(text_or_none, note: str=""):
|
| 689 |
+
# global tts_target_text, tts_current_display, tts_current_note
|
| 690 |
+
# # 일반 TTS는 오직 타겟만 갱신. 여기서 재생을 중단/선점하지 않음.
|
| 691 |
+
# with tts_target_lock:
|
| 692 |
+
# tts_target_text = text_or_none
|
| 693 |
+
# tts_current_display = (text_or_none or "").strip()
|
| 694 |
+
# tts_current_note = note or ""
|
| 695 |
+
last_text=""  # last preempting text, so the same phrase does not re-preempt (8.21)

def set_tts_target(text_or_none, note: str="",  # `force` added for preemptive speech (8.21)
                   *, force: bool=False):
    """Update the shared TTS target.

    - text_or_none: new target sentence, or None to clear it.
    - note: free-form annotation stored alongside for display/debugging.
    - force=True: stop current playback (clear queue, then stop/cancel/
      flush, each best-effort) and apply the new target immediately —
      but only when the text differs from the last forced one.
    """
    global tts_target_text, tts_current_display, tts_current_note
    global no_repeat_until_ts, _last_spoken_enqueue_ts
    global last_text
    # 1) Update the shared target under the lock.
    with tts_target_lock:
        tts_target_text = text_or_none
        tts_current_display = (text_or_none or "").strip()
        tts_current_note = note or ""

    # 2) Preemption: only when forced, a TTS engine exists, and the text
    #    actually changed since the last forced utterance.
    if force and ("tts" in globals()) and (tts is not None) and last_text != text_or_none:
        last_text=text_or_none
        try:
            if hasattr(tts, "clear_queue"): tts.clear_queue()
            # Best-effort: call whichever interrupt hooks this TTS exposes.
            for m in ("stop","cancel","flush"):
                if hasattr(tts, m):
                    try: getattr(tts, m)()
                    except Exception: pass
        except Exception:
            pass
        _last_spoken_enqueue_ts = 0.0  # resynchronize with the scheduler
|
| 725 |
+
|
| 726 |
+
|
| 727 |
+
|
| 728 |
+
|
| 729 |
+
# ========= Camera =========
|
| 730 |
+
cap = cv2.VideoCapture(CAMERA_ID, cv2.CAP_DSHOW) if cv2.getBuildInformation().find('Windows')!=-1 else cv2.VideoCapture(CAMERA_ID)
|
| 731 |
+
if not cap.isOpened(): raise SystemExit("카메라 열기 실패")
|
| 732 |
+
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
|
| 733 |
+
cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAPTURE_TARGET_W)
|
| 734 |
+
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAPTURE_TARGET_H)
|
| 735 |
+
cap.set(cv2.CAP_PROP_FPS, 30)
|
| 736 |
+
try: cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
|
| 737 |
+
except: pass
|
| 738 |
+
time.sleep(0.15)
|
| 739 |
+
Wc=int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)); Hc=int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 740 |
+
print(f"[Camera] requested ~{CAPTURE_TARGET_W}x{CAPTURE_TARGET_H}, actual {Wc}x{Hc}")
|
| 741 |
+
|
| 742 |
+
WORK_SCALE=min(1.0, WORK_WIDTH_TARGET/float(Wc))
|
| 743 |
+
print(f"[Work] WORK_SCALE={WORK_SCALE:.3f} (work width ~{int(Wc*WORK_SCALE)})")
|
| 744 |
+
|
| 745 |
+
# ========= State =========
|
| 746 |
+
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
|
| 747 |
+
frame_idx=0
|
| 748 |
+
prev_gray_s=None; prev_pts=None
|
| 749 |
+
overlays=[]; last_prune=time.time()
|
| 750 |
+
|
| 751 |
+
# OCR 스케줄
|
| 752 |
+
last_ocr_time=0.0
|
| 753 |
+
last_roi=None
|
| 754 |
+
|
| 755 |
+
# ORB
|
| 756 |
+
orb=None; bf=None
|
| 757 |
+
if USE_ORB_FALLBACK:
|
| 758 |
+
orb=cv2.ORB_create(nfeatures=ORB_NFEATURES)
|
| 759 |
+
bf=cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
|
| 760 |
+
|
| 761 |
+
# ========= Utils =========
|
| 762 |
+
_next_oid = 1  # monotonically increasing overlay id counter


def alloc_oid():
    """Return the next unique overlay id (1, 2, 3, ...)."""
    global _next_oid
    oid = _next_oid
    _next_oid = oid + 1
    return oid
|
| 766 |
+
|
| 767 |
+
def clamp_rect(x, y, w, h, W, H):
    """Clamp a rect (x, y, w, h) so it lies inside a W x H frame.

    The origin is pulled into the frame first, then each side is clamped
    to the remaining space with a minimum size of 1 pixel.
    """
    x = min(max(0, x), W - 1)
    y = min(max(0, y), H - 1)
    w = min(max(1, w), W - x)
    h = min(max(1, h), H - y)
    return x, y, w, h
|
| 771 |
+
|
| 772 |
+
def poly_center(poly): return np.mean(poly,axis=0)
|
| 773 |
+
|
| 774 |
+
def bbox_of_poly(poly):
    """Axis-aligned bounding box (x, y, w, h) of an (N, 2) point array."""
    xs = poly[:, 0]
    ys = poly[:, 1]
    x1, y1 = float(xs.min()), float(ys.min())
    x2, y2 = float(xs.max()), float(ys.max())
    return (x1, y1, x2 - x1, y2 - y1)
|
| 778 |
+
|
| 779 |
+
def variance_of_laplacian(g): return cv2.Laplacian(g, cv2.CV_64F).var()
|
| 780 |
+
|
| 781 |
+
def rect_contains(outer, inner, tol=2.0):
    """True if rect `inner` lies inside rect `outer`, within `tol` pixels.

    Rects are (x, y, w, h) tuples; `tol` loosens every edge comparison.
    """
    ox, oy, ow, oh = outer
    ix, iy, iw, ih = inner
    if ix < ox - tol or iy < oy - tol:
        return False
    return (ix + iw <= ox + ow + tol) and (iy + ih <= oy + oh + tol)
|
| 786 |
+
|
| 787 |
+
def _canon_equal(a: str, b: str) -> bool:
    """Compare two strings ignoring whitespace and letter case.

    Empty/None on either side compares False. Uses the shared
    _normalize_plain helper when available, with a local regex-based
    fallback if it raises for any reason.
    """
    a = (a or "").strip()
    b = (b or "").strip()
    if not (a and b):
        return False
    try:
        return _normalize_plain(a) == _normalize_plain(b)
    except Exception:
        import re
        aa = re.sub(r"\s+", "", a).casefold()
        bb = re.sub(r"\s+", "", b).casefold()
        return aa == bb
|
| 797 |
+
|
| 798 |
+
|
| 799 |
+
def iou(a, b):
    """Intersection-over-union of two rects given as (x, y, w, h)."""
    ax, ay, aw, ah = a
    bx, by, bw, bh = b
    ix1 = max(ax, bx)
    iy1 = max(ay, by)
    ix2 = min(ax + aw, bx + bw)
    iy2 = min(ay + ah, by + bh)
    iw = max(0, ix2 - ix1)
    ih = max(0, iy2 - iy1)
    inter = iw * ih
    # Epsilon keeps degenerate (zero-area) rect pairs from dividing by zero.
    union = aw * ah + bw * bh - inter + 1e-9
    return inter / union
|
| 807 |
+
|
| 808 |
+
# def expand_rect(x,y,w,h,pad,W,H): 삭제(8.20)
|
| 809 |
+
# x2=x-pad; y2=y-pad; w2=w+2*pad; h2=h+2*pad
|
| 810 |
+
# return clamp_rect(x2,y2,w2,h2,W,H)
|
| 811 |
+
|
| 812 |
+
def is_visible_in_view(poly, W, H, min_overlap=0.7):
    """True if at least `min_overlap` of the polygon's bbox is on-screen."""
    x, y, w, h = bbox_of_poly(poly)
    ix1 = max(x, 0)
    iy1 = max(y, 0)
    ix2 = min(x + w, W)
    iy2 = min(y + h, H)
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area = max(1.0, w * h)  # guard against a degenerate zero-area bbox
    return (inter / area) >= min_overlap
|
| 821 |
+
|
| 822 |
+
def draw_overlays(frame, items, now_ts):
    """Draw OCR overlays (boxes + labels) onto `frame` in place.

    Overlays still visible in the view get their 'last_seen' refreshed.
    Labels are rendered through PIL because OpenCV's putText cannot draw
    Hangul glyphs.
    """
    H, W = frame.shape[:2]
    to_draw=[]
    for it in items:
        if is_visible_in_view(it['poly'], W, H, min_overlap=0.7):
            it['last_seen']=now_ts
            to_draw.append(it)
    to_draw=to_draw[:MAX_TEXT_DRAW]  # cap labels drawn per frame
    for it in to_draw:
        cv2.polylines(frame, [it['poly'].astype(int)], True, (255,165,0), 2, cv2.LINE_AA)

    # Round-trip BGR -> PIL(RGB) so a TrueType font can render Korean text.
    img_rgb=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil=Image.fromarray(img_rgb); draw=ImageDraw.Draw(pil)
    font_path=None
    # First Korean-capable font found wins; otherwise fall back to the
    # PIL built-in (which cannot render Hangul, but keeps drawing alive).
    for p in [r"C:\Windows\Fonts\malgun.ttf", r"C:\Windows\Fonts\NanumGothic.ttf",
              r"C:\Windows\Fonts\NotoSansCJKkr-Regular.otf",
              "/usr/share/fonts/truetype/noto/NotoSansCJKkr-Regular.ttc"]:
        if os.path.isfile(p): font_path=p; break
    font=ImageFont.truetype(font_path, 22) if font_path else ImageFont.load_default()

    for it in to_draw:
        poly=it['poly'].astype(int)
        x=int(np.min(poly[:,0])); y=int(np.min(poly[:,1]))-6  # label sits just above the box
        draw.text((x, max(0,y)), f"{it['text']} ({it['conf']:.2f})",
                  font=font, fill=(255,255,255), stroke_width=2, stroke_fill=(0,0,0))
    frame[:]=cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)  # write back in place
|
| 848 |
+
|
| 849 |
+
def prune_overlays(items, now, active_roi=None):
    """Return the overlays that should stay alive at time `now`.

    Keep rules, in order: pinned overlays always survive; overlays whose
    center lies inside `active_roi` (the finger region) survive on TTL
    alone, ignoring the hard lifetime cap; everything else must satisfy
    both its TTL and HARD_MAX_LIFETIME. Finally the list is capped at
    MAX_OVERLAYS, keeping the longest-lived entries.
    """
    def center_in_roi(c, roi):
        if roi is None: return False
        rx,ry,rw,rh = roi
        return (rx<=c[0]<=rx+rw) and (ry<=c[1]<=ry+rh)

    kept=[]
    for it in items:
        pinned = (now <= it.get('pin_until', 0.0))
        if pinned:
            kept.append(it); continue
        birth = it.get('time', now)
        alive_by_ttl = (now <= it.get('expiry', 0.0))
        # Under the active finger ROI the hard cap is waived: only TTL counts.
        if IGNORE_HARD_CAP_WHILE_FINGER_IN_ROI and active_roi is not None:
            c = poly_center(it['poly'])
            if center_in_roi(c, active_roi):
                if alive_by_ttl:
                    kept.append(it)
                continue
        under_hard_cap = ((now - birth) <= HARD_MAX_LIFETIME)
        if alive_by_ttl and under_hard_cap:
            kept.append(it)

    # Over the cap: keep the overlays that will live the longest
    # (max of TTL expiry and pin deadline).
    if len(kept) > MAX_OVERLAYS:
        kept = sorted(
            kept,
            key=lambda d: max(d.get('expiry', 0.0), d.get('pin_until', 0.0)),
            reverse=True
        )[:MAX_OVERLAYS]
    return kept
|
| 879 |
+
|
| 880 |
+
# ========= SIM helpers =========
|
| 881 |
+
def closest_rotation(A):
    """Nearest proper rotation matrix to A (orthogonal Procrustes, SVD)."""
    U, _sv, Vt = np.linalg.svd(A)
    R = U @ Vt
    if np.linalg.det(R) < 0:
        # Flip the last row of Vt to force det(R) = +1 (reject reflections).
        Vt[-1, :] *= -1
        R = U @ Vt
    return R
|
| 885 |
+
def project_to_similarity(M):  # in use
    """Split a 2x3 affine M into (scale, rotation R, translation t)."""
    A = M[:, :2]
    R = closest_rotation(A)
    scale = float(np.trace(A.T @ R) / 2.0)
    t = M[:, 2].reshape(2)
    return scale, R, t
|
| 889 |
+
def angle_from_R(R): return math.atan2(R[1,0], R[0,0]) #사용
|
| 890 |
+
def build_similarity(s, theta):
    """2x2 similarity matrix: rotation by `theta` scaled by `s`."""
    c = math.cos(theta)
    n = math.sin(theta)
    rot = np.array([[c, -n], [n, c]], dtype=np.float32)
    return rot * float(s)
|
| 894 |
+
|
| 895 |
+
SIM_HIST_MAX=240
|
| 896 |
+
sim_steps=[]
|
| 897 |
+
|
| 898 |
+
def _rect_aabb_after_M(rect, M3, W, H):
    """Transform rect corners by affine M3 and return the clamped AABB.

    Returns integer (x, y, w, h) clamped into the W x H frame with a
    minimum size of 1 pixel per side.
    """
    x, y, w, h = rect
    corners = np.array([[x, y], [x + w, y], [x + w, y + h], [x, y + h]],
                       dtype=np.float32)
    moved = corners @ M3[:2, :2].T + M3[:2, 2]
    minx = float(moved[:, 0].min()); maxx = float(moved[:, 0].max())
    miny = float(moved[:, 1].min()); maxy = float(moved[:, 1].max())
    rx = int(max(0, minx))
    ry = int(max(0, miny))
    # Clamp the far edge into the frame; keep at least 1 px of extent
    # (small fix, 8.21).
    rh = int(max(1, min(H - 1, maxy) - ry))
    rw = int(max(1, min(W - 1, maxx) - rx))
    return (rx, ry, rw, rh)
|
| 907 |
+
|
| 908 |
+
def estimate_similarity_small(prev_gray_s, gray_s, prev_pts): # in use
    """Estimate the inter-frame similarity transform on downscaled grays.

    Re-seeds Shi-Tomasi features when the surviving track count thins
    out, runs pyramidal LK optical flow, then fits a RANSAC partial
    affine. Returns (2x3 matrix M or None, next_pts or None).
    """
    if prev_pts is None or len(prev_pts) < 140:  # re-seed when tracks thin out
        prev_pts=cv2.goodFeaturesToTrack(prev_gray_s, maxCorners=FLOW_MAX_CORNERS,
                                         qualityLevel=FLOW_QUALITY, minDistance=FLOW_MIN_DISTANCE, blockSize=7)
        if prev_pts is None: return None, None
    next_pts, st, err=cv2.calcOpticalFlowPyrLK(prev_gray_s, gray_s, prev_pts, None,
                                               winSize=FLOW_WINSIZE, maxLevel=FLOW_LEVELS,
                                               criteria=(cv2.TERM_CRITERIA_EPS|cv2.TERM_CRITERIA_COUNT,12,0.03))
    if next_pts is None: return None, None
    P=prev_pts[st==1].reshape(-1,1,2); Q=next_pts[st==1].reshape(-1,1,2)
    if len(P) < 60: return None, None  # too few matches for a stable fit
    M,_=cv2.estimateAffinePartial2D(P,Q,method=cv2.RANSAC,
                                    ransacReprojThreshold=3.0, maxIters=1500, confidence=0.99)
    if M is None: return None, None
    return M, next_pts
|
| 923 |
+
|
| 924 |
+
def transform_overlays_similarity(items, s, theta, t_s): # in use
    """Apply the estimated similarity to every overlay polygon in place.

    `t_s` is the translation in downscaled coordinates; it is rescaled
    to full resolution via FLOW_DS and clamped to MAX_TRANS_PX per step
    to reject runaway motion estimates.
    """
    tx=float(t_s[0])/FLOW_DS; ty=float(t_s[1])/FLOW_DS  # back to full-res pixels
    step_mag=math.hypot(tx,ty)
    if step_mag>MAX_TRANS_PX:  # clamp a runaway translation step
        scale=MAX_TRANS_PX/(step_mag+1e-6)
        tx*=scale; ty*=scale
    A=build_similarity(s, theta).astype(np.float32)
    for it in items:
        pts=it['poly'].astype(np.float32)
        it['poly']=(pts@A.T)+np.array([tx,ty], dtype=np.float32)
|
| 934 |
+
|
| 935 |
+
def orb_similarity(prev_g, cur_g):
    """ORB-based fallback similarity estimate between two gray frames.

    kNN matches are filtered with Lowe's ratio test, then a RANSAC
    partial affine is fitted. Returns the 2x3 matrix, or None when
    features/matches are too sparse. Uses the module-level `orb`/`bf`
    created when USE_ORB_FALLBACK is enabled.
    """
    kp1, des1 = orb.detectAndCompute(prev_g, None)
    kp2, des2 = orb.detectAndCompute(cur_g, None)
    if des1 is None or des2 is None or len(kp1)<8 or len(kp2)<8: return None
    matches=bf.knnMatch(des1, des2, k=2)
    good=[]
    for mn in matches:
        if len(mn)==2:
            m,n=mn
            if m.distance < 0.75*n.distance: good.append(m)  # Lowe ratio test
    if len(good) < ORB_MIN_GOOD: return None
    src=np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1,1,2)
    dst=np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1,1,2)
    M,_=cv2.estimateAffinePartial2D(src,dst,method=cv2.RANSAC,
                                    ransacReprojThreshold=3.0,maxIters=1500,confidence=0.99)
    return M
|
| 951 |
+
|
| 952 |
+
# ========= KLT =========
|
| 953 |
+
def _build_gray_for_klt(gray): # in use
    """Preprocess a gray frame for KLT tracking.

    Optionally applies CLAHE contrast equalization, then returns the
    Sobel gradient magnitude normalized to a uint8 0..255 image, which
    gives the tracker stronger, illumination-robust texture.
    """
    g=gray
    if KLT_USE_CLAHE:
        clahe=cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        g=clahe.apply(g)
    gx=cv2.Sobel(g, cv2.CV_32F, 1, 0, ksize=3)
    gy=cv2.Sobel(g, cv2.CV_32F, 0, 1, ksize=3)
    mag=cv2.magnitude(gx,gy)
    if mag.max()>0: mag=(mag/mag.max())*255.0  # normalize to the full uint8 range
    return mag.astype(np.uint8)
|
| 963 |
+
|
| 964 |
+
def _klt_seed_ring(center, n=KLT_N_SAMPLES, r=KLT_RING_R):
    """Seed KLT points: the center plus n points on a radius-r ring.

    Returns an (n+1, 1, 2) float32 array in the layout
    cv2.calcOpticalFlowPyrLK expects.
    """
    cx = float(center[0])
    cy = float(center[1])
    pts = [(cx, cy)]
    for k in range(n):
        a = 2.0 * math.pi * k / float(n)
        pts.append((cx + r * math.cos(a), cy + r * math.sin(a)))
    return np.array(pts, dtype=np.float32).reshape(-1, 1, 2)
|
| 971 |
+
|
| 972 |
+
def _in_bounds(pt, W, H, margin=0):
    """True if pt=(x, y) lies inside the W x H frame, padded by `margin`."""
    x = float(pt[0])
    y = float(pt[1])
    if x < -margin or x > W - 1 + margin:
        return False
    return -margin <= y <= H - 1 + margin
|
| 975 |
+
|
| 976 |
+
def klt_track_multi(prev_gray, cur_gray, prev_pts, W, H): # in use
    """Track a point cloud between frames with forward-backward LK flow.

    A point survives only if the forward-backward error, LK error, step
    length and in-bounds checks all pass. Returns (median (cx, cy),
    surviving points shaped (-1, 1, 2)) or (None, None) when inputs are
    missing or too few points survive.
    """
    if prev_gray is None or cur_gray is None or prev_pts is None or len(prev_pts)==0:
        return None, None
    p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, cur_gray, prev_pts, None,
                                           winSize=KLT_WIN, maxLevel=KLT_LEVELS, criteria=KLT_TERM)
    if p1 is None: return None, None
    # Backward pass: re-track results to the previous frame for the
    # forward-backward consistency check.
    p0r, st2, err2 = cv2.calcOpticalFlowPyrLK(cur_gray, prev_gray, p1, None,
                                              winSize=KLT_WIN, maxLevel=KLT_LEVELS, criteria=KLT_TERM)
    good=[]
    for i in range(len(prev_pts)):
        if st[i]==1 and st2[i]==1:
            fb=float(np.linalg.norm(prev_pts[i,0]-p0r[i,0]))   # forward-backward error
            e=float(err[i][0]) if err is not None else 0.0
            step=float(np.linalg.norm(p1[i,0]-prev_pts[i,0]))  # per-frame displacement
            if fb<=KLT_FB_MAX and e<=KLT_ERR_MAX and step<=KLT_STEP_MAX and _in_bounds(p1[i,0], W, H, KLT_OUT_MARGIN):  # simplified via KLT_OUT_MARGIN (8.20)
                good.append(p1[i,0])
    if len(good)<KLT_MIN_GOOD: return None, None
    good=np.array(good, dtype=np.float32)
    med=np.median(good, axis=0)  # median center is robust to stragglers
    cx, cy = int(round(float(med[0]))), int(round(float(med[1])))
    if not _in_bounds((cx,cy), W, H, 0): return None, None
    return (cx,cy), good.reshape(-1,1,2)
|
| 998 |
+
|
| 999 |
+
# ===== Donut / merge utils =====
|
| 1000 |
+
def rect_from_poly(poly):  # in use
    """Integer (x, y, w, h) bounding box of a polygon."""
    x, y, w, h = bbox_of_poly(poly)
    return (int(x), int(y), int(w), int(h))
|
| 1002 |
+
|
| 1003 |
+
def fingertip_overlaps_box(finger, box):  # in use
    """True if the fingertip point lies inside box=(x, y, w, h)."""
    if finger is None:
        return False
    x, y, w, h = box
    fx, fy = finger[0], finger[1]
    return (x <= fx <= x + w) and (y <= fy <= y + h)
|
| 1007 |
+
|
| 1008 |
+
def clip_poly_to_rect(poly, rect):
    """Clamp every polygon vertex into rect=(x, y, w, h); returns a copy."""
    x, y, w, h = rect
    out = poly.copy()
    out[:, 0] = np.clip(out[:, 0], x, x + w)
    out[:, 1] = np.clip(out[:, 1], y, y + h)
    return out
|
| 1013 |
+
|
| 1014 |
+
def merge_update_overlays(items, new_items, roi_rect, now_ts,
                          iou_th=MERGE_IOU_TH, center_dist_th=MERGE_CENTER_DIST):
    """Merge freshly OCR'd boxes (`new_items`) into the live overlay list.

    Each new box (clipped to `roi_rect`) is matched against existing
    overlays whose center lies inside the ROI, either by location
    (IoU / center distance) or by identical canonical text plus
    containment. A matched overlay is replaced only when the new read is
    better — dictionary match score first, then OCR confidence.
    Unmatched new boxes are appended (subject to DICT_ONLY filtering).
    Returns the mutated `items` list.
    """
    rx, ry, rw, rh = roi_rect

    def center_in_roi(c):
        return (rx <= c[0] <= rx+rw) and (ry <= c[1] <= ry+rh)

    roi_indices = [idx for idx, it in enumerate(items) if center_in_roi(poly_center(it['poly']))]
    used_old = set()  # existing overlays already consumed by a match

    for ni in new_items:
        poly_new = clip_poly_to_rect(ni['poly'], roi_rect)
        box_new = bbox_of_poly(poly_new)
        raw_txt = str(ni.get('text','')).strip()
        raw_conf = float(ni.get('conf', 0.0))
        disp_new, canon_new, csc_new, conf_new = enrich_with_dict(raw_txt, raw_conf)

        best_idx = -1
        best_iou = -1.0
        best_d = 1e9

        for idx in roi_indices:
            if idx in used_old:
                continue
            it = items[idx]
            box_old = bbox_of_poly(it['poly'])

            # (1) Location-based matching (IoU / center distance).
            i = iou(box_new, box_old)
            cxn = (box_new[0]*2 + box_new[2]) * 0.5
            cyn = (box_new[1]*2 + box_new[3]) * 0.5
            cxo = (box_old[0]*2 + box_old[2]) * 0.5
            cyo = (box_old[1]*2 + box_old[3]) * 0.5
            d = math.hypot(cxn - cxo, cyn - cyo)
            loc_match = (i >= iou_th) or (d <= center_dist_th)

            # (2) Same canonical text + containment also counts as a match
            # (e.g. a small box sitting inside a bigger one).
            text_same = _canon_equal(it.get('canon_text') or it.get('text'),
                                     canon_new or disp_new)
            contained = rect_contains(box_old, box_new) or rect_contains(box_new, box_old)
            text_same_contained = text_same and contained

            if not (loc_match or text_same_contained):
                continue

            # Pick the best candidate (IoU first, then distance on ties).
            if (i > best_iou) or (abs(i - best_iou) < 1e-6 and d < best_d):
                best_iou, best_d, best_idx = i, d, idx

        if best_idx >= 0:
            it = items[best_idx]
            # Priority: dictionary match score beats raw OCR confidence.
            csc_old = float(it.get('canon_score', 0.0))
            conf_old = float(it.get('conf', 0.0))

            replace = False

            if csc_new >= DICT_MERGE_SCORE and csc_old < DICT_MERGE_SCORE:
                replace = True
            elif csc_new >= DICT_MERGE_SCORE and csc_old >= DICT_MERGE_SCORE:
                if csc_new > csc_old + DICT_TIE_DELTA:
                    replace = True
                elif abs(csc_new - csc_old) <= DICT_TIE_DELTA and conf_new > conf_old:
                    replace = True
            else:
                if conf_new > conf_old and csc_new > csc_old:  # replace only when the new read is strictly better (8.20)
                    replace = True  # TODO: consider a relative-comparison-based replacement policy

            if replace:
                it['poly'] = poly_new
                it['ocr_text'] = raw_txt
                it['text'] = disp_new
                it['canon_text'] = canon_new
                it['canon_score'] = csc_new
                it['conf'] = conf_new
                it['expiry'] = now_ts + BASE_TTL
                # duplicated TTL-extension code removed here (8.20)

            used_old.add(best_idx)

        else:
            if DICT_ONLY and (disp_new is None or disp_new not in DICT_WORDS):  # show dictionary words only (8.21)
                continue
            items.append({
                'poly': poly_new,
                'ocr_text': raw_txt,
                'text': disp_new,
                'canon_text': canon_new,
                'canon_score': csc_new,
                'conf': conf_new,
                'time': now_ts,
                'last_seen': now_ts,
                'expiry': now_ts + BASE_TTL,
                'pin_until': 0.0,
                'id': alloc_oid()
            })

    # Keepalive extension of existing ROI items was removed (8.20): the main
    # loop already extends TTLs and it overlapped with prune_overlays' logic.
    # If the box count grows, an extra prune pass here may become necessary.
    return items
|
| 1116 |
+
|
| 1117 |
+
|
| 1118 |
+
def dedupe_same_text_overlays(items, iou_th=0.55, center_dist_th=26.0):
    """Remove duplicate boxes carrying the same (canonically normalized) text.

    - Boxes treated as the same text are collapsed to one when they
      overlap heavily, sit close together, or one contains the other.
    - Keep priority: (1) dictionary hit / higher dictionary match score,
      then (2) higher OCR confidence on ties.
    Returns the original list when nothing was dropped.
    """
    def _canon_key(it):
        # Normalized grouping key; local regex fallback if the shared
        # normalizer is unavailable.
        t = (it.get('canon_text') or it.get('text') or '').strip()
        try:
            return _normalize_plain(t)
        except Exception:
            import re as _re
            return _re.sub(r"\s+","",t).casefold()

    def _rect(it):
        return bbox_of_poly(it['poly'])

    def _score(it):
        # Sort key: (dictionary-hit flag, dictionary score, confidence).
        csc = float(it.get('canon_score', 0.0))
        conf = float(it.get('conf', 0.0))
        return ((1 if csc >= DICT_MERGE_SCORE else 0), csc, conf)

    groups = {}
    for it in items:
        key = _canon_key(it)
        if not key:  # skip overlays with no text
            continue
        groups.setdefault(key, []).append(it)

    keep = set()
    drop = set()
    for key, arr in groups.items():
        # Best-scored overlay of each text group is examined first and kept.
        arr_sorted = sorted(arr, key=_score, reverse=True)
        for i, a in enumerate(arr_sorted):
            if id(a) in drop or id(a) in keep:
                continue
            keep.add(id(a))
            ax, ay, aw, ah = _rect(a)
            acx, acy = ax+aw*0.5, ay+ah*0.5
            for b in arr_sorted[i+1:]:
                if id(b) in drop or id(b) in keep:
                    continue
                bx, by, bw, bh = _rect(b)
                bcx, bcy = bx+bw*0.5, by+bh*0.5
                ov = iou((ax,ay,aw,ah), (bx,by,bw,bh))
                d = ((acx-bcx)**2 + (acy-bcy)**2)**0.5
                contained = rect_contains((ax,ay,aw,ah), (bx,by,bw,bh)) or rect_contains((bx,by,bw,bh), (ax,ay,aw,ah))
                if contained or (ov >= iou_th) or (d <= center_dist_th):
                    drop.add(id(b))

    if not drop:
        return items
    return [it for it in items if id(it) not in drop]
|
| 1171 |
+
|
| 1172 |
+
|
| 1173 |
+
# ===== GUIDE MODE 유틸 =====
|
| 1174 |
+
def _overlay_center(it):
|
| 1175 |
+
P = it['poly']
|
| 1176 |
+
x1, y1 = float(np.min(P[:,0])), float(np.min(P[:,1]))
|
| 1177 |
+
x2, y2 = float(np.max(P[:,0])), float(np.max(P[:,1]))
|
| 1178 |
+
return (0.5*(x1+x2), 0.5*(y1+y2))
|
| 1179 |
+
|
| 1180 |
+
def _choose_target_overlay(target_canon: str, overlays, finger_xy=None):
    """Pick the overlay that best matches *target_canon*.

    An overlay is a candidate when its canonical text equals the target,
    its display text equals the target, or the target is a substring of
    the display text.  Ranking (descending): exact canonical match first,
    then higher OCR confidence, then shorter distance to *finger_xy*
    (distance is 0 when no finger position is supplied).

    Returns the best overlay dict, or None when nothing matches.
    """
    t = (target_canon or "").strip()
    if not t:
        return None
    cands = []
    for it in overlays:
        ct = (it.get('canon_text') or "").strip()
        tx = (it.get('text') or "").strip()
        ok = (ct == t) or (tx == t) or (t in tx)
        if ok:
            cx, cy = _overlay_center(it)
            d = 0.0
            if finger_xy is not None:
                d = math.hypot(cx - (finger_xy[0]), cy - (finger_xy[1]))
            canon_bonus = 1.0 if (ct == t) else 0.0
            cands.append((canon_bonus, float(it.get('conf', 0.0)), -d, it))
    if not cands:
        return None
    # BUG FIX: sorting the bare tuples compared the trailing dicts whenever
    # the three numeric fields tied, which raises TypeError (dicts are not
    # orderable).  Sort on the numeric prefix only; ties then keep input order.
    cands.sort(key=lambda c: c[:3], reverse=True)
    return cands[0][3]
|
| 1199 |
+
|
| 1200 |
+
def _dir_sentence(dx, dy):
|
| 1201 |
+
def q(px):
|
| 1202 |
+
a = abs(int(round(px)))
|
| 1203 |
+
if a < 30: lvl = "조금"
|
| 1204 |
+
elif a < 90: lvl = "약간"
|
| 1205 |
+
elif a < 180: lvl = "보통"
|
| 1206 |
+
else: lvl = "많이"
|
| 1207 |
+
return lvl, a
|
| 1208 |
+
msg = []
|
| 1209 |
+
if dx > 0: lvl, a = q(dx); msg.append(f"오른쪽으로 {a}픽셀({lvl})")
|
| 1210 |
+
elif dx < 0: lvl, a = q(dx); msg.append(f"왼쪽으로 {a}픽셀({lvl})")
|
| 1211 |
+
if dy > 0: lvl, a = q(dy); msg.append(f"아래로 {a}픽셀({lvl})")
|
| 1212 |
+
elif dy < 0: lvl, a = q(dy); msg.append(f"위로 {a}픽셀({lvl})")
|
| 1213 |
+
return " , ".join(msg) if msg else "그대로 유지"
|
| 1214 |
+
|
| 1215 |
+
def set_guide_target_from_text(text: str):
    """Set the guide-mode target from free text via dictionary mapping.

    Empty input clears the current target.  The text must map onto a
    dictionary word (map_to_dict_canon) to become a target.  Every outcome
    is announced through TTS.  Returns True when a target was set.
    """
    global GUIDE_TARGET, GUIDE_TARGET_ITEM
    if not text:
        GUIDE_TARGET = None
        GUIDE_TARGET_ITEM = None
        set_tts_target("목표가 비었습니다.", note="guide")
        return False
    canon, _score = map_to_dict_canon(text, threshold=DICT_THRESHOLD)
    if not canon:
        # No dictionary match: refuse to set an unguidable target.
        set_tts_target(f"'{text}'는 사전에 없습니다.", note="guide no-dict")
        return False
    GUIDE_TARGET = canon
    GUIDE_TARGET_ITEM = None
    set_tts_target(f"목표 '{canon}' 안내를 시작합니다.", note="guide")
    return True
|
| 1230 |
+
|
| 1231 |
+
def guide_tick(now_ts, finger_xy, overlays):
    """One guidance step: speak how to move the finger toward GUIDE_TARGET.

    Mutates module globals: GUIDE_LAST_TS (timestamp of the last utterance,
    used for rate limiting), GUIDE_LAST_SENT (key/text of the last utterance,
    used to avoid repeating "arrived"), and GUIDE_TARGET_ITEM (overlay
    currently matched to the target text, also used for rendering).

    Args:
        now_ts: current time in seconds (time.time()).
        finger_xy: (x, y) fingertip position in work-frame pixels, or None.
        overlays: current overlay dicts searched for the target text.
    """
    global GUIDE_LAST_TS, GUIDE_LAST_SENT, GUIDE_TARGET_ITEM, GUIDE_TARGET

    # Inactive unless guide mode is on and a target word has been set.
    if not GUIDE_MODE or not GUIDE_TARGET:
        return

    # No finger visible: periodically ask the user to show their finger.
    if GUIDE_REQUIRE_FINGER and finger_xy is None:
        if now_ts - GUIDE_LAST_TS >= GUIDE_REPEAT_SEC:
            set_tts_target("손가락을 화면에 올려 주세요.", note="guide")
            GUIDE_LAST_TS = now_ts
            GUIDE_LAST_SENT = "ask_finger"
        return

    tgt = _choose_target_overlay(GUIDE_TARGET, overlays, finger_xy)
    GUIDE_TARGET_ITEM = tgt

    # Target text not on screen: repeat "not found" at most every 2 s.
    if tgt is None:
        if now_ts - GUIDE_LAST_TS >= 2.0:
            set_tts_target(f"화면에서 '{GUIDE_TARGET}'을 찾지 못했습니다.", note="guide")
            GUIDE_LAST_TS = now_ts
            GUIDE_LAST_SENT = "not_found"
        return

    cx, cy = _overlay_center(tgt)
    # Target found but no finger (reachable only when GUIDE_REQUIRE_FINGER
    # is False): announce presence and ask the user to move their finger.
    if finger_xy is None:
        if now_ts - GUIDE_LAST_TS >= GUIDE_REPEAT_SEC:
            set_tts_target(f"목표 '{GUIDE_TARGET}'이 화면에 있습니다. 손가락을 이동해 주세요.", note="guide")
            GUIDE_LAST_TS = now_ts
            GUIDE_LAST_SENT = "where_only"
        return

    # Pixel offset from finger to target center.
    dx = int(round(cx - finger_xy[0]))
    dy = int(round(cy - finger_xy[1]))
    dist = math.hypot(dx, dy)

    # Within tolerance: announce arrival once (GUIDE_LAST_SENT de-duplicates).
    if dist <= GUIDE_TOL_PX:
        if GUIDE_LAST_SENT != "arrived":
            set_tts_target(f"도착. '{GUIDE_TARGET}' 입니다.", note="guide ok")
            GUIDE_LAST_SENT = "arrived"
            GUIDE_LAST_TS = now_ts
        return

    # Otherwise speak a direction sentence at most every GUIDE_REPEAT_SEC.
    if (now_ts - GUIDE_LAST_TS) >= GUIDE_REPEAT_SEC:
        msg = _dir_sentence(dx, dy)
        set_tts_target(f"{msg}", note=f"guide d={int(dist)}")
        GUIDE_LAST_SENT = msg
        GUIDE_LAST_TS = now_ts
|
| 1278 |
+
|
| 1279 |
+
def highlight_guide_target(frame_bgr, item):
    """Draw a red outline and center dot on the current guide target (in place)."""
    if item is None:
        return
    outline = item['poly'].astype(int)
    cv2.polylines(frame_bgr, [outline], True, (0, 0, 255), 3, cv2.LINE_AA)
    cx, cy = (int(v) for v in _overlay_center(item))
    cv2.circle(frame_bgr, (cx, cy), 6, (0, 0, 255), -1)
|
| 1285 |
+
|
| 1286 |
+
def stt_listen_once(timeout=4, phrase_time_limit=4):
    """Capture one utterance from the microphone and return its Korean transcript.

    Prompts via TTS, adjusts for ambient noise when supported, listens once,
    and sends the audio to Google's recognizer (ko-KR) with one retry on a
    transient failure.  Returns the transcript string, or None when STT is
    disabled or anything fails (failure is announced via TTS).
    """
    if not (USE_STT and _STT_OK):
        set_tts_target("음성 인식이 비활성화되어 있습니다.", note="stt off")
        return None
    try:
        r = sr.Recognizer()
        with sr.Microphone() as source:
            set_tts_target("목표 단어를 말씀해 주세요.", note="stt")
            if hasattr(r, "adjust_for_ambient_noise"):
                r.adjust_for_ambient_noise(source, duration=0.5)
            audio = r.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
            # BUG FIX: the original wrapped recognize_google in a try/except
            # whose handler re-ran the exact same call, silently acting as a
            # single retry.  Keep that one-retry behavior, but make it explicit;
            # a second failure propagates to the outer handler below.
            last_err = None
            for attempt in range(2):
                try:
                    return r.recognize_google(audio, language="ko-KR")
                except Exception as err:
                    last_err = err
            raise last_err
    except Exception as e:
        print(f"[STT] error: {e}")
        set_tts_target("음성 인식에 실패했습니다.", note="stt err")
        return None
|
| 1306 |
+
|
| 1307 |
+
# ===== OCR worker =====
# Single-slot task queue: the main loop schedules at most one OCR job at a
# time (it checks qsize()==0 before enqueueing).  result_q buffers up to two
# finished jobs before the worker would block.
task_q=queue.Queue(maxsize=1)
result_q=queue.Queue(maxsize=2)
|
| 1310 |
+
|
| 1311 |
+
def enhance_for_ocr(bgr):
    """Sharpen/contrast-boost a BGR patch before OCR, per ENHANCE_MODE.

    - "off":  return the input untouched
    - "fast": one light unsharp-mask pass
    - any other mode: edge-preserving denoise, CLAHE on the L channel,
      then a stronger unsharp mask
    """
    if ENHANCE_MODE == "off":
        return bgr
    if ENHANCE_MODE == "fast":
        soft = cv2.GaussianBlur(bgr, (0, 0), 0.8)
        return cv2.addWeighted(bgr, 1.6, soft, -0.6, 0)
    work = cv2.bilateralFilter(bgr.copy(), d=0, sigmaColor=45, sigmaSpace=12)
    lab = cv2.cvtColor(work, cv2.COLOR_BGR2LAB)
    L, A, B = cv2.split(lab)
    # Local contrast equalization on luminance only, so colors stay stable.
    L = cv2.createCLAHE(clipLimit=1.6, tileGridSize=(8, 8)).apply(L)
    work = cv2.cvtColor(cv2.merge([L, A, B]), cv2.COLOR_LAB2BGR)
    soft = cv2.GaussianBlur(work, (0, 0), 0.9)
    return cv2.addWeighted(work, 1.8, soft, -0.8, 0)
|
| 1323 |
+
|
| 1324 |
+
def prep_fixed(roi_bgr):
    """Shrink the ROI so its long side fits MAX_OCR_LONG, then enhance it.

    Returns (processed image, sx, sy) where sx/sy map original ROI x/y
    coordinates into processed-image coordinates.
    """
    h, w = roi_bgr.shape[:2]
    scale = min(1.0, float(MAX_OCR_LONG) / float(max(h, w)))
    if scale < 1.0:
        proc = cv2.resize(roi_bgr, (int(w * scale), int(h * scale)),
                          interpolation=cv2.INTER_AREA)
    else:
        proc = roi_bgr
    proc = enhance_for_ocr(proc)
    return proc, proc.shape[1] / float(w), proc.shape[0] / float(h)
|
| 1331 |
+
|
| 1332 |
+
def run_ocr_rect(frame_work, rect_work, mask_boxes=None):
    """Run EasyOCR over one rectangle of the work frame.

    Detected polygons are mapped from the preprocessed (scaled/enhanced)
    patch back into work-frame coordinates; boxes under 120 px^2 are
    discarded.  *mask_boxes* is accepted for interface compatibility but
    currently unused.

    Returns a list of {'poly', 'text', 'conf'} dicts.
    """
    rx, ry, rw, rh = rect_work
    patch = frame_work[ry:ry+rh, rx:rx+rw].copy()
    proc, sx_pre, sy_pre = prep_fixed(patch)
    detections = easy_reader.readtext(proc, detail=1, decoder='greedy',
                                      rotation_info=[0,180],
                                      contrast_ths=0.05, adjust_contrast=0.7,
                                      text_threshold=0.6, low_text=0.3, link_threshold=0.4,
                                      canvas_size=1920, mag_ratio=1.3,
                                      paragraph=False, min_size=2)
    found = []
    for bbox_points, text, prob in detections:
        quad = np.array(bbox_points, dtype=np.float32)
        # Undo the preprocessing scale, then shift into frame coordinates.
        quad[:, 0] = quad[:, 0] / sx_pre + rx
        quad[:, 1] = quad[:, 1] / sy_pre + ry
        _bx, _by, bw, bh = bbox_of_poly(quad)
        if bw * bh >= 120:
            found.append({'poly': quad, 'text': text, 'conf': float(prob)})
    return found
|
| 1351 |
+
|
| 1352 |
+
def ocr_worker():
    """Background loop: consume OCR jobs from task_q, publish to result_q.

    A job is a dict with 'frame_work', 'rects', 'roi', and 'frame_idx'.
    None on task_q is the shutdown sentinel.  Each result carries the
    detections plus the wall-clock OCR latency in milliseconds.
    """
    while True:
        job = task_q.get()
        if job is None:
            break
        started = time.time()
        found = []
        for rect in job['rects']:
            found.extend(run_ocr_rect(job['frame_work'], rect, mask_boxes=None))
        elapsed_ms = (time.time() - started) * 1000.0
        result_q.put({
            'roi': job['roi'],
            'new_items': found,
            'dt_ms': elapsed_ms,
            'frame_idx': job['frame_idx'],
        })
|
| 1367 |
+
# Daemon thread so it never blocks interpreter shutdown; clean shutdown is
# signaled by putting None on task_q (see cleanup at the end of the script).
threading.Thread(target=ocr_worker, daemon=True).start()
|
| 1368 |
+
|
| 1369 |
+
def drain_queue(q):
|
| 1370 |
+
try:
|
| 1371 |
+
while True: q.get_nowait()
|
| 1372 |
+
except queue.Empty:
|
| 1373 |
+
pass
|
| 1374 |
+
|
| 1375 |
+
# === 폴백 요약기 === 삭제 (8.20)
|
| 1376 |
+
# def _fallback_summarize(frame_bgr):
|
| 1377 |
+
# try:
|
| 1378 |
+
# r = easy_reader.readtext(frame_bgr, detail=1)
|
| 1379 |
+
# tokens = [re.sub(r"[^가-힣0-9A-Za-z]", "", t).strip() for (_b,t,_c) in r]
|
| 1380 |
+
# tokens = [t for t in tokens if t]
|
| 1381 |
+
# if not tokens:
|
| 1382 |
+
# return "눈에 띄는 텍스트가 없습니다."
|
| 1383 |
+
# top = ", ".join(tokens[:5])
|
| 1384 |
+
# return f"화면에서 텍스트가 보입니다: {top}"
|
| 1385 |
+
# except Exception:
|
| 1386 |
+
# return "장면을 요약할 수 없습니다."
|
| 1387 |
+
|
| 1388 |
+
# ===== Main loop =====
# Per-frame pipeline: capture -> global similarity motion compensation ->
# async YOLO fingertip detection (with KLT fallback) -> mode-dependent
# ROI/OCR/TTS -> prune/render/HUD -> keyboard handling.
print("실시간 시작. 'q' 종료 / 'o' OCR ON/OFF / 't' HUD / 's' TTS / 'y' YOLO 입력 / 'p' YOLO PNG 저장")
print("모드 전환: '1' 조작 모드 / '2' 보기 모드(상황 설명)")# / '3' 안내 모드(목표로 이동 안내)") # <<< CHANGED
print("ROI 조절: '[' 너비-, ']' 너비+, ';' 높이-, \"'\" 높이+ / 'r' 기본값 복원")
#print("GUIDE: '3' 안내 모드 / 'v' 음성으로 목표 지정 / 'f' 문자 입력 / 'c' 목표 취소")

# EMA state of the frame-to-frame similarity transform (scale/rotation/shift).
s_ema=1.0; theta_ema=0.0; tx_ema=0.0; ty_ema=0.0
prev_gray_full=None; prev_gray_klt=None
prev_gray_s=None; prev_pts=None
# <Experiment 2> wrong TTS guidance when no finger is present
# [CASE 1] KLT OFF & FINGER_STALE_MS 800 -> 2000 (finger may lag)
# [CASE 2] KLT ON & (finger_is_fresh = False after a full second of KLT-only tracking)
# [CASE 3] require n consecutive YOLO hits before enabling KLT

# Fix 7: when KLT-only tracking started (implements CASE 2 above)
klt_only_start_ts = 0.0

finger_src="NONE"; yolo_last_conf=None; klt_draw_pts=None; yolo_box_count=None; yolo_last_in=None

while True:
    ret, frame_cap = cap.read()
    if not ret: break

    # Optional downscale to the working resolution.
    frame_work = frame_cap if WORK_SCALE==1.0 else cv2.resize(frame_cap, None, fx=WORK_SCALE, fy=WORK_SCALE, interpolation=cv2.INTER_AREA)
    H,W = frame_work.shape[:2]
    frame_for_ocr=frame_work.copy()
    frame_disp=frame_work.copy()

    # Share the latest frame with the INFO-mode describer thread.
    with _latest_frame_lock:
        _latest_frame_for_info = frame_work.copy()

    gray=cv2.cvtColor(frame_work, cv2.COLOR_BGR2GRAY)
    gray_klt=_build_gray_for_klt(gray) if USE_KLT_FALLBACK else gray

    # ---- Global SIM ----
    # Estimate camera motion on a downscaled gray image and advect overlays.
    gray_s=cv2.resize(gray, None, fx=FLOW_DS, fy=FLOW_DS, interpolation=cv2.INTER_AREA)
    M_s=None; did_motion=False
    if prev_gray_s is not None:
        # Periodically drop the tracked feature set to re-seed fresh points.
        reseed=((frame_idx % RESEED_INTERVAL_FRAMES)==0)
        if reseed: prev_pts=None
        M_s, next_pts = estimate_similarity_small(prev_gray_s, gray_s, prev_pts)
        prev_pts=next_pts
    prev_gray_s=gray_s

    if M_s is not None:
        # Clamp the per-step scale/rotation, then EMA-smooth the parameters.
        s_step,R_step,t_step_s=project_to_similarity(M_s)
        s_step=max(1.0-MAX_SCALE_STEP, min(1.0+MAX_SCALE_STEP, s_step))
        theta_step=angle_from_R(R_step)
        theta_step=max(-math.radians(MAX_ROT_STEP_DEG), min(math.radians(MAX_ROT_STEP_DEG), theta_step))
        s_ema=(1-EMA_ALPHA_SIM)*s_ema+EMA_ALPHA_SIM*s_step
        # Angle EMA with wrap-around handling.
        theta_ema=(theta_ema+((theta_step-theta_ema+math.pi)%(2*math.pi)-math.pi)*EMA_ALPHA_SIM)
        tx_ema=(1-EMA_ALPHA_SIM)*tx_ema+EMA_ALPHA_SIM*float(t_step_s[0])
        ty_ema=(1-EMA_ALPHA_SIM)*ty_ema+EMA_ALPHA_SIM*float(t_step_s[1])
        transform_overlays_similarity(overlays, s_ema, theta_ema, (tx_ema,ty_ema))
        did_motion=True
    else:
        # ORB-based fallback on the full-resolution gray pair.
        if USE_ORB_FALLBACK and (prev_gray_full is not None):
            M2=orb_similarity(prev_gray_full, gray)
            if M2 is not None:
                s2,R2,t2s=project_to_similarity(M2)
                s2=max(1.0-MAX_SCALE_STEP, min(1.0+MAX_SCALE_STEP, s2))
                theta2=angle_from_R(R2)
                theta2=max(-math.radians(MAX_ROT_STEP_DEG), min(math.radians(MAX_ROT_STEP_DEG), theta2))
                s_ema=(1-EMA_ALPHA_SIM)*s_ema+EMA_ALPHA_SIM*s2
                theta_ema=(theta_ema+((theta2-theta_ema+math.pi)%(2*math.pi)-math.pi)*EMA_ALPHA_SIM)
                # ORB ran at full resolution; scale its translation by FLOW_DS.
                tx_ema=(1-EMA_ALPHA_SIM)*tx_ema+EMA_ALPHA_SIM*float(t2s[0])*FLOW_DS
                ty_ema=(1-EMA_ALPHA_SIM)*ty_ema+EMA_ALPHA_SIM*float(t2s[1])*FLOW_DS
                transform_overlays_similarity(overlays, s_ema, theta_ema, (tx_ema,ty_ema))
                did_motion=True

    # Record this frame's 3x3 step transform for later OCR-result re-projection.
    if did_motion:
        M2_step=np.array([[math.cos(theta_ema)*s_ema, -math.sin(theta_ema)*s_ema, float(tx_ema)/FLOW_DS],
                          [math.sin(theta_ema)*s_ema, math.cos(theta_ema)*s_ema, float(ty_ema)/FLOW_DS]], dtype=np.float32)
    else:
        M2_step=np.array([[1,0,0],[0,1,0]], dtype=np.float32)
    if frame_idx>0:
        sim_steps.append((frame_idx-1, frame_idx, np.vstack([M2_step, [0,0,1]]).astype(np.float32)))
        if len(sim_steps)>SIM_HIST_MAX: sim_steps.pop(0)

    # ---- YOLO finger (ASYNC) ----
    now=time.time()

    with mode_lock:
        in_op_or_guide = (mode_state == MODE_OP) or (mode_state == MODE_GUIDE)  # keep async YOLO from running in viewing (INFO) mode (8.21)

    if in_op_or_guide:  # keep async YOLO from running in viewing mode (8.21)
        if yolo_in_q.empty():
            try: yolo_in_q.put_nowait(frame_work.copy())
            except queue.Full: pass
        try: det=yolo_out_q.get_nowait()
        except queue.Empty: det=None
    else:
        det=None

    # Reset per-frame finger/debug state; detection below may overwrite it.
    finger_is_fresh=False
    finger_src="NONE"
    yolo_last_conf=None
    klt_draw_pts=None
    yolo_box_count=None

    if isinstance(det, dict):
        xy=det.get('xy')
        if xy is not None:
            fx,fy=int(xy[0]), int(xy[1])
            # EMA-filter the fingertip position to reduce jitter.
            if last_finger_xy is None:
                filt=np.array([fx,fy], dtype=np.float32)
            else:
                filt=(1-EMA_ALPHA_FINGER)*np.array(last_finger_xy,dtype=np.float32)+EMA_ALPHA_FINGER*np.array([fx,fy],dtype=np.float32)
            last_finger_xy=(int(filt[0]), int(filt[1]))
            finger_last_seen=now; finger_is_fresh=True; finger_src="YOLO"
            yolo_last_conf=float(det.get('conf',0.0))
            if USE_KLT_FALLBACK:
                # Re-seed KLT points around the fresh YOLO hit.
                klt_pts_prev=_klt_seed_ring(last_finger_xy)
                klt_lost_frames=0; #frames_since_reseed=0 removed (8.20)
        # NOTE(review): the isinstance() re-check below is redundant inside
        # this branch — indentation reconstructed from a mangled source; confirm.
        if isinstance(det, dict) and det.get('yolo_in') is not None:
            yolo_last_in=det['yolo_in']
            if YOLO_SHOW_INPUT: cv2.imshow(YOLO_INPUT_WIN, yolo_last_in)
        if YOLO_DEBUG:
            rb=det.get('raw_boxes',[])
            yolo_box_count=len(rb)
            if YOLO_DRAW_ALL:
                for (x1,y1,x2,y2,conf,cls_id) in rb:
                    cv2.rectangle(frame_disp,(int(x1),int(y1)),(int(x2),int(y2)),(0,200,255),1)
                    cv2.putText(frame_disp,f"{conf:.2f}/{cls_id}",(int(x1),max(0,int(y1)-3)),
                                cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,200,255),1,cv2.LINE_AA)
    # Fix 5: KLT execution — fall back to optical-flow tracking when YOLO missed.
    if USE_KLT_FALLBACK and not finger_is_fresh and (prev_gray_klt is not None) and (klt_pts_prev is not None):
        klt_xy, klt_pts_next = klt_track_multi(prev_gray_klt, gray_klt, klt_pts_prev, W, H)
        if klt_xy is not None:
            # KLT tracking succeeded.
            last_finger_xy = klt_xy
            klt_pts_prev = klt_pts_next
            klt_draw_pts = klt_pts_next  # for on-screen display

            finger_last_seen = now
            finger_is_fresh = True  # a KLT hit still counts as 'fresh' so OCR etc. run
            finger_src = "KLT"
            klt_lost_frames = 0
        else:
            # KLT tracking failed.
            klt_lost_frames += 1
            if klt_lost_frames > KLT_LOSS_GRACE:
                klt_pts_prev = None  # tracking points are stale; reset
    #
    # Fix 7: time-limit KLT-only tracking so a vanished finger stops guidance.
    KLT_TIMEOUT_SEC = 1.0
    if finger_src == "YOLO":
        klt_only_start_ts = 0.0  # YOLO hit: reset the KLT-only timer
    elif finger_src == "KLT":
        if klt_only_start_ts == 0.0:
            klt_only_start_ts = now  # KLT-only tracking just started
        # invalidate the points once KLT-only tracking exceeds one second
        elif (now - klt_only_start_ts) > KLT_TIMEOUT_SEC:
            last_finger_xy = None  # drop the finger position
            finger_is_fresh = False  # suppress TTS
            klt_pts_prev = None  # prevent further KLT execution
            klt_only_start_ts = 0.0  # reset the timer
            finger_src = "NONE"
    else: # "NONE"
        klt_only_start_ts = 0.0  # nothing tracked: reset the timer

    # ---- mode dispatch ----
    with mode_lock:
        mode_now = mode_state

    # Removal 1 --- duplicated key handling removed ---

    # ---- ROI & OCR + proximity reading (OP mode only) ----
    roi=None; protected_boxes=[]; protected_ids=[]
    if mode_now == MODE_OP and finger_is_fresh and (last_finger_xy is not None):
        fx, fy = last_finger_xy
        # Center the OCR ROI on the fingertip, clamped to the frame.
        roi = clamp_rect(int(fx-ROI_W//2), int(fy-ROI_H//2), ROI_W, ROI_H, W, H)
        last_roi = roi
        last_roi_active_until = now + ROI_KEEPALIVE_GRACE_SEC

        rx,ry,rw,rh=roi
        # Keep overlays inside the ROI alive.
        for it in overlays:
            c=poly_center(it['poly'])
            if (rx<=c[0]<=rx+rw) and (ry<=c[1]<=ry+rh):
                it['expiry']=max(it.get('expiry', now), now + BASE_TTL)# unified TTL-extension scheme (8.20)

        # Pin overlays currently under the fingertip.
        for it in overlays:
            bx,by,bw,bh=rect_from_poly(it['poly'])
            if fingertip_overlaps_box((fx,fy),(bx,by,bw,bh)):
                protected_boxes.append((bx,by,bw,bh))
                protected_ids.append(it.get('id'))
                it['expiry']=max(it.get('expiry', now), now + BASE_TTL)
                it['pin_until']=now+PIN_GRACE_SEC

        # Proximity reading (TTS) — OP mode only: speak the overlay whose
        # center is nearest to the fingertip among those it touches.
        overlap_items=[]
        for it in overlays:
            bx,by,bw,bh=rect_from_poly(it['poly'])
            if fingertip_overlaps_box((fx,fy),(bx,by,bw,bh)):
                overlap_items.append(it)
        near=None; bestd=1e9
        for it in overlap_items:
            c=poly_center(it['poly']); d=np.hypot(c[0]-fx, c[1]-fy)
            if d<bestd: bestd=d; near=it

        if near is not None:
            txt=str(near.get('text','')).strip()
            conf=float(near.get('conf',0.0))
            speak_ok=(conf>=TTS_CONF) or (_has_korean(txt) and (conf>=TTS_CONF_FALLBACK))
            note=""
            # NOTE(review): '(not _has_korean(txt) and TTS_CONF)' is truthy for
            # every non-Korean text whenever TTS_CONF != 0 — this looks like it
            # was meant to be 'conf < TTS_CONF'; confirm intended behavior.
            low_conf=(_has_korean(txt) and conf<TTS_CONF_FALLBACK) or (not _has_korean(txt) and TTS_CONF)
            if low_conf: note=(note+f" | low-conf({conf:.2f})") if note else f"low-conf({conf:.2f})"
            say_txt=None
            if speak_ok and txt:
                dict_thr=DICT_THRESHOLD_LOWCONF if low_conf else DICT_THRESHOLD
                mapped, sc = map_to_dict_canon(txt, threshold=dict_thr)
                if mapped:
                    say_txt=mapped; note=(note+f" | dict:{sc:.0f}") if note else f"dict:{sc:.0f}"
                elif not STRICT_DICT_ONLY:
                    # Fall back to jamo-level spell correction.
                    thr=JAMO_THRESHOLD_LOWCONF if low_conf else JAMO_THRESHOLD
                    fixed, changed = correct_text(txt, threshold=thr)
                    say_txt=fixed if changed else txt
                    if changed: note=(note+" | spellfix") if note else "spellfix"
            if say_txt:
                set_tts_target(say_txt, note=note,force=True); tts_last_seen_target_ts=now  # preemptive speech (8.21)
            else:
                if STRICT_DICT_ONLY and (speak_ok and txt):
                    note=(note+" | no-dict") if note else "no-dict"
                    set_tts_target(None, note=note); tts_current_display=txt
        else:
            # Nothing under the finger: clear the target after the sticky window.
            if (now - tts_last_seen_target_ts) > TTS_TARGET_STICKY_SEC:
                set_tts_target(None, note="")

        # ---- OCR scheduling (OP mode only) ----
        if OCR_ENABLED:
            want_period=BASE_OCR_PERIOD
            roi_labels=[it for it in overlays if (roi[0]<=poly_center(it['poly'])[0]<=roi[0]+roi[2]
                                                  and roi[1]<=poly_center(it['poly'])[1]<=roi[1]+roi[3])]
            # Run OCR sooner when the ROI jumped, is empty, or holds only stale/low-conf labels.
            roi_moved_fast=(last_roi is None) or (iou(last_roi, roi) < 0.6)
            roi_empty=(len(roi_labels)==0)
            roi_stale=(len(roi_labels)>0 and all((now - it.get('time',now) > STALE_AGE_SEC) or
                                                 (it.get('conf',0)<LOW_CONF_TH) for it in roi_labels))
            if roi_moved_fast or roi_empty or roi_stale:
                want_period=min(want_period, EXTRA_OCR_PERIOD)

            if (now - last_ocr_time) >= want_period and task_q.qsize()==0:
                gx,gy,gw,gh=roi
                g_roi=gray[gy:gy+gh, gx:gx+gw]
                # Skip blurry or fast-moving frames (wasted OCR work).
                blur_ok=(variance_of_laplacian(g_roi)>=BLUR_VAR_THRESH) or roi_empty
                avg_step=math.hypot(tx_ema, ty_ema)/max(1e-6, FLOW_DS)
                if blur_ok and avg_step>MOTION_GATE_PX: blur_ok=False
                if blur_ok:
                    rects_to_run=[roi]
                    try:
                        task_q.put_nowait({
                            'frame_work': frame_for_ocr.copy(),
                            'rects': rects_to_run,
                            'roi': roi,
                            'frame_idx': frame_idx,
                        })
                        last_ocr_time=now; last_roi=roi
                    except queue.Full:
                        pass

    elif mode_now == MODE_OP and (last_roi is not None) and (now <= last_roi_active_until):
        # Even when YOLO drops out briefly, keep/refresh the TTL of items inside the last ROI.
        rx,ry,rw,rh = last_roi
        for it in overlays:
            c = poly_center(it['poly'])
            if (rx<=c[0]<=rx+rw) and (ry<=c[1]<=ry+rh):
                it['expiry'] = max(it.get('expiry', now), now + BASE_TTL)
    else:
        # INFO mode: proximity reading and ROI OCR are both disabled.
        if mode_now == MODE_OP:
            pass

    # Prevent a stale proximity reading from repeating after the finger disappears (8.21).
    if mode_now == MODE_OP and not finger_is_fresh:
        if (time.time() - tts_last_seen_target_ts) > TTS_TARGET_STICKY_SEC:
            set_tts_target(None, note="")

    # ---- merge OCR results (OP mode only) ----
    if mode_now == MODE_OP:
        try:
            while True:
                res=result_q.get_nowait()
                if 'dt_ms' in res:
                    # Track an EMA of the OCR latency for the HUD.
                    if OCR_EMA is None: OCR_EMA=res['dt_ms']
                    else: OCR_EMA=(1-OCR_EMA_ALPHA)*OCR_EMA + OCR_EMA_ALPHA*res['dt_ms']
                if res.get('new_items'):
                    # Compose the per-frame similarity steps from the OCR
                    # capture frame up to the current frame.
                    def _T_from_to(a,b):
                        if b<=a: return np.eye(3,dtype=np.float32)
                        T=np.eye(3,dtype=np.float32)
                        for (src,dst,M3) in sim_steps:
                            if a < dst <= b: T = M3 @ T
                        return T
                    T_cap2now=_T_from_to(res.get('frame_idx',frame_idx), frame_idx)
                    def _apply(poly, M3):
                        P=poly.astype(np.float32)
                        return (P @ M3[:2,:2].T) + M3[:2,2]
                    roi_now=_rect_aabb_after_M(res['roi'], T_cap2now, W, H)
                    new_items=[]
                    for ni in res['new_items']:
                        # Re-project each detection into current-frame coords
                        # and keep only those whose center is still in the ROI.
                        ni['poly']=_apply(ni['poly'], T_cap2now)
                        bx,by,bw,bh=bbox_of_poly(ni['poly'])
                        cx,cy=bx+bw/2, by+bh/2
                        gx,gy,gw,gh=roi_now
                        if gx<=cx<=gx+gw and gy<=cy<=gy+gh:
                            new_items.append(ni)
                    overlays=merge_update_overlays(overlays, new_items, roi_now, now_ts=time.time(),
                                                   iou_th=MERGE_IOU_TH, center_dist_th=MERGE_CENTER_DIST)
        except queue.Empty:
            pass

    # ---- GUIDE MODE tick (OP mode only) ----
    if mode_now == MODE_OP:
        guide_tick(now, last_finger_xy if finger_is_fresh else None, overlays)

    # ---- Prune & render ----
    now2=time.time()
    if (now2-last_prune) >= PRUNE_TIMEOUT_SEC: # prune period made a variable (8.20)
        overlays = dedupe_same_text_overlays(overlays)
        active_roi = None
        if mode_now == MODE_OP:
            if finger_is_fresh and roi is not None:
                active_roi = roi
            elif (last_roi is not None) and (now2 <= last_roi_active_until):
                active_roi = last_roi
        overlays = prune_overlays(overlays, now2, active_roi=active_roi)
        last_prune=now2

    if roi is not None and (mode_now == MODE_OP) and finger_is_fresh and last_finger_xy is not None:
        cv2.rectangle(frame_disp, (roi[0],roi[1]), (roi[0]+roi[2], roi[1]+roi[3]), (120,120,255), 1)

    if last_finger_xy is not None:
        # Green = YOLO, magenta = KLT, gray = stale.
        color=(0,255,0) if finger_src=="YOLO" else ((255,0,255) if finger_src=="KLT" else (160,160,160))
        cv2.circle(frame_disp, last_finger_xy, 9, color, -1)
        if finger_src=="KLT" and YOLO_DEBUG and klt_draw_pts is not None:
            for p in klt_draw_pts[:60]:
                cv2.circle(frame_disp, (int(p[0,0]), int(p[0,1])), 2, (180,0,180), -1)

    # Highlight the guide target (OP mode only).
    if (mode_now == MODE_OP) and GUIDE_MODE and GUIDE_TARGET_ITEM is not None:
        highlight_guide_target(frame_disp, GUIDE_TARGET_ITEM)

    # Render overlays (OP mode only).
    if mode_now == MODE_OP:
        draw_overlays(frame_disp, overlays, now2)

    if SHOW_TTS_HINT:
        # Build the HUD lines, then draw them via PIL (for Korean font support).
        mode_txt = "MODE: OP" if mode_now == MODE_OP else f"MODE: INFO({int(INFO_PERIOD_SEC)}s)"
        l1=f"{mode_txt} | OCR: {'ON' if (OCR_ENABLED and mode_now==MODE_OP) else 'OFF'} | {OCR_ENGINE}"
        if OCR_EMA is not None and mode_now==MODE_OP: l1+=f" ~{int(OCR_EMA)} ms"
        l1+=f" TTS: {'ON' if TTS_ENABLE else 'OFF'}"
        src_txt=f"SRC: {finger_src}"
        if finger_src=="YOLO" and yolo_last_conf is not None: src_txt+=f" conf={yolo_last_conf:.2f}"
        if finger_src=="KLT" and klt_draw_pts is not None: src_txt+=f" klt_pts={len(klt_draw_pts)}"
        age_ms=int((now - finger_last_seen)*1000.0) if last_finger_xy is not None else -1
        if age_ms>=0: src_txt+=f" age={age_ms} ms"
        if yolo_box_count is not None: src_txt+=f" boxes={yolo_box_count}"
        roi_txt=f"ROI: {ROI_W}x{ROI_H} ([ ] width ; ' height)"

        # <<< CHANGED: keep the last spoken phrase on the HUD even with no current target
        say_txt = (tts_current_display.strip() or tts_last_spoken_text.strip())
        say_line=f"SAY: {say_txt}" if say_txt else "SAY: (none)"
        if tts_current_note: say_line+=f" [{tts_current_note}]"

        guide_txt = f"GUIDE: {'ON' if (mode_now == MODE_OP and GUIDE_MODE) else 'OFF'}"
        if (mode_now == MODE_OP) and GUIDE_TARGET:
            guide_txt += f" target='{GUIDE_TARGET}'"
        if (mode_now == MODE_OP) and GUIDE_TARGET_ITEM is not None:
            cx, cy = map(int, _overlay_center(GUIDE_TARGET_ITEM))
            guide_txt += f" tgt@({cx},{cy})"

        lines=[guide_txt, l1, src_txt, roi_txt, say_line]

        img_rgb=cv2.cvtColor(frame_disp, cv2.COLOR_BGR2RGB)
        pil=Image.fromarray(img_rgb); draw=ImageDraw.Draw(pil)
        # First available CJK-capable font wins; default bitmap font otherwise.
        font_path=None
        for p in [r"C:\Windows\Fonts\malgun.ttf", r"C:\Windows\Fonts\NanumGothic.ttf",
                  r"C:\Windows\Fonts\NotoSansCJKkr-Regular.otf",
                  "/usr/share/fonts/truetype/noto/NotoSansCJKkr-Regular.ttc"]:
            if os.path.isfile(p): font_path=p; break
        font=ImageFont.truetype(font_path, 22) if font_path else ImageFont.load_default()

        pad_x,pad_y,gap=10,8,4
        widths=[draw.textlength(s, font=font) for s in lines]
        tw=int(max(widths)) if widths else 0; lh=24
        th=lh*len(lines)+(len(lines)-1)*gap
        x0,y0=8,6
        # Semi-transparent black panel behind the HUD text.
        bg=Image.new("RGBA",(tw+pad_x*2, th+pad_y*2),(0,0,0,180))
        pil.paste(bg,(x0,y0),bg)
        y=y0+pad_y
        for s in lines:
            draw.text((x0+pad_x,y), s, font=font, fill=(255,255,255), stroke_width=2, stroke_fill=(0,0,0))
            y+=lh+gap
        frame_disp[:]=cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)

    disp_scale=min(1.0, DISPLAY_MAX_W/float(W))
    vis=frame_disp if disp_scale==1.0 else cv2.resize(frame_disp, None, fx=disp_scale, fy=disp_scale, interpolation=cv2.INTER_AREA)
    cv2.imshow(WINDOW_NAME, vis)

    key=cv2.waitKey(1)&0xFF
    if key==ord('q'): break
    elif key==ord('o'):
        OCR_ENABLED = not OCR_ENABLED
        drain_queue(task_q)
        last_ocr_time = 0.0 if OCR_ENABLED else time.time()
        print(f"[OCR] {'ENABLED' if OCR_ENABLED else 'DISABLED'}")
    elif key==ord('t'):
        SHOW_TTS_HINT = not SHOW_TTS_HINT
        print(f"[HUD] {'ON' if SHOW_TTS_HINT else 'OFF'}")
    elif key==ord('s'):
        TTS_ENABLE = not TTS_ENABLE
        set_tts_target(None, note="")
        print(f"[TTS] {'ENABLED' if TTS_ENABLE else 'DISABLED'}")
    elif key==ord('y'):
        YOLO_SHOW_INPUT = not YOLO_SHOW_INPUT
        if not YOLO_SHOW_INPUT:
            try: cv2.destroyWindow(YOLO_INPUT_WIN)
            except: pass
        print(f"[YOLO] INPUT PREVIEW {'ON' if YOLO_SHOW_INPUT else 'OFF'}")
    elif key==ord('p'):
        if yolo_last_in is not None:
            os.makedirs('yolo_inputs', exist_ok=True)
            fname=time.strftime("yolo_inputs/%Y%m%d_%H%M%S.png")
            cv2.imwrite(fname, yolo_last_in)
            print(f"[YOLO] saved input preview -> {fname}")
        else:
            print("[YOLO] no input to save yet")
    # --- ROI size adjustment ---
    elif key==ord('['): # width -
        ROI_W=max(MIN_ROI_W, ROI_W-40)
    elif key==ord(']'): # width +
        ROI_W=min(W, ROI_W+40)
    elif key==ord(';'): # height -
        ROI_H=max(MIN_ROI_H, ROI_H-30)
    elif key==ord("'"): # height +
        ROI_H=min(H, ROI_H+30)
    elif key==ord('r'): # reset ROI
        ROI_W, ROI_H = 420, 420
        print("[ROI] reset to 420x420")

    # ---- mode switch keys ----
    elif key == ord('1'):
        _enter_op_mode()
        print("[MODE] OP")

    elif key == ord('2'):
        _enter_info_mode()
        print("[MODE] INFO")

    elif key == ord('3'):
        _enter_guide_mode()
        print("[MODE] GUIDE")

    # ---- GUIDE mode keys ----
    elif key == ord('c'):
        GUIDE_TARGET = None
        GUIDE_TARGET_ITEM = None
        set_tts_target("목표를 취소했습니다.", note="guide cancel")
        print("[GUIDE] target cleared")

    elif key == ord('v'):
        if mode_state == MODE_GUIDE:
            text = stt_listen_once(timeout=4, phrase_time_limit=4) if (USE_STT and _STT_OK) else None
            if text:
                print(f"[STT] heard: {text}")
                set_guide_target_from_text(text)
            else:
                print("[STT] no text")
        else:
            set_tts_target("먼저 3번을 눌러 안내 모드를 켜 주세요.", note="guide")

    elif key == ord('f'):
        if mode_state == MODE_GUIDE:
            try:
                print("\n[GUIDE] 입력 예시: '세탁', '건조맞춤' ...")
                user_in = input("[GUIDE] 목표 단어 입력: ").strip()
                if user_in:
                    set_guide_target_from_text(user_in)
            except Exception:
                pass
        else:
            set_tts_target("먼저 3번을 눌러 안내 모드를 켜 주세요.", note="guide")


    frame_idx+=1
    prev_gray_full=gray.copy()
    prev_gray_klt=gray_klt.copy()

# cleanup
task_q.put(None)
_info_stop.set()
if TTS_ENABLE and 'tts' in globals() and tts:
    tts_stop.set()
    # tts_q.put(None) removed (8.20)
    try: tts.close()
    except Exception: pass
yolo_stop.set()
cap.release()
cv2.destroyAllWindows()
|
tts_reader.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tts_reader.py — module version meant to be imported (not run standalone)
|
| 2 |
+
import os, time, threading, queue, hashlib
|
| 3 |
+
from typing import Optional, Iterable
|
| 4 |
+
import pygame
|
| 5 |
+
from google.cloud import texttospeech
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def _is_korean(s: str) -> bool:
|
| 9 |
+
return any('가' <= ch <= '힣' for ch in (s or ""))
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class TTSReader:
|
| 13 |
+
"""
|
| 14 |
+
- say(text): 비동기 합성+재생 (메인 루프 non-blocking)
|
| 15 |
+
- 같은 문구 과도 반복 방지(cooldown_sec)
|
| 16 |
+
- 텍스트별 mp3 캐시(tts_cache/)로 재사용
|
| 17 |
+
- 한/영 자동 보이스 선택
|
| 18 |
+
- ignore/min_len로 노이즈 필터 가능
|
| 19 |
+
- credentials_path를 넘기지 않으면 GOOGLE_APPLICATION_CREDENTIALS 환경변수 사용
|
| 20 |
+
"""
|
| 21 |
+
def __init__(
|
| 22 |
+
self,
|
| 23 |
+
*,
|
| 24 |
+
credentials_path: Optional[str] = None,
|
| 25 |
+
cache_dir: str = "tts_cache",
|
| 26 |
+
cooldown_sec: float = 1.2,
|
| 27 |
+
speaking_rate: float = 1.05,
|
| 28 |
+
pitch: float = 0.0,
|
| 29 |
+
ko_voice: str = "ko-KR-Standard-A",
|
| 30 |
+
en_voice: str = "en-US-Standard-C",
|
| 31 |
+
min_len: int = 2,
|
| 32 |
+
ignore: Optional[Iterable[str]] = None,
|
| 33 |
+
):
|
| 34 |
+
# 인증
|
| 35 |
+
if credentials_path:
|
| 36 |
+
self.client = texttospeech.TextToSpeechClient.from_service_account_file(credentials_path)
|
| 37 |
+
else:
|
| 38 |
+
self.client = texttospeech.TextToSpeechClient()
|
| 39 |
+
|
| 40 |
+
# 기본 필터
|
| 41 |
+
self.ignore = set(["", None, "None", "hand not detected", "hand detected, but ocr doesn't exist"])
|
| 42 |
+
if ignore:
|
| 43 |
+
self.ignore |= set(ignore)
|
| 44 |
+
self.min_len = min_len
|
| 45 |
+
|
| 46 |
+
# 보이스/오디오 설정
|
| 47 |
+
self.ko_voice = ko_voice
|
| 48 |
+
self.en_voice = en_voice
|
| 49 |
+
self.speaking_rate = speaking_rate
|
| 50 |
+
self.pitch = pitch
|
| 51 |
+
self.cooldown_sec = cooldown_sec
|
| 52 |
+
|
| 53 |
+
# 캐시
|
| 54 |
+
self.cache_dir = cache_dir
|
| 55 |
+
os.makedirs(self.cache_dir, exist_ok=True)
|
| 56 |
+
|
| 57 |
+
# 상태
|
| 58 |
+
self.last_text = ""
|
| 59 |
+
self.last_time = 0.0
|
| 60 |
+
self._running = True
|
| 61 |
+
|
| 62 |
+
# 재생 스레드
|
| 63 |
+
self.q = queue.Queue()
|
| 64 |
+
if not pygame.mixer.get_init():
|
| 65 |
+
pygame.mixer.init()
|
| 66 |
+
target_fn = getattr(self, '_worker', None)
|
| 67 |
+
if target_fn is None:
|
| 68 |
+
# 안전장치: 동일 로직의 임시 워커 생성
|
| 69 |
+
def target_fn():
|
| 70 |
+
while self._running:
|
| 71 |
+
text = self.q.get()
|
| 72 |
+
if text is None:
|
| 73 |
+
break
|
| 74 |
+
try:
|
| 75 |
+
path = self._synth_if_needed(text)
|
| 76 |
+
self._play(path)
|
| 77 |
+
except Exception as e:
|
| 78 |
+
print(f"[TTS] error: {e}")
|
| 79 |
+
self.worker = threading.Thread(target=target_fn, daemon=True)
|
| 80 |
+
self.worker.start()
|
| 81 |
+
|
| 82 |
+
# 컨텍스트 매니저 지원 (선택)
|
| 83 |
+
def __enter__(self):
|
| 84 |
+
return self
|
| 85 |
+
def __exit__(self, exc_type, exc, tb):
|
| 86 |
+
self.close()
|
| 87 |
+
|
| 88 |
+
def close(self):
|
| 89 |
+
"""앱 종료 시 호출(선택)."""
|
| 90 |
+
self._running = False
|
| 91 |
+
self.q.put(None)
|
| 92 |
+
try:
|
| 93 |
+
self.worker.join(timeout=2.0)
|
| 94 |
+
except Exception:
|
| 95 |
+
pass
|
| 96 |
+
# pygame.mixer.quit() # 앱 전체에서 mixer 공유 시 보통 유지
|
| 97 |
+
|
| 98 |
+
# ---------- public API ----------
|
| 99 |
+
def say(self, text: Optional[str]) -> bool:
|
| 100 |
+
"""
|
| 101 |
+
텍스트를 읽도록 큐에 추가. 스킵되면 False, 큐에 들어가면 True.
|
| 102 |
+
디바운스/필터/길이 조건을 통과해야 읽음.
|
| 103 |
+
"""
|
| 104 |
+
text = (text or "").strip()
|
| 105 |
+
# 추가 1: 한국어일 때만 읽기
|
| 106 |
+
if not _is_korean(text):
|
| 107 |
+
return False
|
| 108 |
+
|
| 109 |
+
if not text or text in self.ignore or len(text) < self.min_len:
|
| 110 |
+
return False
|
| 111 |
+
|
| 112 |
+
now = time.time()
|
| 113 |
+
if text == self.last_text and (now - self.last_time) < self.cooldown_sec:
|
| 114 |
+
return False
|
| 115 |
+
|
| 116 |
+
self.last_text = text
|
| 117 |
+
self.last_time = now
|
| 118 |
+
self.q.put(text)
|
| 119 |
+
return True
|
| 120 |
+
|
| 121 |
+
def say_if_close(self, text: Optional[str], distance: float, threshold: float = 100.0) -> bool:
|
| 122 |
+
"""
|
| 123 |
+
손가락-텍스트 거리가 threshold보다 가까울 때만 읽고 싶을 때 사용.
|
| 124 |
+
"""
|
| 125 |
+
if distance is None or distance >= threshold:
|
| 126 |
+
return False
|
| 127 |
+
return self.say(text)
|
| 128 |
+
# 추가 2: 큐 비우기
|
| 129 |
+
def clear_queue(self):
|
| 130 |
+
"""큐에 대기 중인 모든 TTS 요청을 비웁니다."""
|
| 131 |
+
with self.q.mutex:
|
| 132 |
+
self.q.queue.clear()
|
| 133 |
+
|
| 134 |
+
# ---------- internals ----------
|
| 135 |
+
def _voice(self, text: str):
|
| 136 |
+
if _is_korean(text):
|
| 137 |
+
return texttospeech.VoiceSelectionParams(language_code="ko-KR", name=self.ko_voice)
|
| 138 |
+
return texttospeech.VoiceSelectionParams(language_code="en-US", name=self.en_voice)
|
| 139 |
+
|
| 140 |
+
def _audio_cfg(self):
|
| 141 |
+
return texttospeech.AudioConfig(
|
| 142 |
+
audio_encoding=texttospeech.AudioEncoding.MP3,
|
| 143 |
+
speaking_rate=self.speaking_rate,
|
| 144 |
+
pitch=self.pitch,
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
def _cache_path(self, text: str) -> str:
|
| 148 |
+
h = hashlib.sha1(text.encode("utf-8")).hexdigest()
|
| 149 |
+
return os.path.join(self.cache_dir, f"{h}.mp3")
|
| 150 |
+
|
| 151 |
+
def _synth_if_needed(self, text: str) -> str:
|
| 152 |
+
path = self._cache_path(text)
|
| 153 |
+
if not os.path.exists(path):
|
| 154 |
+
req = texttospeech.SynthesisInput(text=text)
|
| 155 |
+
resp = self.client.synthesize_speech(input=req, voice=self._voice(text), audio_config=self._audio_cfg())
|
| 156 |
+
with open(path, "wb") as f:
|
| 157 |
+
f.write(resp.audio_content)
|
| 158 |
+
return path
|
| 159 |
+
|
| 160 |
+
def _play(self, path: str):
|
| 161 |
+
pygame.mixer.music.load(path)
|
| 162 |
+
pygame.mixer.music.play()
|
| 163 |
+
while pygame.mixer.music.get_busy() and self._running:
|
| 164 |
+
time.sleep(0.03)
|
| 165 |
+
|
| 166 |
+
def _worker(self):
|
| 167 |
+
while self._running:
|
| 168 |
+
text = self.q.get()
|
| 169 |
+
if text is None:
|
| 170 |
+
break
|
| 171 |
+
try:
|
| 172 |
+
path = self._synth_if_needed(text)
|
| 173 |
+
self._play(path)
|
| 174 |
+
except Exception as e:
|
| 175 |
+
print(f"[TTS] error: {e}")
|
| 176 |
+
|
| 177 |
+
def stop(self):
|
| 178 |
+
try:
|
| 179 |
+
import pygame
|
| 180 |
+
pygame.mixer.music.stop()
|
| 181 |
+
except Exception:
|
| 182 |
+
pass
|
| 183 |
+
|
| 184 |
+
def cancel(self):
|
| 185 |
+
try: self.stop()
|
| 186 |
+
except Exception: pass
|
| 187 |
+
|
| 188 |
+
def flush(self):
|
| 189 |
+
try: self.stop()
|
| 190 |
+
except Exception: pass
|
| 191 |
+
|
| 192 |
+
def is_busy(self):
|
| 193 |
+
try:
|
| 194 |
+
import pygame
|
| 195 |
+
return pygame.mixer.music.get_busy()
|
| 196 |
+
except Exception:
|
| 197 |
+
return False
|