yugangee committed
Commit 17f032f · verified · 1 Parent(s): 6e529ca

Upload 4 files

Files changed (4)
  1. requirements.txt +0 -0
  2. seeing.py +671 -0
  3. total_mode.py +1885 -0
  4. tts_reader.py +197 -0
requirements.txt ADDED
Binary file (6.29 kB).
 
seeing.py ADDED
@@ -0,0 +1,671 @@
+ # seeing.py
+ # Module that analyzes the current frame in INFO mode and builds a one-sentence summary.
+ # Rebuilt to include all of the latest logic from test (1).py.
+
+ import os
+ import re
+ import math
+ import cv2
+ import numpy as np
+ from typing import List, Tuple, Dict, Any, Optional
+
+ # ==============================
+ # Module-level settings and constants
+ # ==============================
+ SIDE_LEFT = ["통살균", "원격제어", "예약", "내마음"]
+ SIDE_RIGHT = ["터보샷", "구김방지", "알림음", "빨래추가"]
+ SIDE_EUCLID_MAX_REL = 0.08
+
+ CATEGORY_OPTIONS = {
+     "세탁": ["불림", "애벌세탁", "강력", "표준", "적은때"],
+     "헹굼": ["5회", "4", "3", "2", "1"],
+     "탈수": ["건조맞춤", "강", "중", "약", "섬세"],
+     "물온도": ["95", "60", "40", "30", "냉수"],
+ }
+ READOUT_ORDER = ("세탁", "헹굼", "탈수", "물온도")
+
+ LABEL_SYNONYMS = {
+     r"\s+": "",
+     r"[**()\[\]]": "",
+     r"^이?터보\s*샷?$": "터보샷",
+     r"\*?터보\s*샷": "터보샷",
+     r"\*?알림\s*음(?:\(3초\))?": "알림음",
+     r"Wi[\-\s]?Fi": "WiFi",
+     r"일회": "1회", r"이회": "2회", r"삼회": "3회", r"사회": "4회", r"오회": "5회",
+     r"95\s*℃|95도": "95", r"60\s*℃|60도": "60",
+     r"40\s*℃|40도": "40", r"30\s*℃|30도": "30",
+ }
+
+ SIDE_SET = set(SIDE_LEFT + SIDE_RIGHT)
+ CAT2SET = {k: set(v) for k, v in CATEGORY_OPTIONS.items()}
+ ALL_ALLOWED = SIDE_SET.union(*CAT2SET.values())
+
+ # --- Center-band settings ---
+ CENTER_BAND_PAD_REL = 0.06
+ CENTER_BAND_FALLBACK = (0.34, 0.66)
+ CENTER_RIGHT_MIN_PX = 6
+ CENTER_RIGHT_MIN_FRAC = 0.18
+
+ # --- Side-button matching settings ---
+ SIDE_COLW_REL = 0.08
+ SIDE_DMAX_REL = 0.25
+ SIDE_Y_GAP_MIN = 2
+ SIDE_Y_TOL_REL = 0.02
+
+ # === (ADD) Frame stabilizer ==========================================
+ class MotionStabilizer:
+     def __init__(self, downscale=0.5, ecc=True, homography=False,
+                  max_iter=50, eps=1e-6):
+         import cv2
+         self.ds = float(downscale)
+         self.warp_mode = (cv2.MOTION_HOMOGRAPHY if homography
+                           else (cv2.MOTION_EUCLIDEAN if ecc else cv2.MOTION_TRANSLATION))
+         self.max_iter = int(max_iter)
+         self.eps = float(eps)
+         self.prev_gray_ds = None  # float32 [0..1]
+         self.homography = bool(homography)
+
+     def reset(self):
+         self.prev_gray_ds = None
+
+     def _ds(self, img):
+         if self.ds and self.ds < 1.0:
+             h, w = img.shape[:2]
+             return cv2.resize(img, (int(w*self.ds), int(h*self.ds)), interpolation=cv2.INTER_AREA)
+         return img
+
+     def _undscale_warp(self, M):
+         s = self.ds
+         if self.homography:
+             S = np.array([[s,0,0],[0,s,0],[0,0,1]], np.float32)
+             Si = np.array([[1/s,0,0],[0,1/s,0],[0,0,1]], np.float32)
+             return Si @ M @ S
+         else:
+             A = np.eye(3, dtype=np.float32)
+             A[:2,:] = M
+             S = np.array([[s,0,0],[0,s,0],[0,0,1]], np.float32)
+             Si = np.array([[1/s,0,0],[0,1/s,0],[0,0,1]], np.float32)
+             A = Si @ A @ S
+             return A[:2,:]
+
+     def apply(self, frame_bgr):
+         import cv2
+         g = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
+         g_ds = self._ds(g)
+         g_ds_f = g_ds.astype(np.float32) / 255.0
+
+         if self.prev_gray_ds is None:
+             self.prev_gray_ds = g_ds_f
+             return frame_bgr
+
+         warp = (np.eye(3, dtype=np.float32) if self.homography
+                 else np.eye(2, 3, dtype=np.float32))
+         criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
+                     self.max_iter, self.eps)
+         try:
+             _, warp = cv2.findTransformECC(
+                 templateImage=self.prev_gray_ds,
+                 inputImage=g_ds_f,
+                 warpMatrix=warp,
+                 motionType=(cv2.MOTION_HOMOGRAPHY if self.homography else self.warp_mode),
+                 criteria=criteria,
+                 inputMask=None, gaussFiltSize=1
+             )
+             if self.homography:
+                 Wf = self._undscale_warp(warp)
+                 out = cv2.warpPerspective(frame_bgr, Wf, (frame_bgr.shape[1], frame_bgr.shape[0]),
+                                           flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
+                                           borderMode=cv2.BORDER_REPLICATE)
+                 self.prev_gray_ds = cv2.warpPerspective(g_ds_f, warp, (g_ds_f.shape[1], g_ds_f.shape[0]),
+                                                         flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
+                                                         borderMode=cv2.BORDER_REPLICATE)
+             else:
+                 Wf = self._undscale_warp(warp)
+                 out = cv2.warpAffine(frame_bgr, Wf, (frame_bgr.shape[1], frame_bgr.shape[0]),
+                                      flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
+                                      borderMode=cv2.BORDER_REPLICATE)
+                 self.prev_gray_ds = cv2.warpAffine(g_ds_f, warp, (g_ds_f.shape[1], g_ds_f.shape[0]),
+                                                    flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
+                                                    borderMode=cv2.BORDER_REPLICATE)
+             return out
+         except Exception:
+             self.prev_gray_ds = g_ds_f
+             return frame_bgr
+ # ===================================================================
+
+ # (ADD) Default toggle / singleton
+ STABILIZE_DEFAULT = True  # Default ON; set to False to disable if needed
+ _global_stabilizer = None
+
+ # ==============================
+ # Internal helper functions
+ # ==============================
+
+ def _canon_text(raw: str) -> str:
+     if not raw: return ""
+     s = str(raw)
+     for pat, rep in LABEL_SYNONYMS.items():
+         s = re.sub(pat, rep, s, flags=re.IGNORECASE)
+     m = re.fullmatch(r"([1-4])회", s)
+     if m:
+         s = m.group(1)
+     elif re.fullmatch(r"5", s):
+         s = "5회"
+     digits = re.sub(r"[^0-9]", "", s)
+     if digits and any(digits in v for v in CATEGORY_OPTIONS.values()):
+         s = digits if s != "5회" else "5회"
+     s = re.sub(r"[^0-9A-Za-z가-힣]", "", s)
+     return s
+
+ def _is_side_button(tok: str) -> bool:
+     return tok in SIDE_SET
+
+ def _which_category(tok: str):
+     for cat, opts in CAT2SET.items():
+         if tok in opts: return cat
+     return None
+
+ def _order_pts(pts):
+     rect = np.zeros((4, 2), dtype=np.float32)
+     s = pts.sum(axis=1); d = np.diff(pts, axis=1)
+     rect[0] = pts[np.argmin(s)]
+     rect[2] = pts[np.argmax(s)]
+     rect[1] = pts[np.argmin(d)]
+     rect[3] = pts[np.argmax(d)]
+     return rect
+
+ def _warp_points(H, pts_xy):
+     pts = np.asarray(pts_xy, dtype=np.float32).reshape(-1,1,2)
+     return cv2.perspectiveTransform(pts, H).reshape(-1,2)
+
+ def _map_rect_from_rectified(Hinv, x, y, w, h, offset=(0,0)):
+     corners = np.float32([[x,y], [x+w,y], [x+w,y+h], [x,y+h]])
+     mapped = _warp_points(Hinv, corners)
+     x1,y1 = mapped.min(axis=0); x2,y2 = mapped.max(axis=0)
+     ox, oy = offset
+     return int(x1+ox), int(y1+oy), int(x2-x1), int(y2-y1)
+
+ def _easyocr_to_items(detections):
+     items = []
+     for bbox, text, conf in detections:
+         quad = np.array(bbox, dtype=float)
+         xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
+         cx, cy = float(sum(xs)/4), float(sum(ys)/4)
+         xyxy = np.array([min(xs), min(ys), max(xs), max(ys)], dtype=float)
+         items.append({"text": text.strip(), "conf": float(conf),
+                       "box": quad, "center": (cx, cy), "xyxy": xyxy})
+     return items
+
+ def _detect_panel_roi(img_bgr, v_pctl=35, bh_kernel=31, min_area_frac=0.08, ar_range=(1.1, 4.0), pad_frac=0.01):
+     h, w = img_bgr.shape[:2]
+     hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
+     V = hsv[:,:,2]
+     k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (bh_kernel, bh_kernel))
+     bh = cv2.morphologyEx(V, cv2.MORPH_BLACKHAT, k)
+     _, m_bh = cv2.threshold(bh, max(20, bh.mean() + 1.0*bh.std()), 255, cv2.THRESH_BINARY)
+     thr_dark = int(np.percentile(V, v_pctl))
+     m_dark = cv2.inRange(V, 0, thr_dark)
+     mask = cv2.bitwise_or(m_bh, m_dark)
+     mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(15,15)), 2)
+     mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(7,7)), 1)
+     cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+     H, W = h, w
+     best = None
+     for c in cnts:
+         area = cv2.contourArea(c)
+         if area < min_area_frac * (H*W): continue
+         hull = cv2.convexHull(c)
+         x,y,wid,hei = cv2.boundingRect(hull)
+         ar = max(wid,hei) / max(1, min(wid,hei))
+         if not (ar_range[0] <= ar <= ar_range[1]): continue
+         if (best is None) or (area > best[0]):
+             best = (area, (x,y,wid,hei))
+     if best is None:
+         return (0,0,W,H), mask
+     x,y,wid,hei = best[1]
+     pad = int(pad_frac * max(H, W))
+     x0 = max(0, x - pad); y0 = max(0, y - pad)
+     x1 = min(W, x + wid + pad); y1 = min(H, y + hei + pad)
+     return (x0,y0,x1,y1), mask
+
+ def _deskew_panel_by_mask(panel_bgr, panel_mask_roi, min_quad_area_frac=0.05):
+     h, w = panel_bgr.shape[:2]
+     cnts, _ = cv2.findContours(panel_mask_roi, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+     if not cnts: return panel_bgr, None
+     c = max(cnts, key=cv2.contourArea)
+     if cv2.contourArea(c) < (min_quad_area_frac * h * w):
+         return panel_bgr, None
+     peri = cv2.arcLength(c, True)
+     approx = cv2.approxPolyDP(c, 0.02 * peri, True)
+     src = approx.reshape(4,2).astype(np.float32) if len(approx) == 4 else cv2.boxPoints(cv2.minAreaRect(c)).astype(np.float32)
+     src = _order_pts(src)
+     (tl, tr, br, bl) = src
+     Wt = int(max(np.linalg.norm(br-bl), np.linalg.norm(tr-tl))); Wt = max(100, Wt)
+     Ht = int(max(np.linalg.norm(tr-br), np.linalg.norm(tl-bl))); Ht = max(100, Ht)
+     dst = np.array([[0,0],[Wt-1,0],[Wt-1,Ht-1],[0,Ht-1]], dtype=np.float32)
+     H = cv2.getPerspectiveTransform(src, dst)
+     warped = cv2.warpPerspective(panel_bgr, H, (Wt, Ht), flags=cv2.INTER_CUBIC)
+     return warped, H
+
+ def _build_glare_mask(panel_bgr, v_thr=235, s_thr=45, lap_var_thr=25.0, min_area_rel=1e-4, max_area_rel=2e-2, ar_min=3.0, close_ks=5, open_ks=3, dil_ks=3):
+     h, w = panel_bgr.shape[:2]
+     hsv = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2HSV)
+     H, S, V = cv2.split(hsv)
+     m_hi = (V >= v_thr) & (S <= s_thr)
+     m = (m_hi.astype(np.uint8) * 255)
+     m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(close_ks,close_ks)), 1)
+     m = cv2.morphologyEx(m, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(open_ks,open_ks)), 1)
+     area_img = float(h*w)
+     out = np.zeros_like(m, dtype=np.uint8)
+     num, lab, stats, _ = cv2.connectedComponentsWithStats(m, 8)
+     gray = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2GRAY)
+     for i in range(1, num):
+         x,y,wid,hei,area = stats[i]
+         rel = area/area_img
+         if rel < min_area_rel or rel > max_area_rel: continue
+         ar = max(wid,hei)/max(1, min(wid,hei))
+         if ar < ar_min: continue
+         crop = gray[y:y+hei, x:x+wid]
+         if cv2.Laplacian(crop, cv2.CV_64F).var() > lap_var_thr: continue
+         out[lab==i] = 255
+     out = cv2.dilate(out, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(dil_ks,dil_ks)), 1)
+     ratio = out.sum() / 255.0 / area_img
+     return out, float(ratio)
+
+ def _apply_deglare_toneclip(panel_bgr, glare_mask, ring_px=3, add_v=18):
+     hsv = cv2.cvtColor(panel_bgr, cv2.COLOR_BGR2HSV)
+     H, S, V = cv2.split(hsv)
+     dil = cv2.dilate(glare_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ring_px*2+1, ring_px*2+1)), 1)
+     ring = cv2.bitwise_and(dil, cv2.bitwise_not(glare_mask))
+     if cv2.countNonZero(ring) == 0:
+         return panel_bgr
+     ring_med = int(np.median(V[ring.astype(bool)]))
+     cap = np.clip(ring_med + add_v, 0, 255).astype(np.uint8)
+     V2 = V.copy()
+     V2[glare_mask.astype(bool)] = np.minimum(V2[glare_mask.astype(bool)], cap)
+     return cv2.cvtColor(cv2.merge([H,S,V2]), cv2.COLOR_HSV2BGR)
+
+ def _ocr_with_deglare_when_needed(panel_rect_bgr, reader, area_gate=0.002):
+     det_orig = reader.readtext(panel_rect_bgr)
+     m_gl, ratio = _build_glare_mask(panel_rect_bgr)
+     if ratio < area_gate:
+         return det_orig
+     degl = _apply_deglare_toneclip(panel_rect_bgr, m_gl)
+     det_degl = reader.readtext(degl)
+     def _score(dets):
+         return sum(c for _,_,c in dets) + 0.3*sum(1 for _,t,_ in dets if len(re.sub(r"[^가-힣0-9]","",t))>0)
+     return det_degl if _score(det_degl) >= 0.85 * _score(det_orig) else det_orig
+
+ def _build_text_mask_from_easyocr(detections, shape_hw, dilate_px=2):
+     H, W = shape_hw[:2]
+     mask = np.zeros((H, W), np.uint8)
+     if not detections: return mask
+     polys = [np.array(bbox, dtype=np.int32).reshape(-1, 1, 2) for bbox, _, _ in detections]
+     if polys:
+         cv2.fillPoly(mask, polys, 255)
+     if dilate_px > 0:
+         k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_px*2+1, dilate_px*2+1))
+         mask = cv2.dilate(mask, k, 1)
+     return mask
+
+ def _auto_led_params_simple(shape, k_frac=0.015, area_lo_frac=1e-5, area_hi_frac=1.5e-3):
+     h, w = shape[:2]
+     long_side = max(h, w)
+     k_auto = int(round(long_side * k_frac))
+     if k_auto % 2 == 0: k_auto += 1
+     k_auto = max(5, min(k_auto, 31))
+     min_area = max(6, int(h * w * area_lo_frac))
+     max_area = max(min_area+1, int(h * w * area_hi_frac))
+     return k_auto, min_area, max_area
+
+ def _detect_leds_glare_core(img_bgr, k=None, sigma=2.3, ring_px=7, ring_v_thr=200, core_s_thr_bg=78, dv_thr_bg=45, strict_aspect=(2.0, 4.2), strict_extent=0.64, strict_solidity=0.80, include_white=False, exclude_mask=None, dv_thr_any=35, min_short_px=10, min_area_abs=40):
+     def _masked_mean_median(img_gray, mask_bool):
+         vals = img_gray[mask_bool]
+         return (float(vals.mean()), float(np.median(vals))) if vals.size > 0 else (0.0, 0.0)
+     k_auto, min_area, max_area = _auto_led_params_simple(img_bgr.shape)
+     if not k or k <= 0: k = k_auto
+     g = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
+     g_eq = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(g)
+     Hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
+     H,S,V = cv2.split(Hsv)
+     se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k))
+     tophat = cv2.morphologyEx(g_eq, cv2.MORPH_TOPHAT, se)
+     m, s = float(tophat.mean()), float(tophat.std())
+     _, seed_th = cv2.threshold(tophat, np.clip(m + sigma*s, 40, 240), 255, cv2.THRESH_BINARY)
+     _, seed_v = cv2.threshold(V, 210, 255, cv2.THRESH_BINARY)
+     seed = cv2.bitwise_or(seed_th, seed_v)
+     m_color = (cv2.inRange(H, 35, 85) | cv2.inRange(H, 90, 140)) & (cv2.inRange(S, 50, 255) & cv2.inRange(V, 160, 255))
+     if include_white: m_color |= (cv2.inRange(S, 0, 60) & cv2.inRange(V, 200, 255))
+     reinforced = cv2.bitwise_and(seed, cv2.dilate(m_color, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)), 1))
+     ratio = (cv2.countNonZero(reinforced) / float(max(1, cv2.countNonZero(seed)))) if cv2.countNonZero(seed)>0 else 0.0
+     core = reinforced if ratio >= 0.3 else seed
+     if exclude_mask is not None:
+         core = cv2.bitwise_and(core, cv2.bitwise_not(exclude_mask))
+     core = cv2.medianBlur(core, 3)
+     core = cv2.morphologyEx(core, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)), 1)
+     core = cv2.morphologyEx(core, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5)), 1)
+     num, lab, stats, cents = cv2.connectedComponentsWithStats(core, 8)
+     for i in range(1, num):
+         if (min_short_px and stats[i,3] < min_short_px) or not (max(min_area, min_area_abs) <= stats[i,4] <= max_area):
+             core[lab == i] = 0
+     num, lab, stats, cents = cv2.connectedComponentsWithStats(core, 8)
+     leds, ring_kernel = [], cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ring_px*2+1, ring_px*2+1))
+     for i in range(1, num):
+         x,y,wid,hei,area = stats[i]
+         aspect = max(wid,hei) / max(1, min(wid,hei))
+         if aspect > 6.5: continue
+         comp_mask = (lab == i)
+         dil = cv2.dilate(comp_mask.astype(np.uint8), ring_kernel, 1).astype(bool)
+         ring_mask = np.logical_and(dil, np.logical_not(comp_mask))
+         core_v_mean, _ = _masked_mean_median(V, comp_mask)
+         _, ring_med = _masked_mean_median(V, ring_mask)
+         if (core_v_mean - ring_med) < dv_thr_any: continue
+         if ring_med >= ring_v_thr:
+             cnts, _ = cv2.findContours((comp_mask.astype(np.uint8) * 255), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+             if cnts:
+                 c = max(cnts, key=cv2.contourArea)
+                 solidity = cv2.contourArea(c) / max(1.0, cv2.contourArea(cv2.convexHull(c)))
+                 extent = area / float(max(1, wid*hei))
+                 if not (strict_aspect[0] <= aspect <= strict_aspect[1]) or extent < strict_extent or solidity < strict_solidity:
+                     continue
+         leds.append((int(x), int(y), int(wid), int(hei), (float(cents[i][0]), float(cents[i][1])), float(core_v_mean)))
+     return leds
+
+ def _norm_ko(s: str) -> str:
+     return re.sub(r"\s+", "", s or "")
+
+ def _find_category_anchors(items):
+     anchors = {}
+     for it in items:
+         raw = it["text"]; norm = _norm_ko(raw)
+         for cat in CATEGORY_OPTIONS.keys():
+             if cat in norm:
+                 x1,y1,x2,y2 = it["xyxy"]; h = (y2 - y1); area = (x2 - x1) * h
+                 prev = anchors.get(cat)
+                 if not prev or (h > prev.get("_h", -1)) or (h == prev.get("_h", -1) and area > prev.get("_a", -1)):
+                     anchors[cat] = {"center": it["center"], "xyxy": it["xyxy"], "_h": h, "_a": area}
+     for cat in anchors:
+         anchors[cat].pop("_h", None); anchors[cat].pop("_a", None)
+     return anchors
+
+ def _compute_center_band(items, img_shape):
+     H, W = img_shape[:2]
+     xs = [x for it in items if any(cat in _norm_ko(it["text"]) for cat in CATEGORY_OPTIONS.keys()) for x in (it["xyxy"][0], it["xyxy"][2])]
+     if len(xs) >= 2:
+         left = max(0.0, min(xs) - CENTER_BAND_PAD_REL * W)
+         right = min(float(W), max(xs) + CENTER_BAND_PAD_REL * W)
+     else:
+         left, right = CENTER_BAND_FALLBACK[0] * W, CENTER_BAND_FALLBACK[1] * W
+     return float(left), float(right)
+
+ def _match_leds_to_texts(items, leds, img_shape, dmax_px=None, rel_gate=1.1, x_orient_eps=4, y_orient_eps=0):
+     Hh, Ww = img_shape[:2]
+     dmax_px = dmax_px or max(50, int(0.065 * max(Hh, Ww)))
+     band_left, band_right = _compute_center_band(items, img_shape)
+     side_colw, side_dmax, side_y_tol, side_eucl_max = SIDE_COLW_REL*max(Hh,Ww), SIDE_DMAX_REL*max(Hh,Ww), SIDE_Y_TOL_REL*Hh, SIDE_EUCLID_MAX_REL*max(Hh,Ww)
+     choices = []
+     for li, (_x,_y,_w,_h,(cx, cy),bright) in enumerate(leds):
+         best_cand = None
+         for ti, it in enumerate(items):
+             tx, ty, tw, th, raw, x1, *_ = it["center"][0], it["center"][1], it["xyxy"][2]-it["xyxy"][0], it["xyxy"][3]-it["xyxy"][1], it["text"], it["xyxy"][0]
+             tok = _canon_text(raw)
+             if not tok or tok not in ALL_ALLOWED: continue
+             dist = 0
+             if _is_side_button(tok):
+                 # The LED must lie *outside* the center band
+                 if (cx < band_left or cx > band_right) and ty >= cy - side_y_tol and abs(tx - cx) <= max(side_colw, 0.5*tw):
+                     dist = max(0.0, ty - cy) + 0.3 * abs(tx - cx)
+                     if dist > side_dmax or math.hypot(tx - cx, ty - cy) > side_eucl_max:
+                         continue
+             else:
+                 if band_left <= cx <= band_right and band_left <= tx <= band_right and abs(ty-cy) <= max(y_orient_eps, 0.6*th) and x1 >= cx + max(CENTER_RIGHT_MIN_PX, CENTER_RIGHT_MIN_FRAC*tw):
+                     dist = math.hypot(tx-cx, ty-cy)
+                     if dist > dmax_px: continue
+             if dist > 0 and (not best_cand or dist < best_cand[0]):
+                 best_cand = (dist, ti, tok)
+         if best_cand:
+             dist, ti, tok = best_cand
+             choices.append((dist, li, ti, tok, float(bright), tuple(items[ti]["center"]), (cx,cy)))
+     choices.sort(key=lambda x: x[0])
+     used_led, used_txt, pairs_led = set(), set(), []
+     for d, li, ti, tok, bri, ptxt, pled in choices:
+         if li not in used_led and ti not in used_txt:
+             used_led.add(li); used_txt.add(ti)
+             pairs_led.append((ptxt, pled, tok, li, bri))
+     pairs_led.sort(key=lambda p: (int(p[1][1] // 30), p[1][0]))
+     return [p[2] for p in pairs_led], pairs_led
+
+ def _choose_and_enforce_categories(pairs_led, items, leds, img_shape, cw_rel=0.06, dmax_rel=0.20, fill_default=None):
+     H, W = img_shape[:2]; L = max(H, W)
+     colw, dmax = cw_rel * L, dmax_rel * L
+     picked = {}
+     bucket = {cat: [] for cat in CATEGORY_OPTIONS.keys()}
+     for _, _, tok, li, bri in pairs_led:
+         cat = _which_category(tok)
+         if cat: bucket[cat].append((tok, bri, li))
+     for cat, arr in bucket.items():
+         if arr: picked[cat] = max(arr, key=lambda x: x[1])[0]
+     anchors = _find_category_anchors(items)
+     for cat in CATEGORY_OPTIONS:
+         if cat in picked: continue
+         a = anchors.get(cat)
+         if a:
+             ax, ay = a["center"]
+             cand_leds = sorted([ (bri, idx) for idx, (*_, (cx,cy), bri) in enumerate(leds) if abs(cx-ax)<=colw and cy>=ay-2 ], reverse=True)
+             if cand_leds:
+                 led_center = leds[cand_leds[0][1]][4]
+                 best_tok, best_d = None, dmax
+                 for it in items:
+                     tok = _canon_text(it["text"])
+                     if tok in CAT2SET[cat]:
+                         tx, ty = it["center"]
+                         if abs(tx - ax) <= colw and ty >= ay - 2:
+                             d = math.hypot(tx - led_center[0], ty - led_center[1])
+                             if d < best_d: best_d, best_tok = d, tok
+                 picked[cat] = best_tok or (fill_default.get(cat) if fill_default else "미확인")
+     return picked
+
+ # --- Category diagnostic state (ON / TXT_ONLY / NO_TXT) builder ---
+ STATE_KR = {"ON":"확인됨", "TXT_ONLY":"텍스트만", "NO_TXT":"텍스트없음"}
+
+ def _build_category_status(items, pairs_led):
+     """
+     Diagnose the OCR-recognition / LED-matching state for each category.
+     Returns: {cat: {"picked": token or "미확인",
+                     "state": "ON" | "TXT_ONLY" | "NO_TXT"}}
+     """
+     # 1) Collect candidates read by OCR
+     ocr_tokens_by_cat = {cat: set() for cat in CATEGORY_OPTIONS.keys()}
+     for it in items:
+         tok = _canon_text(it.get("text",""))
+         cat = _which_category(tok)
+         if cat:
+             ocr_tokens_by_cat[cat].add(tok)
+
+     # 2) Collect tokens confirmed by LED-text matching
+     led_matched_by_cat = {cat: set() for cat in CATEGORY_OPTIONS.keys()}
+     for _,_,tok,_,_ in pairs_led:
+         cat = _which_category(tok)
+         if cat:
+             led_matched_by_cat[cat].add(tok)
+
+     # 3) Build the status map
+     status = {}
+     for cat in CATEGORY_OPTIONS.keys():
+         if led_matched_by_cat[cat]:
+             picked = sorted(list(led_matched_by_cat[cat]))[0]
+             state = "ON"
+         elif ocr_tokens_by_cat[cat]:
+             picked = "미확인"
+             state = "TXT_ONLY"
+         else:
+             picked = "미확인"
+             state = "NO_TXT"
+         status[cat] = {"picked": picked, "state": state}
+     return status
+
+ def _compose_readout(cat_map, side_on, order=READOUT_ORDER,
+                      diag_status: Dict[str, Dict[str,str]] = None,
+                      state_labels: Dict[str,str] = STATE_KR):
+     """
+     If diag_status is given, append the (state) tag after each category.
+     States: ON | TXT_ONLY | NO_TXT (mapped to Korean tags via STATE_KR).
+     """
+     parts = []
+     for k in order:
+         val = cat_map.get(k, "미확인")
+         if diag_status and k in diag_status:
+             st = diag_status[k]["state"]
+             tail = state_labels.get(st, st) if state_labels else st
+             parts.append(f"{k} {val}({tail})")
+         else:
+             parts.append(f"{k} {val}")
+     cat_sentence = ", ".join(parts)
+     side_sentence = " / ".join(side_on) if side_on else ""
+     final_parts = [p for p in (cat_sentence, side_sentence) if p]
+     return ", ".join(final_parts) if final_parts else "켜진 표시 없음"
+
+ # ==============================
+ # Final summary entry point
+ # ==============================
+ def summarize_scene(frame_bgr: np.ndarray, reader,
+                     do_pic=True, debug_font=None,
+                     debug_dir: Optional[str]=None,
+                     diagnostic: bool=False,
+                     # === (ADD) stabilization options ===
+                     stabilize: Optional[bool]=None,
+                     stabilizer: Optional[MotionStabilizer]=None) -> str:
+     """
+     Analyze the current frame (frame_bgr) and return a Korean summary sentence
+     describing the state of the control panel.
+     """
+     try:
+         # === (ADD) pick the (optionally) stabilized frame ===
+         use_stab = STABILIZE_DEFAULT if (stabilize is None) else bool(stabilize)
+         frame_in = frame_bgr
+         if use_stab:
+             global _global_stabilizer
+             st = stabilizer or _global_stabilizer
+             if st is None:
+                 st = MotionStabilizer(downscale=0.5, ecc=True, homography=False)
+                 _global_stabilizer = st
+             frame_in = st.apply(frame_bgr)
+
+         # 1. Detect the panel ROI and rectify it (using frame_in)
+         (x0,y0,x1,y1), panel_mask_full = _detect_panel_roi(frame_in)
+         panel_bgr = frame_in[y0:y1, x0:x1].copy()
+         panel_mask_roi = panel_mask_full[y0:y1, x0:x1].copy()
+         panel_rect, H = _deskew_panel_by_mask(panel_bgr, panel_mask_roi)
+         Hinv = np.linalg.inv(H) if H is not None else None
+
+         # 2. OCR (with de-glare when needed)
+         result_panel = _ocr_with_deglare_when_needed(panel_rect, reader)
+         items_local = _easyocr_to_items(result_panel)
+
+         # 3. Build the text mask and detect LEDs
+         text_mask_local = _build_text_mask_from_easyocr(result_panel, panel_rect.shape[:2])
+         leds_local = _detect_leds_glare_core(
+             panel_rect, k=15, sigma=2.0, include_white=True,
+             exclude_mask=text_mask_local, dv_thr_any=22, min_short_px=10, min_area_abs=40
+         )
+
+         # 4. Map OCR/LED results back to the original coordinates (frame_in frame of reference)
+         items = []
+         if Hinv is not None:
+             for it in items_local:
+                 mapped = _warp_points(Hinv, it["box"]) + np.array([x0, y0])
+                 xs, ys = mapped[:,0], mapped[:,1]
+                 items.append({"text": it["text"], "conf": it["conf"], "box": mapped.tolist(),
+                               "center": (xs.mean(), ys.mean()),
+                               "xyxy": np.array([xs.min(), ys.min(), xs.max(), ys.max()])})
+         else:
+             for it in items_local:
+                 bx = np.array(it["box"]) + np.array([x0, y0])
+                 xs, ys = bx[:,0], bx[:,1]
+                 items.append({"text": it["text"], "conf": it["conf"], "box": bx.tolist(),
+                               "center": (xs.mean(), ys.mean()),
+                               "xyxy": np.array([xs.min(), ys.min(), xs.max(), ys.max()])})
+
+         leds = []
+         if Hinv is not None:
+             for (x,y,w,h,c,b) in leds_local:
+                 gx,gy,gw,gh = _map_rect_from_rectified(Hinv, x,y,w,h, offset=(x0,y0))
+                 gcx, gcy = (_warp_points(Hinv, [c]) + np.array([x0, y0]))[0]
+                 leds.append((gx,gy,gw,gh, (gcx, gcy), b))
+         else:
+             for (x,y,w,h,c,b) in leds_local:
+                 leds.append((x+x0, y+y0, w,h, (c[0]+x0, c[1]+y0), b))
+
+         # 5. Match LEDs to texts (using frame_in.shape)
+         led_tokens, pairs_led = _match_leds_to_texts(items, leds, frame_in.shape)
+
+         # 6. Final per-category selection and sentence composition
+         cat_map = _choose_and_enforce_categories(pairs_led, items, leds, frame_in.shape)
+
+         # === Added: category states (ON/TXT_ONLY/NO_TXT) ===
+         diag_status = _build_category_status(items, pairs_led) if diagnostic else None
+
+         side_on = sorted(list(set(tok for _,_,tok,_,_ in pairs_led if _is_side_button(tok))))
+         final_text = _compose_readout(cat_map, side_on, diag_status=diag_status)
+
+         if do_pic:
+             try:
+                 import time as _time
+                 from PIL import Image, ImageDraw, ImageFont
+                 out_dir = debug_dir or os.path.join(os.getcwd(), "debug_summaries")
+                 os.makedirs(out_dir, exist_ok=True)
+
+                 vis = frame_in.copy()  # (CHANGE) visualize on the stabilized frame
+
+                 for it in items:
+                     poly = np.array(it["box"], dtype=np.int32)
+                     cv2.polylines(vis, [poly], True, (0, 255, 0), 2, cv2.LINE_AA)
+                 for (x, y, w, h, (cx, cy), bri) in leds:
+                     cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 165, 0), 2)
+                     cv2.circle(vis, (int(cx), int(cy)), 3, (255, 165, 0), -1)
+
+                 def _pick_kr_font(size=20, font_path=None):
+                     cands = [
+                         font_path,
+                         r"C:\Windows\Fonts\malgun.ttf",
+                         r"C:\Windows\Fonts\malgunbd.ttf",
+                         "/System/Library/Fonts/AppleSDGothicNeo.ttc",
+                         "/usr/share/fonts/truetype/nanum/NanumGothic.ttf",
+                         "/usr/share/fonts/truetype/noto/NotoSansKR-Regular.ttf",
+                         "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
+                     ]
+                     for p in cands:
+                         if p and os.path.exists(p):
+                             try:
+                                 return ImageFont.truetype(p, size)
+                             except Exception:
+                                 pass
+                     return ImageFont.load_default()
+
+                 pil = Image.fromarray(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
+                 draw = ImageDraw.Draw(pil)
+                 font = _pick_kr_font(size=20, font_path=debug_font)
+
+                 for it in items:
+                     x1, y1 = int(it["xyxy"][0]), int(it["xyxy"][1])
+                     label = f"{it.get('text','')} ({it.get('conf',0.0):.2f})"
+                     bbox = draw.textbbox((0, 0), label, font=font)
+                     tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1]
+                     draw.rectangle([x1, y1 - th - 6, x1 + tw + 8, y1 + 2], fill=(0, 0, 0))
+                     draw.text((x1 + 4, y1 - th - 4), label, font=font, fill=(255, 255, 255))
+
+                 vis = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
+
+                 ts = _time.strftime("%Y%m%d_%H%M%S"); ms = int((_time.time() % 1) * 1000)
+                 # The summary can contain "/" (side-button joiner); strip characters invalid in file names.
+                 safe_name = re.sub(r'[\\/:*?"<>|]', "_", final_text or "미확인 상태")
+                 out_path = os.path.join(out_dir, f"{safe_name} ({ts}_{ms:03d}).png")
+                 cv2.imwrite(out_path, vis)
+             except Exception as _e:
+                 print(f"[seeing.summarize_scene] pic save failed: {_e}")
+
+         return final_text or "켜진 표시 없음"
+
+     except Exception as e:
+         print(f"[seeing.summarize_scene] error: {e}")
+         return "현재 상태를 파악하는데 실패했습니다."
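For reference, a minimal sketch of how this module is driven (total_mode.py does the equivalent in its INFO worker thread). The camera index, `gpu=False`, and the plain EasyOCR reader (without the repo's custom `best_accuracy1` network) are placeholder assumptions here, not part of the commit:

    import cv2
    import easyocr
    import seeing

    # Assumed setup: default Korean EasyOCR reader and webcam 0.
    reader = easyocr.Reader(['ko'], gpu=False)
    cap = cv2.VideoCapture(0)
    ok, frame = cap.read()
    if ok:
        summary = seeing.summarize_scene(
            frame, reader,
            do_pic=True,              # save an annotated debug image
            debug_dir="logs/ocr_bbox",
            diagnostic=True)          # append ON/TXT_ONLY/NO_TXT tags per category
        print(summary)
    cap.release()

With stabilization left at its default (STABILIZE_DEFAULT = True), the first call seeds the internal MotionStabilizer and subsequent calls warp each frame toward the previous one before OCR and LED detection.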
total_mode.py ADDED
@@ -0,0 +1,1885 @@
+ # Fingertip-Only OCR — EasyOCR-only + SIM-history warp
+ # (OP/INFO mode schedule rework & HUD improvements)
+ # --------------------------------------------------------------
+
+ import cv2, time, numpy as np, threading, queue, os, math, re, traceback
+ from PIL import Image, ImageDraw, ImageFont
+ from tts_reader import TTSReader
+ import seeing
+
+ try:
+     from rapidfuzz import process, fuzz
+     from jamo import h2j, j2hcj
+     _SPELLFIX_OK = True
+ except Exception as _e:
+     print(f"[SPELLFIX] disabled: {_e}")
+     _SPELLFIX_OK = False
+
+ # ========= User / Display =========
+ CAMERA_ID = 0
+ CAPTURE_TARGET_W = 1920
+ CAPTURE_TARGET_H = 1080
+ WORK_WIDTH_TARGET = 1280
+ DISPLAY_MAX_W = 1280
+ WINDOW_NAME = 'Assistive Fingertip OCR (fast)'
+
+ # ========= OCR / Scheduling =========
+ OCR_ENABLED = True
+ BASE_OCR_PERIOD = 1.5
+ EXTRA_OCR_PERIOD = 0.6
+ STALE_AGE_SEC = 7.0
+ LOW_CONF_TH = 0.55
+
+ # ========= ROI (work-space) =========
+ # <Experiment 1> Problem: the lettering on the Seoyeon washer is too small to detect
+ # [CASE 1] ROI_W, ROI_H = 420, 420 -> better detection (still not enough)
+ # [CASE 2] Crop the Seoyeon washer photo so the lettering appears larger (not tried)
+ # [CASE 3] Feed the original instead of capping MAX_OCR_LONG at 420 (slower)
+ # [CASE 4] Print at actual size
+
+ ROI_W, ROI_H = 420, 420
+ MIN_ROI_W, MIN_ROI_H = 200, 120
+ BLUR_VAR_THRESH = 80.0
+
+ # ROI keep-alive grace (refresh TTLs inside the ROI even if the finger drops out briefly)
+ ROI_KEEPALIVE_GRACE_SEC = 1.2
+ last_roi_active_until = 0.0
+ # ========= No masking =========
+ # USE_MASKED_FULL_ROI = True  removed (8.20)
+ # EXCLUDE_PAD = 8
+ # MASK_FILL_VAL = (127,127,127)
+
+ # ========= Donut OCR (unused) =========
+ # DONUT_PAD = 3  removed (8.20)
+ # SUBROI_MIN_AREA = 1200
+ # MAX_SUBROIS = 1
+
+ # ========= TTL / Pruning =========
+ BASE_TTL = 3.0  # extension time tuned (8.20)
+ PIN_GRACE_SEC = 1.2
+ MAX_OVERLAYS = 300
+ # ONSCREEN_KEEPALIVE = 0.8  removed (8.20)
+ HARD_MAX_LIFETIME = 9.0
+ no_repeat_until_ts = 2.0  # window during which the same sentence is not re-spoken (8.21)
+
+ IGNORE_HARD_CAP_WHILE_FINGER_IN_ROI = True
+ PRUNE_TIMEOUT_SEC = 0.5  # prune interval made a variable (8.20)
+
+ # ========= Merge criteria =========
+ MERGE_IOU_TH = 0.50
+ MERGE_CENTER_DIST = 28.0
+
+ # ========= TTS =========
+ TTS_ENABLE = True
+ TTS_CONF = 0.0  # probably needs a speaking threshold; currently everything is spoken (8.20)
+ TTS_REPEAT_SEC = 1.0
+ # TTS_QUEUE_MAX = 1  removed (8.20)
+ TTS_TARGET_STICKY_SEC = 0.6
+ # TTS_DEBUG = False  removed (8.20)
+ # TTS_STRICT_LATEST = True  removed (8.20)
+
+ STRICT_DICT_ONLY = True
+ TTS_CONF_FALLBACK = 0.35
+
+ SHOW_TTS_HINT = True
+ tts_current_display = ""
+ tts_current_note = ""
+ tts_last_spoken_text = ""  # <<< CHANGED: keep the last spoken phrase on the HUD
+
+ # ★ Added: trigger one immediate summary right after the mode-switch announcement + preemption lock
+ INFO_FORCE_IMMEDIATE = False
+ tts_force_lock = threading.Lock()
+
+ # ========= Speed knobs =========
+ # Fix 1: MAX_OCR_LONG 416 -> 420
+ MAX_OCR_LONG = 420
+ ENHANCE_MODE = "off"
+ MOTION_GATE_PX = 2.0
+ MAX_TEXT_DRAW = 30
+
+ # ========= Global tracking (SIM) =========
+ FLOW_DS = 0.45
+ FLOW_MAX_CORNERS=240; FLOW_QUALITY=0.01; FLOW_MIN_DISTANCE=7
+ FLOW_WINSIZE=(21,21); FLOW_LEVELS=3
+ RESEED_INTERVAL_FRAMES=8
+
+ MAX_TRANS_PX = 90
+ MAX_SCALE_STEP = 0.18
+ MAX_ROT_STEP_DEG = 10.0
+ EMA_ALPHA_SIM = 0.28
+
+ USE_ORB_FALLBACK = True
+ ORB_NFEATURES=600; ORB_MIN_GOOD=45
+
+ # ========= Finger =========
+ EMA_ALPHA_FINGER=0.35
+ FINGER_STALE_MS = 800
+ finger_last_seen = 0.0
+ # had_finger = False  removed (8.20)
+ last_finger_xy = None
+
+ # ========= YOLO =========
+ YOLO_DEBUG = True
+ YOLO_DRAW_ALL = True
+ YOLO_IMG_SIZE = 640
+ YOLO_CONF_TH = 0.25
+ YOLO_IOU_TH = 0.50
+ YOLO_CLASS_NAME = None
+ YOLO_CLASS_ID = 0
+ # fingerip_o.pt can also be used
+ YOLO_WEIGHTS = r'weights/fingertip.pt'
+
+ YOLO_SHOW_INPUT = False
+ YOLO_INPUT_WIN = 'YOLO_INPUT'
+
+ DO_PIC = True  # save INFO-mode input/output images (8.21)
+ # # ========= speed change parameters =========
+ # # Resolution / scale
+ # WORK_WIDTH_TARGET = 960
+ # YOLO_IMG_SIZE = 448
+ # MAX_OCR_LONG = 360
+ # FLOW_DS = 0.35
+
+ # # ROI size
+ # ROI_W, ROI_H = 270,270
+
+ # # Frequencies / periods
+ # BASE_OCR_PERIOD = 2.0
+ # EXTRA_OCR_PERIOD = 0.9
+ # RESEED_INTERVAL_FRAMES = 12
+ # PRUNE_TIMEOUT_SEC = 1.0
+
+ # # Global SIM
+ # FLOW_MAX_CORNERS = 150
+ # FLOW_WINSIZE = (17,17)
+ # FLOW_LEVELS = 2
+ # # lower estimateAffinePartial2D maxIters to ~800
+
+ # # KLT
+ # KLT_LEVELS = 2
+ # KLT_WIN = (25,25)
+ # KLT_TERM = (cv2.TERM_CRITERIA_EPS|cv2.TERM_CRITERIA_COUNT, 12, 0.03)
+ # KLT_N_SAMPLES = 6
+ # KLT_USE_CLAHE = False
+
+ # # YOLO
+ # YOLO_CONF_TH = 0.3  # less noise
+ # # yolo_model.predict(..., half=True)  # (when on GPU)
+
+ # # OCR
+ # # reduce to rotation_info=[0]
+ # # canvas_size=1280, mag_ratio=1.1
+ # MAX_TEXT_DRAW = 30
+ # MAX_OVERLAYS = 150
+
+
+ # ========= KLT fallback =========
+ # Fix 2: False -> True
+ USE_KLT_FALLBACK = True
+ KLT_WIN=(31,31); KLT_LEVELS=4
+ KLT_TERM=(cv2.TERM_CRITERIA_EPS|cv2.TERM_CRITERIA_COUNT, 20, 0.03)
+ KLT_FB_MAX=4.0; KLT_ERR_MAX=100.0; KLT_STEP_MAX=30.0
+ KLT_OUT_MARGIN=4; KLT_N_SAMPLES=12; KLT_RING_R=10
+ # KLT_RESEED_EVERY=6  removed (8.20)
+ KLT_MIN_GOOD=5; KLT_LOSS_GRACE=3
+ KLT_USE_CLAHE=True
+
+ klt_pts_prev=None; klt_lost_frames=0  # frames_since_reseed=0 removed (8.20)
+
+ # ========= OCR time meter =========
+ OCR_EMA=None; OCR_EMA_ALPHA=0.25
+
+ # ==== Dict-based merge parameters ====
+ # Fix 3: 80 -> 60
+ DICT_MERGE_SCORE = 70  # dictionary-mapping tuning (8.21)
+ DICT_TIE_DELTA = 3
+ DICT_ONLY = True  # show dictionary words only (8.21)
+
+ # ========= GUIDE MODE =========
+ GUIDE_MODE = False
+ GUIDE_TARGET = None
+ GUIDE_TOL_PX = 40
+ GUIDE_REPEAT_SEC = 1.0
+ GUIDE_LAST_TS = 0.0
+ GUIDE_LAST_SENT = ""
+ GUIDE_TARGET_ITEM = None
+ GUIDE_REQUIRE_FINGER = True
+
+ # ========= INFO/OP modes =========
+ MODE_OP = 1     # operation mode
+ MODE_INFO = 2   # viewing (INFO) mode
+ MODE_GUIDE = 3  # guide mode
+
+ mode_lock = threading.Lock()
+ mode_state = MODE_OP
+
+ # Viewing period (seconds)
+ INFO_PERIOD_SEC = 5.0  # <<< CHANGED: 8s → 5s
+
+ # For immediate-run / periodic scheduling
+ _next_info_due = 0.0
+
+ # Latest frame shared with the INFO worker
+ _latest_frame_for_info = None
+ _latest_frame_lock = threading.Lock()
+
+ # INFO thread control
+ _info_stop = threading.Event()
+
+ def _is_speaker_busy() -> bool:
+     try:
+         import pygame
+         return pygame.mixer.music.get_busy()
+     except Exception:
+         return False
+
+ def _say_once(text: str):
+     """Safely play a single sentence (async TTS) and keep it displayed.
+     - Wait briefly for playback to start (try up to 2 s)
+     - Poll until playback ends (up to 30 s), then clear only the target to block repeats
+     - The HUD keeps showing the last utterance via tts_last_spoken_text
+     """
+     global no_repeat_until_ts
+
+     t_start = time.time()  # ★ restore the t_start that had been missing
+     set_tts_target(text)
+     no_repeat_until_ts = time.time() + 60.0  # block re-enqueueing the same sentence (safety margin)
+
+     # Detect playback start (up to 2 s)
+     while not _info_stop.is_set():
+         if _is_speaker_busy():
+             break
+         if '_last_spoken_enqueue_ts' in globals() and _last_spoken_enqueue_ts >= t_start:
+             time.sleep(0.1)
+             break
+         if (time.time() - t_start) > 2.0:
+             break
+         time.sleep(0.02)
+
+     # Wait for playback to finish (up to 30 s)
+     t0 = time.time()
+     while _is_speaker_busy() and not _info_stop.is_set():
+         if (time.time() - t0) > 30.0:
+             break
+         time.sleep(0.05)
+
+     # Clear only the target to prevent repeats (display is kept via tts_last_spoken_text)
+     set_tts_target(None)
+     no_repeat_until_ts = 0.0
+
+ def announce_force_async(text: str, after=None):
+     """Mode-switch only: interrupt any current playback immediately and speak text first."""
+     def _runner():
+         with tts_force_lock:
+             if TTS_ENABLE and tts is not None:
+                 try: tts.clear_queue()
+                 except Exception: pass
+                 for m in ("stop","cancel","flush"):
+                     if hasattr(tts, m):
+                         try: getattr(tts, m)()
+                         except Exception: pass
+             _say_once(text)  # keeps the last announcement on the HUD + same repeat-suppression logic
+             if callable(after):
+                 try: after()
+                 except Exception: pass
+     threading.Thread(target=_runner, daemon=True).start()
+
+ def _enter_op_mode():
+     # 1) Cut off any ongoing TTS, 2) finish saying "조작 모드로 전환합니다", 3) then apply the mode
+     def _after():
+         global mode_state
+         with mode_lock:
+             mode_state = MODE_OP
+             globals().update(GUIDE_MODE=False)
+     announce_force_async("조작 모드로 전환합니다.", after=_after)
+
+ def _enter_info_mode():
+     # 1) Preempting announcement → 2) once it finishes, set the INFO flag and allow the first summary immediately
+     def _after():
+         global mode_state, _next_info_due, INFO_FORCE_IMMEDIATE
+         with mode_lock:
+             mode_state = MODE_INFO
+             _next_info_due = 0.0          # run once immediately after entering
+             INFO_FORCE_IMMEDIATE = True   # summarize right away on the next loop
+             globals().update(GUIDE_MODE=False)
+     announce_force_async("보기 모드로 전환합니다. 지금부터 상황을 설명합니다.", after=_after)
+
+
+ def _enter_guide_mode():
+     # 1) Preempting announcement → 2) apply GUIDE mode after it finishes
+     def _after():
+         global mode_state, GUIDE_MODE
+         with mode_lock:
+             mode_state = MODE_GUIDE
+             GUIDE_MODE = True
+     announce_force_async("안내 모드로 전환합니다. 목표를 지정해 주세요.", after=_after)
+
+
+ # def _finger_present_now() -> bool:  # finger detection removed (8.21)
+ #     try:
+ #         if last_finger_xy is None:
+ #             return False
+ #         return (time.time() - finger_last_seen) * 1000.0 <= FINGER_STALE_MS
+ #     except NameError:
+ #         return False
+
+ # def _wait_till_no_finger(max_wait_sec: float = 8.0):
+ #     t0 = time.time()
+ #     while _finger_present_now() and not _info_stop.is_set():
+ #         if time.time() - t0 > max_wait_sec:
+ #             break
+ #         time.sleep(0.05)
+
+ def _info_worker():
+     """INFO mode: run once immediately on entry, then every 5 seconds.
+     If speech is in progress, run at 'end of speech + 2 s';
+     but the single run right after the mode-entry announcement runs with no delay."""
+     global _next_info_due, INFO_FORCE_IMMEDIATE
+
+     while not _info_stop.is_set():
+         time.sleep(0.05)
+
+         with mode_lock:
+             info_on = (mode_state == MODE_INFO)
+         if not info_on:
+             _next_info_due = 0.0
+             continue
+
+         now = time.time()
+         if now < _next_info_due:
+             continue
+
+         # 1) If speech is in progress, wait until it finishes
+         was_busy = False
+         while _is_speaker_busy() and not _info_stop.is_set():
+             was_busy = True
+             time.sleep(0.05)
+
+         # 1-1) Normal case: once speech has finished, push back by 2 seconds;
+         #      but if it was the mode-entry announcement, proceed immediately with no delay
+         if was_busy:
+             if INFO_FORCE_IMMEDIATE:
+                 # The entry announcement just finished → run once immediately
+                 INFO_FORCE_IMMEDIATE = False
+             else:
+                 _next_info_due = time.time() + 2.0
+                 continue
+
+         # 2) If a finger is visible, ask (once) to move it away and wait until it is gone
+
+
+         # 3) Summarize the latest frame
+         with _latest_frame_lock:
+             frame = None if _latest_frame_for_info is None else _latest_frame_for_info.copy()
+
+         if frame is not None:
+             try:
+                 summary = seeing.summarize_scene(frame, easy_reader, do_pic=DO_PIC, debug_dir=r"logs/ocr_bbox", debug_font=r"C:\Windows\Fonts\malgun.ttf")
+             except Exception as e:
+                 print("[INFO] summarize failed:", e)
+                 summary = None
+
+             if summary:
+                 _say_once(summary)
+
+         # 4) Schedule the next run (now + 5 s)
+         _next_info_due = time.time() + INFO_PERIOD_SEC
+
+ # ===== STT =====
+ USE_STT = True
+ try:
+     import speech_recognition as sr
+     _STT_OK = True
+ except Exception as _e:
+     print(f"[STT] disabled: {_e}")
+     _STT_OK = False
+
+ # ========= GPU / OCR / YOLO load =========
+ def torch_cuda_ok():
+     try:
+         import torch
+         ok = bool(torch.cuda.is_available())
+         print(f"[GPU] torch CUDA available: {ok}")
+         return ok
+     except Exception as e:
+         print(f"[GPU] torch check failed: {e}")
+         return False
+
+ gpu_ok = torch_cuda_ok()
+
+ OCR_ENGINE=None; easy_reader=None
+ import easyocr
+ try:
+     easy_reader = easyocr.Reader(['ko'], gpu=gpu_ok,
+                                  model_storage_directory='models',
+                                  user_network_directory='user_network',
+                                  recog_network='best_accuracy1',
+                                  download_enabled=False)
+     OCR_ENGINE = 'easyocr_gpu' if gpu_ok else 'easyocr_cpu'
+     print(f"[OCR] EasyOCR (GPU={gpu_ok})")
+ except Exception as e:
+     traceback.print_exc()
+     raise SystemExit("No OCR engine available")
+
+ # === Start the INFO (scene description) thread ===
+ def _start_info_thread_once():
+     if not hasattr(_start_info_thread_once, "_started"):
+         threading.Thread(target=_info_worker, daemon=True).start()
+         _start_info_thread_once._started = True
+ _start_info_thread_once()
+
+ # ========= YOLO =========
+ try:
+     from ultralytics import YOLO
+     yolo_device = 0 if gpu_ok else 'cpu'
+     yolo_model = YOLO(YOLO_WEIGHTS)
+     print(f"[YOLO] Loaded: {YOLO_WEIGHTS} (device={yolo_device})")
+     class_names = yolo_model.names
+     if YOLO_CLASS_NAME:
+         inv = {str(v).lower(): int(k) for k, v in class_names.items()}
+         if YOLO_CLASS_NAME.lower() in inv:
+             YOLO_CLASS_ID = inv[YOLO_CLASS_NAME.lower()]
+ except Exception as e:
+     traceback.print_exc()
+     raise SystemExit("[YOLO] 모델 로드 실패. YOLO_WEIGHTS 경로/파일 확인")
+
+ def _pick_best_tip(cands, last_xy):
+     if not cands: return None
+     if last_xy is None:
+         return max(cands, key=lambda t: t[2])
+     lx, ly = last_xy
+     def score(t):
+         cx, cy, conf, _ = t
+         d2 = (cx-lx)**2 + (cy-ly)**2
+         return conf - 0.0005*d2
+     return max(cands, key=score)
+
+ # --- YOLO async worker ---
+ yolo_in_q=queue.Queue(maxsize=1); yolo_out_q=queue.Queue(maxsize=1); yolo_stop=threading.Event()
+ def _yolo_worker():
+     while not yolo_stop.is_set():
+         try:
+             frame = yolo_in_q.get(timeout=0.2)
+         except queue.Empty:
+             continue
+         yolo_in_vis, _r, _off = _yolo_letterbox_bgr(frame, YOLO_IMG_SIZE)
+         res = yolo_model.predict(source=frame, imgsz=YOLO_IMG_SIZE,
+                                  conf=YOLO_CONF_TH, iou=YOLO_IOU_TH,
+                                  device=yolo_device, verbose=False)
+         det=None; raw_boxes=[]
+         if res and res[0].boxes is not None and len(res[0].boxes) > 0:
+             cands=[]
+             for b in res[0].boxes:
+                 x1,y1,x2,y2 = b.xyxy[0].tolist()
+                 conf = float(b.conf[0]) if b.conf is not None else 0.0
+                 cls_id = int(b.cls[0]) if b.cls is not None else 0
+                 raw_boxes.append((x1,y1,x2,y2,conf,cls_id))
+                 if YOLO_CLASS_ID is not None and cls_id != YOLO_CLASS_ID: continue
+                 cx, cy = (x1+x2)/2.0, (y1+y2)/2.0
+                 cands.append((cx, cy, conf, (x1, y1, x2-x1, y2-y1)))
+             best=_pick_best_tip(cands, last_finger_xy)
+             if best is not None:
+                 cx, cy, conf, (x,y,w,h) = best
+                 det={'xy':(int(round(cx)), int(round(cy))),
+                      'box':(int(x), int(y), int(w), int(h)),
+                      'conf':conf, 'ts':time.time(),
+                      'raw_boxes':raw_boxes, 'yolo_in':yolo_in_vis}
+         else:
+             det={'xy':None, 'raw_boxes':[], 'yolo_in':yolo_in_vis}
+         try:
+             while True: yolo_out_q.get_nowait()
+         except queue.Empty:
+             pass
+         try: yolo_out_q.put_nowait(det)
+         except queue.Full: pass
+ threading.Thread(target=_yolo_worker, daemon=True).start()
+
+ def _yolo_letterbox_bgr(img, new_size=YOLO_IMG_SIZE, pad_val=114):
+     h, w = img.shape[:2]
+     r = min(new_size / float(h), new_size / float(w))
+     new_w, new_h = int(round(w*r)), int(round(h*r))
+     resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
+     dw = (new_size - new_w) / 2.0; dh = (new_size - new_h) / 2.0
+     left, right = int(math.floor(dw)), int(math.ceil(dw))
+     top, bottom = int(math.floor(dh)), int(math.ceil(dh))
+     out = cv2.copyMakeBorder(resized, top, bottom, left, right,
+                              borderType=cv2.BORDER_CONSTANT,
+                              value=(pad_val, pad_val, pad_val))
+     return out, r, (left, top)
+
+ # ========= TTS =========
+ # tts_q=queue.Queue(maxsize=TTS_QUEUE_MAX)  removed (8.20)
+ # tts_is_speaking=threading.Event()  removed (8.20)
+ tts_stop=threading.Event()
+ tts_target_lock=threading.Lock()
+ tts_target_text=None
+ _last_spoken_enqueue_ts=0.0
+ # tts_last_done_ts=0.0  removed (8.20)
+ tts_last_seen_target_ts=0.0
+
+ SPELLFIX_ENABLE=True
+ JAMO_THRESHOLD=85; JAMO_THRESHOLD_LOWCONF=80
+ # Fix 4: extend the dictionary (to cover the Seoyeon washer)
+ #"동작","일시정지",
+ DICT_WORDS=["통살균","원격제어","예약","내마음","세탁","헹굼","탈수","물온도","빨래추가","알림음","구김방지","터보샷", "강력물살","온수세탁","냉수세탁","물높이","코스","동작/일시정지","전원"]
+ CANON={"표준세탁":"세탁","손세탁":"세탁"}
+
+ def _to_jamo(s: str) -> str:
+     try:
+         return j2hcj(h2j(s))
+     except Exception:
+         return s or ""
+
+ if _SPELLFIX_OK:
+     _DICT_JAMO=[_to_jamo(w) for w in DICT_WORDS]
+ else:
+     _DICT_JAMO=[]
+
+ _TOKENIZER=re.compile(r"[가-힣A-Za-z0-9]+|[^\s가-힣A-Za-z0-9]")
+
+ def correct_token(tok: str, threshold: int):
+     if not (_SPELLFIX_OK and SPELLFIX_ENABLE and _DICT_JAMO and tok):
+         return tok, 0.0
+     q=_to_jamo(tok)
+     res=process.extractOne(q, _DICT_JAMO, scorer=fuzz.ratio, score_cutoff=threshold)
+     if not res: return tok, 0.0
+     matched, score, idx=res
+     best=DICT_WORDS[idx]; best=CANON.get(best, best)
+     return best, float(score)
+
+ def correct_text(text: str, threshold: int):
+     if not (_SPELLFIX_OK and SPELLFIX_ENABLE and _DICT_JAMO and text):
+         return text, False
+     out=[]; changed=False
+     for tok in _TOKENIZER.findall(text):
+         if re.match(r"^[가-힣A-Za-z0-9]+$", tok):
+             fixed, sc = correct_token(tok, threshold=threshold)
+             if fixed!=tok: changed=True
+             out.append(fixed)
+         else:
+             out.append(tok)
+     return "".join(out), changed
+
+ DICT_SPEAK_ENABLE=True
+ DICT_THRESHOLD=80; DICT_THRESHOLD_LOWCONF=80
+ def _build_dict_index(words, canon_map):
+     keys=[]; vals=[]
+     for w in words: keys.append(w); vals.append(canon_map.get(w,w))
+     for alias, canon in canon_map.items(): keys.append(alias); vals.append(canon)
+     keys_j=[_to_jamo(re.sub(r"\s+","",k)) for k in keys]
+     return keys, keys_j, vals
+ _DICT_KEYS, _DICT_KEYS_J, _DICT_VALS=_build_dict_index(DICT_WORDS, CANON)
+ # _DICT_KEYS_PLAIN=[re.sub(r"\s+","",k).casefold() for k in _DICT_KEYS]  removed (8.20)
+ def _normalize_plain(s:str)->str: return re.sub(r"\s+","",(s or "")).casefold()
+ def map_to_dict_canon(text: str, threshold: int):  # in use
+     if not DICT_SPEAK_ENABLE or not text: return None, 0.0
+     if _SPELLFIX_OK:
+         queries=[]
+         s=re.sub(r"\s+","",text)
+         if s: queries.append(_to_jamo(s))
+         for tok in _TOKENIZER.findall(text):
+             if re.match(r"^[가-힣A-Za-z0-9]+$", tok): queries.append(_to_jamo(tok))
+         best_idx, best_sc=-1, 0.0
+         for q in queries:
+             res=process.extractOne(q, _DICT_KEYS_J, scorer=fuzz.ratio, score_cutoff=threshold)
+             if res:
+                 _, sc, idx=res
+                 if sc>best_sc:
+                     best_sc=float(sc); best_idx=int(idx)
+         if best_idx>=0: return _DICT_VALS[best_idx], best_sc
+
+     # Fix 6: decide by dictionary match score only; prevents options being swallowed by
+     # substring matches (e.g. "강" matching "강력세탁").
+     # q_full=_normalize_plain(text)
+     # q_tokens=[_normalize_plain(tok) for tok in _TOKENIZER.findall(text) if re.match(r"^[가-힣A-Za-z0-9]+$", tok)]
+     # for q in [q_full]+q_tokens:
+     #     if not q: continue
+     #     for i,k in enumerate(_DICT_KEYS_PLAIN):
+     #         if q==k: return _DICT_VALS[i], 100.0
+     # for q in [q_full]+q_tokens:
+     #     if not q: continue
+     #     for i,k in enumerate(_DICT_KEYS_PLAIN):
+     #         if (k and k in q) or (q and q in k): return _DICT_VALS[i], 90.0
+     return None, 0.0
+
+ def enrich_with_dict(text: str, conf: float):
+     canon, sc = map_to_dict_canon(text, threshold=DICT_MERGE_SCORE)
+     display = canon if canon else text
+     return display, canon, float(sc or 0.0), float(conf or 0.0)
+
+ def _has_korean(s: str)->bool:
+     return any('가'<=ch<='힣' for ch in (s or ""))
+
+ # (add anywhere near the tts / guide thread)
+ # def announce_async(text: str):  removed (8.20)
+ #     threading.Thread(target=_say_once, args=(text,), daemon=True).start()
+
+
616
+ # TTS 초기화
617
+ try:
618
+ import tempfile, os
619
+ try:
620
+ tts=TTSReader(cooldown_sec=TTS_REPEAT_SEC, speaking_rate=1.05, pitch=0.0,
621
+ min_len=2, credentials_path=r"yugpae-4f8335e15ba0.json",
622
+ cache_dir=None, persist_cache=False)
623
+ except TypeError:
624
+ tts=TTSReader(cooldown_sec=TTS_REPEAT_SEC, speaking_rate=1.05, pitch=0.0,
625
+ min_len=2, credentials_path=r"yugpae-4f8335e15ba0.json")
626
+ for attr in ("set_cache","disable_cache"):
627
+ if hasattr(tts, attr):
628
+ try: getattr(tts, attr)(persist=False, dir=None)
629
+ except Exception: pass
630
+ try:
631
+ if not (hasattr(tts,"cache_dir") and getattr(tts,"cache_dir") is None):
632
+ tmp_cache=os.path.join(tempfile.gettempdir(),"tts_runtime_cache")
633
+ os.makedirs(tmp_cache, exist_ok=True)
634
+ if hasattr(tts,"cache_dir"): tts.cache_dir=tmp_cache
635
+ except Exception: pass
636
+ except Exception as e:
637
+ print(f"[TTS] init failed: {e}")
638
+ TTS_ENABLE=False
639
+ tts=None
640
+
641
+ try:
642
+ import pygame
643
+ if not pygame.mixer.get_init(): pygame.mixer.init()
644
+ pygame.mixer.music.set_volume(1.0)
645
+ except Exception: pass
646
+
647
+ def tts_scheduler():
648
+ global _last_spoken_enqueue_ts, tts_last_spoken_text, no_repeat_until_ts #tts_last_done_ts 삭제(8.20)
649
+ last_sent_text = None
650
+ while not tts_stop.is_set():
651
+ time.sleep(0.05)
652
+ if not TTS_ENABLE or tts is None:
653
+ continue
654
+
655
+ with tts_target_lock:
656
+ tgt = (tts_target_text or "").strip()
657
+
658
+ # 타겟이 없으면 아무것도 하지 않고 넘김 (중단/정지 금지)
659
+ if not tgt:
660
+ last_sent_text = None
661
+ continue
662
+
663
+ now = time.time()
664
+
665
+ # 지금 말하는 중이면 일반 TTS는 절대 선점/중단하지 않음
666
+ if _is_speaker_busy():
667
+ continue
668
+
669
+ # 같은 문장을 너무 자주 반복하지 않음
670
+ if tgt == last_sent_text and now < no_repeat_until_ts:
671
+ continue
672
+
673
+ # 재생 (모드 전환이 아닌 한 clear_queue/stop/flush 절대 금지)
674
+ try:
675
+ tts.say(tgt)
676
+ tts_last_spoken_text = tgt
677
+ _last_spoken_enqueue_ts = now
678
+ #tts_last_done_ts = now 삭제(8.20)
679
+ last_sent_text = tgt
680
+ except Exception as e:
681
+ print(f"[TTS] error: {e}")
682
+
683
+
684
+
685
+ if TTS_ENABLE:
686
+ threading.Thread(target=tts_scheduler, daemon=True).start()
687
+
688
+ # def set_tts_target(text_or_none, note: str=""):
689
+ # global tts_target_text, tts_current_display, tts_current_note
690
+ # # 일반 TTS는 오직 타겟만 갱신. 여기서 재생을 중단/선점하지 않음.
691
+ # with tts_target_lock:
692
+ # tts_target_text = text_or_none
693
+ # tts_current_display = (text_or_none or "").strip()
694
+ # tts_current_note = note or ""
695
+ last_text="" #선점 발화를 위한 마지막 text 기록 (8.21)
696
+
697
+ def set_tts_target(text_or_none, note: str="", # 선점 발화를 위한 force 추가 (8.21)
698
+ *, force: bool=False):
699
+ """TTS 타겟 갱신.
700
+ - force=True : 직전 선점 문장과 다른 텍스트이면 현재 재생을 중단(큐 비우고 stop/cancel/flush)하고 새 타겟을 즉시 적용
703
+ """
704
+ global tts_target_text, tts_current_display, tts_current_note
705
+ global no_repeat_until_ts, _last_spoken_enqueue_ts
706
+ global last_text
707
+ # 1) 타겟 갱신
708
+ with tts_target_lock:
709
+ tts_target_text = text_or_none
710
+ tts_current_display = (text_or_none or "").strip()
711
+ tts_current_note = note or ""
712
+
713
+ # 2) 선점 옵션
714
+ if force and ("tts" in globals()) and (tts is not None) and last_text != text_or_none:
715
+ last_text=text_or_none
716
+ try:
717
+ if hasattr(tts, "clear_queue"): tts.clear_queue()
718
+ for m in ("stop","cancel","flush"):
719
+ if hasattr(tts, m):
720
+ try: getattr(tts, m)()
721
+ except Exception: pass
722
+ except Exception:
723
+ pass
724
+ _last_spoken_enqueue_ts = 0.0 # 스케줄러와 동기화
725
+
726
+
727
+
728
+
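+ # Usage sketch (illustrative comments only; the names are the ones defined above): the
+ # scheduler thread polls tts_target_text, so a routine announcement only needs to update
+ # the target, while an urgent read can pass force=True to preempt playback once the text
+ # differs from the previous forced phrase.
+ #   set_tts_target("터보샷", note="demo")               # spoken when the speaker is idle
+ #   set_tts_target("빨래추가", note="demo", force=True)  # clear queue / stop, then speak
+ #   set_tts_target(None)                                 # withdraw the target without stopping playback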
729
+ # ========= Camera =========
730
+ cap = cv2.VideoCapture(CAMERA_ID, cv2.CAP_DSHOW) if cv2.getBuildInformation().find('Windows')!=-1 else cv2.VideoCapture(CAMERA_ID)
731
+ if not cap.isOpened(): raise SystemExit("카메라 열기 실패")
732
+ cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
733
+ cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAPTURE_TARGET_W)
734
+ cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAPTURE_TARGET_H)
735
+ cap.set(cv2.CAP_PROP_FPS, 30)
736
+ try: cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
737
+ except: pass
738
+ time.sleep(0.15)
739
+ Wc=int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)); Hc=int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
740
+ print(f"[Camera] requested ~{CAPTURE_TARGET_W}x{CAPTURE_TARGET_H}, actual {Wc}x{Hc}")
741
+
742
+ WORK_SCALE=min(1.0, WORK_WIDTH_TARGET/float(Wc))
743
+ print(f"[Work] WORK_SCALE={WORK_SCALE:.3f} (work width ~{int(Wc*WORK_SCALE)})")
744
+
745
+ # ========= State =========
746
+ cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
747
+ frame_idx=0
748
+ prev_gray_s=None; prev_pts=None
749
+ overlays=[]; last_prune=time.time()
750
+
751
+ # OCR 스케줄
752
+ last_ocr_time=0.0
753
+ last_roi=None
754
+
755
+ # ORB
756
+ orb=None; bf=None
757
+ if USE_ORB_FALLBACK:
758
+ orb=cv2.ORB_create(nfeatures=ORB_NFEATURES)
759
+ bf=cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
760
+
761
+ # ========= Utils =========
762
+ _next_oid=1
763
+ def alloc_oid():
764
+ global _next_oid
765
+ oid=_next_oid; _next_oid+=1; return oid
766
+
767
+ def clamp_rect(x,y,w,h,W,H):
768
+ x=max(0,min(x,W-1)); y=max(0,min(y,H-1))
769
+ w=max(1,min(w,W-x)); h=max(1,min(h,H-y))
770
+ return x,y,w,h
771
+
772
+ def poly_center(poly): return np.mean(poly,axis=0)
773
+
774
+ def bbox_of_poly(poly):
775
+ x1=float(np.min(poly[:,0])); y1=float(np.min(poly[:,1]))
776
+ x2=float(np.max(poly[:,0])); y2=float(np.max(poly[:,1]))
777
+ return (x1,y1,x2-x1,y2-y1)
778
+
779
+ def variance_of_laplacian(g): return cv2.Laplacian(g, cv2.CV_64F).var()
780
+
781
+ def rect_contains(outer, inner, tol=2.0):
782
+ ox, oy, ow, oh = outer
783
+ ix, iy, iw, ih = inner
784
+ return (ix >= ox - tol) and (iy >= oy - tol) and \
785
+ (ix + iw <= ox + ow + tol) and (iy + ih <= oy + oh + tol)
786
+
787
+ def _canon_equal(a: str, b: str) -> bool:
788
+ a = (a or "").strip(); b = (b or "").strip()
789
+ if not a or not b: return False
790
+ try:
791
+ return _normalize_plain(a) == _normalize_plain(b)
792
+ except Exception:
793
+ import re
794
+ aa = re.sub(r"\s+","",a).casefold()
795
+ bb = re.sub(r"\s+","",b).casefold()
796
+ return aa == bb
797
+
798
+
799
+ def iou(a,b):
800
+ ax,ay,aw,ah=a; bx,by,bw,bh=b
801
+ ax2,ay2=ax+aw,ay+ah; bx2,by2=bx+bw,by+bh
802
+ ix1,iy1=max(ax,bx),max(ay,by)
803
+ ix2,iy2=min(ax2,bx2),min(ay2,by2)
804
+ iw,ih=max(0,ix2-ix1),max(0,iy2-iy1)
805
+ inter=iw*ih; union=aw*ah+bw*bh-inter+1e-9
806
+ return inter/union
807
+
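+ # Worked example (sketch): for a=(0,0,10,10) and b=(5,5,10,10) the intersection is 5*5=25
+ # and the union is 100+100-25=175, so iou(a,b) = 25/175 ≈ 0.143; identical boxes give ~1.0.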
808
+ # def expand_rect(x,y,w,h,pad,W,H): 삭제(8.20)
809
+ # x2=x-pad; y2=y-pad; w2=w+2*pad; h2=h+2*pad
810
+ # return clamp_rect(x2,y2,w2,h2,W,H)
811
+
812
+ def is_visible_in_view(poly, W, H, min_overlap=0.7):
813
+ x, y, w, h = bbox_of_poly(poly)
814
+ x1, y1, x2, y2 = x, y, x+w, y+h
815
+ vx1, vy1, vx2, vy2 = 0, 0, W, H
816
+ ix1, iy1 = max(x1, vx1), max(y1, vy1)
817
+ ix2, iy2 = min(x2, vx2), min(y2, vy2)
818
+ iw, ih = max(0, ix2 - ix1), max(0, iy2 - iy1)
819
+ inter = iw * ih; area = max(1.0, w * h)
820
+ return (inter / area) >= min_overlap
821
+
822
+ def draw_overlays(frame, items, now_ts):
823
+ H, W = frame.shape[:2]
824
+ to_draw=[]
825
+ for it in items:
826
+ if is_visible_in_view(it['poly'], W, H, min_overlap=0.7):
827
+ it['last_seen']=now_ts
828
+ to_draw.append(it)
829
+ to_draw=to_draw[:MAX_TEXT_DRAW]
830
+ for it in to_draw:
831
+ cv2.polylines(frame, [it['poly'].astype(int)], True, (255,165,0), 2, cv2.LINE_AA)
832
+
833
+ img_rgb=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
834
+ pil=Image.fromarray(img_rgb); draw=ImageDraw.Draw(pil)
835
+ font_path=None
836
+ for p in [r"C:\Windows\Fonts\malgun.ttf", r"C:\Windows\Fonts\NanumGothic.ttf",
837
+ r"C:\Windows\Fonts\NotoSansCJKkr-Regular.otf",
838
+ "/usr/share/fonts/truetype/noto/NotoSansCJKkr-Regular.ttc"]:
839
+ if os.path.isfile(p): font_path=p; break
840
+ font=ImageFont.truetype(font_path, 22) if font_path else ImageFont.load_default()
841
+
842
+ for it in to_draw:
843
+ poly=it['poly'].astype(int)
844
+ x=int(np.min(poly[:,0])); y=int(np.min(poly[:,1]))-6
845
+ draw.text((x, max(0,y)), f"{it['text']} ({it['conf']:.2f})",
846
+ font=font, fill=(255,255,255), stroke_width=2, stroke_fill=(0,0,0))
847
+ frame[:]=cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
848
+
849
+ def prune_overlays(items, now, active_roi=None):
850
+ def center_in_roi(c, roi):
851
+ if roi is None: return False
852
+ rx,ry,rw,rh = roi
853
+ return (rx<=c[0]<=rx+rw) and (ry<=c[1]<=ry+rh)
854
+
855
+ kept=[]
856
+ for it in items:
857
+ pinned = (now <= it.get('pin_until', 0.0))
858
+ if pinned:
859
+ kept.append(it); continue
860
+ birth = it.get('time', now)
861
+ alive_by_ttl = (now <= it.get('expiry', 0.0))
862
+ if IGNORE_HARD_CAP_WHILE_FINGER_IN_ROI and active_roi is not None:
863
+ c = poly_center(it['poly'])
864
+ if center_in_roi(c, active_roi):
865
+ if alive_by_ttl:
866
+ kept.append(it)
867
+ continue
868
+ under_hard_cap = ((now - birth) <= HARD_MAX_LIFETIME)
869
+ if alive_by_ttl and under_hard_cap:
870
+ kept.append(it)
871
+
872
+ if len(kept) > MAX_OVERLAYS:
873
+ kept = sorted(
874
+ kept,
875
+ key=lambda d: max(d.get('expiry', 0.0), d.get('pin_until', 0.0)),
876
+ reverse=True
877
+ )[:MAX_OVERLAYS]
878
+ return kept
879
+
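+ # Lifetime rules in prune_overlays, summarized (sketch): a pinned item (now <= pin_until)
+ # always survives; with IGNORE_HARD_CAP_WHILE_FINGER_IN_ROI set, items whose center lies in
+ # the active ROI only need a live TTL (expiry); everything else must satisfy both the TTL
+ # and the HARD_MAX_LIFETIME cap, and the result is clipped to MAX_OVERLAYS keeping the
+ # longest-lived entries.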
880
+ # ========= SIM helpers =========
881
+ def closest_rotation(A):
882
+ U, _, Vt = np.linalg.svd(A); R = U @ Vt
883
+ if np.linalg.det(R) < 0: Vt[-1,:]*=-1; R = U @ Vt
884
+ return R
885
+ def project_to_similarity(M): #사용
886
+ A=M[:,:2]; R=closest_rotation(A)
887
+ s=float(np.trace(A.T@R)/2.0); t=M[:,2].reshape(2)
888
+ return s, R, t
889
+ def angle_from_R(R): return math.atan2(R[1,0], R[0,0]) #사용
890
+ def build_similarity(s, theta):
891
+ c, n = math.cos(theta), math.sin(theta)
892
+ A=np.array([[c,-n],[n,c]], dtype=np.float32)*float(s)
893
+ return A
894
+
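+ # Numeric check (sketch): build_similarity(2.0, math.pi/2) is [[0,-2],[2,0]] up to rounding,
+ # i.e. a 90° rotation scaled by 2; project_to_similarity goes the other way, recovering the
+ # nearest rotation R via SVD, the scale s = trace(AᵀR)/2 and the translation column of M.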
895
+ SIM_HIST_MAX=240
896
+ sim_steps=[]
897
+
898
+ def _rect_aabb_after_M(rect, M3, W, H):
899
+ x,y,w,h=rect
900
+ corners=np.array([[x,y],[x+w,y],[x+w,y+h],[x,y+h]], dtype=np.float32)
901
+ tc=(corners @ M3[:2,:2].T)+M3[:2,2]
902
+ minx,miny=float(np.min(tc[:,0])), float(np.min(tc[:,1]))
903
+ maxx,maxy=float(np.max(tc[:,0])), float(np.max(tc[:,1]))
904
+ rx=int(max(0,minx)); ry=int(max(0,miny))
905
+ rh=int(max(1,min(H-1,maxy)-ry)); rw=int(max(1,min(W-1,maxx)-rx)) #약간의 오류 수정 (8.21)
906
+ return (rx,ry,rw,rh)
907
+
908
+ def estimate_similarity_small(prev_gray_s, gray_s, prev_pts): #사용
909
+ if prev_pts is None or len(prev_pts) < 140:
910
+ prev_pts=cv2.goodFeaturesToTrack(prev_gray_s, maxCorners=FLOW_MAX_CORNERS,
911
+ qualityLevel=FLOW_QUALITY, minDistance=FLOW_MIN_DISTANCE, blockSize=7)
912
+ if prev_pts is None: return None, None
913
+ next_pts, st, err=cv2.calcOpticalFlowPyrLK(prev_gray_s, gray_s, prev_pts, None,
914
+ winSize=FLOW_WINSIZE, maxLevel=FLOW_LEVELS,
915
+ criteria=(cv2.TERM_CRITERIA_EPS|cv2.TERM_CRITERIA_COUNT,12,0.03))
916
+ if next_pts is None: return None, None
917
+ P=prev_pts[st==1].reshape(-1,1,2); Q=next_pts[st==1].reshape(-1,1,2)
918
+ if len(P) < 60: return None, None
919
+ M,_=cv2.estimateAffinePartial2D(P,Q,method=cv2.RANSAC,
920
+ ransacReprojThreshold=3.0, maxIters=1500, confidence=0.99)
921
+ if M is None: return None, None
922
+ return M, next_pts
923
+
924
+ def transform_overlays_similarity(items, s, theta, t_s): #사용
925
+ tx=float(t_s[0])/FLOW_DS; ty=float(t_s[1])/FLOW_DS
926
+ step_mag=math.hypot(tx,ty)
927
+ if step_mag>MAX_TRANS_PX:
928
+ scale=MAX_TRANS_PX/(step_mag+1e-6)
929
+ tx*=scale; ty*=scale
930
+ A=build_similarity(s, theta).astype(np.float32)
931
+ for it in items:
932
+ pts=it['poly'].astype(np.float32)
933
+ it['poly']=(pts@A.T)+np.array([tx,ty], dtype=np.float32)
934
+
935
+ def orb_similarity(prev_g, cur_g):
936
+ kp1, des1 = orb.detectAndCompute(prev_g, None)
937
+ kp2, des2 = orb.detectAndCompute(cur_g, None)
938
+ if des1 is None or des2 is None or len(kp1)<8 or len(kp2)<8: return None
939
+ matches=bf.knnMatch(des1, des2, k=2)
940
+ good=[]
941
+ for mn in matches:
942
+ if len(mn)==2:
943
+ m,n=mn
944
+ if m.distance < 0.75*n.distance: good.append(m)
945
+ if len(good) < ORB_MIN_GOOD: return None
946
+ src=np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1,1,2)
947
+ dst=np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1,1,2)
948
+ M,_=cv2.estimateAffinePartial2D(src,dst,method=cv2.RANSAC,
949
+ ransacReprojThreshold=3.0,maxIters=1500,confidence=0.99)
950
+ return M
951
+
952
+ # ========= KLT =========
953
+ def _build_gray_for_klt(gray): #사용
954
+ g=gray
955
+ if KLT_USE_CLAHE:
956
+ clahe=cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
957
+ g=clahe.apply(g)
958
+ gx=cv2.Sobel(g, cv2.CV_32F, 1, 0, ksize=3)
959
+ gy=cv2.Sobel(g, cv2.CV_32F, 0, 1, ksize=3)
960
+ mag=cv2.magnitude(gx,gy)
961
+ if mag.max()>0: mag=(mag/mag.max())*255.0
962
+ return mag.astype(np.uint8)
963
+
964
+ def _klt_seed_ring(center, n=KLT_N_SAMPLES, r=KLT_RING_R): #사용
965
+ cx, cy = float(center[0]), float(center[1])
966
+ pts=[(cx,cy)]
967
+ for k in range(n):
968
+ a=2.0*math.pi*k/float(n)
969
+ pts.append((cx+r*math.cos(a), cy+r*math.sin(a)))
970
+ return np.array(pts, dtype=np.float32).reshape(-1,1,2)
971
+
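+ # Shape note (sketch): the seeds are the center plus KLT_N_SAMPLES ring points at radius
+ # KLT_RING_R, returned as an (n+1, 1, 2) float32 array, which is the point layout
+ # cv2.calcOpticalFlowPyrLK expects.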
972
+ def _in_bounds(pt, W, H, margin=0):
973
+ x,y=float(pt[0]), float(pt[1])
974
+ return (-margin<=x<=(W-1+margin)) and (-margin<=y<=(H-1+margin))
975
+
976
+ def klt_track_multi(prev_gray, cur_gray, prev_pts, W, H): #사용
977
+ if prev_gray is None or cur_gray is None or prev_pts is None or len(prev_pts)==0:
978
+ return None, None
979
+ p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, cur_gray, prev_pts, None,
980
+ winSize=KLT_WIN, maxLevel=KLT_LEVELS, criteria=KLT_TERM)
981
+ if p1 is None: return None, None
982
+ p0r, st2, err2 = cv2.calcOpticalFlowPyrLK(cur_gray, prev_gray, p1, None,
983
+ winSize=KLT_WIN, maxLevel=KLT_LEVELS, criteria=KLT_TERM)
984
+ good=[]
985
+ for i in range(len(prev_pts)):
986
+ if st[i]==1 and st2[i]==1:
987
+ fb=float(np.linalg.norm(prev_pts[i,0]-p0r[i,0]))
988
+ e=float(err[i][0]) if err is not None else 0.0
989
+ step=float(np.linalg.norm(p1[i,0]-prev_pts[i,0]))
990
+ if fb<=KLT_FB_MAX and e<=KLT_ERR_MAX and step<=KLT_STEP_MAX and _in_bounds(p1[i,0], W, H, KLT_OUT_MARGIN):#KLT_OUT_MARGIN 단순화 (8.20)
991
+ good.append(p1[i,0])
992
+ if len(good)<KLT_MIN_GOOD: return None, None
993
+ good=np.array(good, dtype=np.float32)
994
+ med=np.median(good, axis=0)
995
+ cx, cy = int(round(float(med[0]))), int(round(float(med[1])))
996
+ if not _in_bounds((cx,cy), W, H, 0): return None, None
997
+ return (cx,cy), good.reshape(-1,1,2)
998
+
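+ # Rationale (sketch): points are tracked prev→cur and back cur→prev; a point is kept only if
+ # the forward-backward error, LK error and per-frame step stay under KLT_FB_MAX / KLT_ERR_MAX /
+ # KLT_STEP_MAX and it stays in frame. The median of the survivors becomes the fingertip
+ # estimate, so a few bad tracks cannot drag the point far.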
999
+ # ===== Donut / merge utils =====
1000
+ def rect_from_poly(poly):#사용
1001
+ x,y,w,h=bbox_of_poly(poly); return (int(x),int(y),int(w),int(h))
1002
+
1003
+ def fingertip_overlaps_box(finger, box): #사용
1004
+ if finger is None: return False
1005
+ x,y,w,h=box
1006
+ return (x<=finger[0]<=x+w) and (y<=finger[1]<=y+h)
1007
+
1008
+ def clip_poly_to_rect(poly, rect):
1009
+ x,y,w,h=rect; rx1,ry1,rx2,ry2=x,y,x+w,y+h
1010
+ P=poly.copy()
1011
+ P[:,0]=np.clip(P[:,0], rx1, rx2); P[:,1]=np.clip(P[:,1], ry1, ry2)
1012
+ return P
1013
+
1014
+ def merge_update_overlays(items, new_items, roi_rect, now_ts,
1015
+ iou_th=MERGE_IOU_TH, center_dist_th=MERGE_CENTER_DIST):
1016
+ rx, ry, rw, rh = roi_rect
1017
+
1018
+ def center_in_roi(c):
1019
+ return (rx <= c[0] <= rx+rw) and (ry <= c[1] <= ry+rh)
1020
+
1021
+ roi_indices = [idx for idx, it in enumerate(items) if center_in_roi(poly_center(it['poly']))]
1022
+ used_old = set()
1023
+
1024
+ for ni in new_items:
1025
+ poly_new = clip_poly_to_rect(ni['poly'], roi_rect)
1026
+ box_new = bbox_of_poly(poly_new)
1027
+ raw_txt = str(ni.get('text','')).strip()
1028
+ raw_conf = float(ni.get('conf', 0.0))
1029
+ disp_new, canon_new, csc_new, conf_new = enrich_with_dict(raw_txt, raw_conf)
1030
+
1031
+ best_idx = -1
1032
+ best_iou = -1.0
1033
+ best_d = 1e9
1034
+
1035
+ for idx in roi_indices:
1036
+ if idx in used_old:
1037
+ continue
1038
+ it = items[idx]
1039
+ box_old = bbox_of_poly(it['poly'])
1040
+
1041
+ # ① 위치기반 매칭(IoU/센터거리)
1042
+ i = iou(box_new, box_old)
1043
+ cxn = (box_new[0]*2 + box_new[2]) * 0.5
1044
+ cyn = (box_new[1]*2 + box_new[3]) * 0.5
1045
+ cxo = (box_old[0]*2 + box_old[2]) * 0.5
1046
+ cyo = (box_old[1]*2 + box_old[3]) * 0.5
1047
+ d = math.hypot(cxn - cxo, cyn - cyo)
1048
+ loc_match = (i >= iou_th) or (d <= center_dist_th)
1049
+
1050
+ # ② 같은 글자 + 포함관계면 매칭으로 간주(작은 박스가 큰 박스 안에 있는 경우 등)
1051
+ text_same = _canon_equal(it.get('canon_text') or it.get('text'),
1052
+ canon_new or disp_new)
1053
+ contained = rect_contains(box_old, box_new) or rect_contains(box_new, box_old)
1054
+ text_same_contained = text_same and contained
1055
+
1056
+ if not (loc_match or text_same_contained):
1057
+ continue
1058
+
1059
+ # 베스트 선택(우선 IoU, 다음 거리)
1060
+ if (i > best_iou) or (abs(i - best_iou) < 1e-6 and d < best_d):
1061
+ best_iou, best_d, best_idx = i, d, idx
1062
+
1063
+ if best_idx >= 0:
1064
+ it = items[best_idx]
1065
+ # 우선순위: (사전 일치 점수) > (conf)
1066
+ csc_old = float(it.get('canon_score', 0.0))
1067
+ conf_old = float(it.get('conf', 0.0))
1068
+
1069
+ replace = False
1070
+
1071
+ if csc_new >= DICT_MERGE_SCORE and csc_old < DICT_MERGE_SCORE:
1072
+ replace = True
1073
+ elif csc_new >= DICT_MERGE_SCORE and csc_old >= DICT_MERGE_SCORE:
1074
+ if csc_new > csc_old + DICT_TIE_DELTA:
1075
+ replace = True
1076
+ elif abs(csc_new - csc_old) <= DICT_TIE_DELTA and conf_new > conf_old:
1077
+ replace = True
1078
+ else:
1079
+ if conf_new > conf_old and csc_new > csc_old: #신뢰도가 더 높을 경우만 대체 (8.20)
1080
+ replace = True #상대비교 기반으로 대체하면 어떨지 고민
1081
+
1082
+ if replace:
1083
+ it['poly'] = poly_new
1084
+ it['ocr_text'] = raw_txt
1085
+ it['text'] = disp_new
1086
+ it['canon_text'] = canon_new
1087
+ it['canon_score'] = csc_new
1088
+ it['conf'] = conf_new
1089
+ it['expiry'] = now_ts + BASE_TTL
1090
+ # 연장시간 코드 중첩 삭제 (8.20)
1091
+
1092
+ used_old.add(best_idx)
1093
+
1094
+ else:
1095
+ if DICT_ONLY and (disp_new is None or disp_new not in DICT_WORDS): #사전 단어만 표기 (8.21)
1096
+ continue
1097
+ items.append({
1098
+ 'poly': poly_new,
1099
+ 'ocr_text': raw_txt,
1100
+ 'text': disp_new,
1101
+ 'canon_text': canon_new,
1102
+ 'canon_score': csc_new,
1103
+ 'conf': conf_new,
1104
+ 'time': now_ts,
1105
+ 'last_seen': now_ts,
1106
+ 'expiry': now_ts + BASE_TTL,
1107
+ 'pin_until': 0.0,
1108
+ 'id': alloc_oid()
1109
+ })
1110
+
1111
+ # ROI 안에 있던 기존 항목들의 여유시간(keepalive) 연장
1112
+ # 메인루프에서 연장되므로 시간 연장 삭제(8.20)
1113
+ # prune_overlays에서 사용하는것과 겹침 삭제(8.20)
1114
+ # 만약 바운딩 박스가 많아지면 여기서 prune 한번 진행 필요
1115
+ return items
1116
+
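+ # Replacement priority example (sketch; 90 and 5 stand in for DICT_MERGE_SCORE / DICT_TIE_DELTA):
+ #   old csc=0,  conf=0.80  vs new csc=95, conf=0.40 -> replace (dictionary hit beats raw conf)
+ #   old csc=92, conf=0.70  vs new csc=93, conf=0.90 -> replace (scores tie within 5, higher conf)
+ #   old csc=0,  conf=0.70  vs new csc=0,  conf=0.90 -> keep old (csc_new is not strictly greater)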
1117
+
1118
+ def dedupe_same_text_overlays(items, iou_th=0.55, center_dist_th=26.0):
1119
+ """동일/유사 텍스트(사전 정규화 기준) 중복 박스 제거.
1120
+ - 같은 텍스트로 간주되는 박스가 서로 많이 겹치거나 가깝거나
1121
+ 한쪽이 다른쪽을 '포함'하면 하나만 남김
1122
+ - 우선순위: (1) 사전 일치 점수 높음 → (2) 동률이면 conf 높은 것
1123
+ """
1124
+ def _canon_key(it):
1125
+ t = (it.get('canon_text') or it.get('text') or '').strip()
1126
+ try:
1127
+ return _normalize_plain(t)
1128
+ except Exception:
1129
+ import re as _re
1130
+ return _re.sub(r"\s+","",t).casefold()
1131
+
1132
+ def _rect(it):
1133
+ return bbox_of_poly(it['poly'])
1134
+
1135
+ def _score(it):
1136
+ csc = float(it.get('canon_score', 0.0))
1137
+ conf = float(it.get('conf', 0.0))
1138
+ return ((1 if csc >= DICT_MERGE_SCORE else 0), csc, conf)
1139
+
1140
+ groups = {}
1141
+ for it in items:
1142
+ key = _canon_key(it)
1143
+ if not key: # 빈 문자열 제외
1144
+ continue
1145
+ groups.setdefault(key, []).append(it)
1146
+
1147
+ keep = set()
1148
+ drop = set()
1149
+ for key, arr in groups.items():
1150
+ arr_sorted = sorted(arr, key=_score, reverse=True)
1151
+ for i, a in enumerate(arr_sorted):
1152
+ if id(a) in drop or id(a) in keep:
1153
+ continue
1154
+ keep.add(id(a))
1155
+ ax, ay, aw, ah = _rect(a)
1156
+ acx, acy = ax+aw*0.5, ay+ah*0.5
1157
+ for b in arr_sorted[i+1:]:
1158
+ if id(b) in drop or id(b) in keep:
1159
+ continue
1160
+ bx, by, bw, bh = _rect(b)
1161
+ bcx, bcy = bx+bw*0.5, by+bh*0.5
1162
+ ov = iou((ax,ay,aw,ah), (bx,by,bw,bh))
1163
+ d = ((acx-bcx)**2 + (acy-bcy)**2)**0.5
1164
+ contained = rect_contains((ax,ay,aw,ah), (bx,by,bw,bh)) or rect_contains((bx,by,bw,bh), (ax,ay,aw,ah))
1165
+ if contained or (ov >= iou_th) or (d <= center_dist_th):
1166
+ drop.add(id(b))
1167
+
1168
+ if not drop:
1169
+ return items
1170
+ return [it for it in items if id(it) not in drop]
1171
+
1172
+
1173
+ # ===== GUIDE MODE 유틸 =====
1174
+ def _overlay_center(it):
1175
+ P = it['poly']
1176
+ x1, y1 = float(np.min(P[:,0])), float(np.min(P[:,1]))
1177
+ x2, y2 = float(np.max(P[:,0])), float(np.max(P[:,1]))
1178
+ return (0.5*(x1+x2), 0.5*(y1+y2))
1179
+
1180
+ def _choose_target_overlay(target_canon: str, overlays, finger_xy=None):
1181
+ cands = []
1182
+ t = (target_canon or "").strip()
1183
+ if not t: return None
1184
+ for it in overlays:
1185
+ ct = (it.get('canon_text') or "").strip()
1186
+ tx = (it.get('text') or "").strip()
1187
+ ok = (ct == t) or (tx == t) or (t in tx)
1188
+ if ok:
1189
+ cx, cy = _overlay_center(it)
1190
+ d = 0.0
1191
+ if finger_xy is not None:
1192
+ d = math.hypot(cx - (finger_xy[0]), cy - (finger_xy[1]))
1193
+ canon_bonus = 1.0 if (ct == t) else 0.0
1194
+ cands.append((canon_bonus, float(it.get('conf',0.0)), -d, it))
1195
+ if not cands:
1196
+ return None
1197
+ cands.sort(key=lambda c: c[:3], reverse=True)  # 마지막 원소(dict) 비교로 인한 TypeError 방지
1198
+ return cands[0][3]
1199
+
1200
+ def _dir_sentence(dx, dy):
1201
+ def q(px):
1202
+ a = abs(int(round(px)))
1203
+ if a < 30: lvl = "조금"
1204
+ elif a < 90: lvl = "약간"
1205
+ elif a < 180: lvl = "보통"
1206
+ else: lvl = "많이"
1207
+ return lvl, a
1208
+ msg = []
1209
+ if dx > 0: lvl, a = q(dx); msg.append(f"오른쪽으로 {a}픽셀({lvl})")
1210
+ elif dx < 0: lvl, a = q(dx); msg.append(f"왼쪽으로 {a}픽셀({lvl})")
1211
+ if dy > 0: lvl, a = q(dy); msg.append(f"아래로 {a}픽셀({lvl})")
1212
+ elif dy < 0: lvl, a = q(dy); msg.append(f"위로 {a}픽셀({lvl})")
1213
+ return " , ".join(msg) if msg else "그대로 유지"
1214
+
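+ # Example (sketch): _dir_sentence(120, -40) -> "오른쪽으로 120픽셀(보통) , 위로 40픽셀(약간)";
+ # magnitudes bucket as <30 "조금", <90 "약간", <180 "보통", else "많이", and (0, 0) yields "그대로 유지".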
1215
+ def set_guide_target_from_text(text: str):
1216
+ global GUIDE_TARGET, GUIDE_TARGET_ITEM
1217
+ if not text:
1218
+ GUIDE_TARGET = None
1219
+ GUIDE_TARGET_ITEM = None
1220
+ set_tts_target("목표가 비었습니다.", note="guide")
1221
+ return False
1222
+ canon, sc = map_to_dict_canon(text, threshold=DICT_THRESHOLD)
1223
+ if not canon:
1224
+ set_tts_target(f"'{text}'는 사전에 없습니다.", note="guide no-dict")
1225
+ return False
1226
+ GUIDE_TARGET = canon
1227
+ GUIDE_TARGET_ITEM = None
1228
+ set_tts_target(f"목표 '{canon}' 안내를 시작합니다.", note="guide")
1229
+ return True
1230
+
1231
+ def guide_tick(now_ts, finger_xy, overlays):
1232
+ global GUIDE_LAST_TS, GUIDE_LAST_SENT, GUIDE_TARGET_ITEM, GUIDE_TARGET
1233
+
1234
+ if not GUIDE_MODE or not GUIDE_TARGET:
1235
+ return
1236
+
1237
+ if GUIDE_REQUIRE_FINGER and finger_xy is None:
1238
+ if now_ts - GUIDE_LAST_TS >= GUIDE_REPEAT_SEC:
1239
+ set_tts_target("손가락을 화면에 올려 주세요.", note="guide")
1240
+ GUIDE_LAST_TS = now_ts
1241
+ GUIDE_LAST_SENT = "ask_finger"
1242
+ return
1243
+
1244
+ tgt = _choose_target_overlay(GUIDE_TARGET, overlays, finger_xy)
1245
+ GUIDE_TARGET_ITEM = tgt
1246
+
1247
+ if tgt is None:
1248
+ if now_ts - GUIDE_LAST_TS >= 2.0:
1249
+ set_tts_target(f"화면에서 '{GUIDE_TARGET}'을 찾지 못했습니다.", note="guide")
1250
+ GUIDE_LAST_TS = now_ts
1251
+ GUIDE_LAST_SENT = "not_found"
1252
+ return
1253
+
1254
+ cx, cy = _overlay_center(tgt)
1255
+ if finger_xy is None:
1256
+ if now_ts - GUIDE_LAST_TS >= GUIDE_REPEAT_SEC:
1257
+ set_tts_target(f"목표 '{GUIDE_TARGET}'이 화면에 있습니다. 손가락을 이동해 주세요.", note="guide")
1258
+ GUIDE_LAST_TS = now_ts
1259
+ GUIDE_LAST_SENT = "where_only"
1260
+ return
1261
+
1262
+ dx = int(round(cx - finger_xy[0]))
1263
+ dy = int(round(cy - finger_xy[1]))
1264
+ dist = math.hypot(dx, dy)
1265
+
1266
+ if dist <= GUIDE_TOL_PX:
1267
+ if GUIDE_LAST_SENT != "arrived":
1268
+ set_tts_target(f"도착. '{GUIDE_TARGET}' 입니다.", note="guide ok")
1269
+ GUIDE_LAST_SENT = "arrived"
1270
+ GUIDE_LAST_TS = now_ts
1271
+ return
1272
+
1273
+ if (now_ts - GUIDE_LAST_TS) >= GUIDE_REPEAT_SEC:
1274
+ msg = _dir_sentence(dx, dy)
1275
+ set_tts_target(f"{msg}", note=f"guide d={int(dist)}")
1276
+ GUIDE_LAST_SENT = msg
1277
+ GUIDE_LAST_TS = now_ts
1278
+
1279
+ def highlight_guide_target(frame_bgr, item):
1280
+ if item is None: return
1281
+ poly = item['poly'].astype(int)
1282
+ cv2.polylines(frame_bgr, [poly], True, (0,0,255), 3, cv2.LINE_AA)
1283
+ cx, cy = map(int, _overlay_center(item))
1284
+ cv2.circle(frame_bgr, (cx,cy), 6, (0,0,255), -1)
1285
+
1286
+ def stt_listen_once(timeout=4, phrase_time_limit=4):
1287
+ if not (USE_STT and _STT_OK):
1288
+ set_tts_target("음성 인식이 비활성화되어 있습니다.", note="stt off")
1289
+ return None
1290
+ try:
1291
+ r = sr.Recognizer()
1292
+ with sr.Microphone() as source:
1293
+ set_tts_target("목표 단어를 말씀해 주세요.", note="stt")
1294
+ if hasattr(r, "adjust_for_ambient_noise"):
1295
+ r.adjust_for_ambient_noise(source, duration=0.5)
1296
+ audio = r.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
1297
+ try:
+ text = r.recognize_google(audio, language="ko-KR")
+ except Exception:
+ # 일시적 인식 오류에 대비한 1회 재시도
+ text = r.recognize_google(audio, language="ko-KR")
1301
+ return text
1302
+ except Exception as e:
1303
+ print(f"[STT] error: {e}")
1304
+ set_tts_target("음성 인식에 실패했습니다.", note="stt err")
1305
+ return None
1306
+
1307
+ # ===== OCR worker =====
1308
+ task_q=queue.Queue(maxsize=1)
1309
+ result_q=queue.Queue(maxsize=2)
1310
+
1311
+ def enhance_for_ocr(bgr):
1312
+ if ENHANCE_MODE=="off": return bgr
1313
+ if ENHANCE_MODE=="fast":
1314
+ blur=cv2.GaussianBlur(bgr,(0,0),0.8)
1315
+ return cv2.addWeighted(bgr, 1.6, blur, -0.6, 0)
1316
+ img=bgr.copy()
1317
+ img=cv2.bilateralFilter(img, d=0, sigmaColor=45, sigmaSpace=12)
1318
+ lab=cv2.cvtColor(img, cv2.COLOR_BGR2LAB); L,A,B=cv2.split(lab)
1319
+ clahe=cv2.createCLAHE(clipLimit=1.6, tileGridSize=(8,8)); L=clahe.apply(L)
1320
+ img=cv2.cvtColor(cv2.merge([L,A,B]), cv2.COLOR_LAB2BGR)
1321
+ blur=cv2.GaussianBlur(img,(0,0),0.9)
1322
+ return cv2.addWeighted(img, 1.8, blur, -0.8, 0)
1323
+
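+ # Note (sketch): both branches are unsharp masks whose weights sum to 1.0 (1.6-0.6, 1.8-0.8),
+ # so brightness is preserved while glyph edges are sharpened before OCR; the full path also
+ # denoises with a bilateral filter and applies CLAHE to the L channel for contrast.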
1324
+ def prep_fixed(roi_bgr):
1325
+ h,w=roi_bgr.shape[:2]
1326
+ long_side=max(h,w); scale=min(1.0, float(MAX_OCR_LONG)/float(long_side))
1327
+ proc=cv2.resize(roi_bgr, (int(w*scale), int(h*scale)), interpolation=cv2.INTER_AREA) if scale<1.0 else roi_bgr
1328
+ proc=enhance_for_ocr(proc)
1329
+ sx_pre=proc.shape[1]/float(w); sy_pre=proc.shape[0]/float(h)
1330
+ return proc, sx_pre, sy_pre
1331
+
1332
+ def run_ocr_rect(frame_work, rect_work, mask_boxes=None):
1333
+ x,y,w,h=rect_work
1334
+ src=frame_work[y:y+h, x:x+w].copy()
1335
+ base_off=(x,y)
1336
+ proc,sx_pre,sy_pre=prep_fixed(src)
1337
+ items=[]
1338
+ r1=easy_reader.readtext(proc, detail=1, decoder='greedy',
1339
+ rotation_info=[0,180],
1340
+ contrast_ths=0.05, adjust_contrast=0.7,
1341
+ text_threshold=0.6, low_text=0.3, link_threshold=0.4,
1342
+ canvas_size=1920, mag_ratio=1.3,
1343
+ paragraph=False, min_size=2)
1344
+ for (bbox_points,text,prob) in r1:
1345
+ poly=np.array(bbox_points,dtype=np.float32)
1346
+ poly[:,0]=poly[:,0]/sx_pre + base_off[0]
1347
+ poly[:,1]=poly[:,1]/sy_pre + base_off[1]
1348
+ bx,by,bw,bh=bbox_of_poly(poly)
1349
+ if bw*bh>=120: items.append({'poly':poly,'text':text,'conf':float(prob)})
1350
+ return items
1351
+
1352
+ def ocr_worker():
1353
+ while True:
1354
+ item=task_q.get()
1355
+ if item is None: break
1356
+ t0=time.time()
1357
+ out=[]
1358
+ for rect_work in item['rects']:
1359
+ out.extend(run_ocr_rect(item['frame_work'], rect_work, mask_boxes=None))
1360
+ dt_ms=(time.time()-t0)*1000.0
1361
+ result_q.put({
1362
+ 'roi': item['roi'],
1363
+ 'new_items': out,
1364
+ 'dt_ms': dt_ms,
1365
+ 'frame_idx': item['frame_idx'],
1366
+ })
1367
+ threading.Thread(target=ocr_worker, daemon=True).start()
1368
+
1369
+ def drain_queue(q):
1370
+ try:
1371
+ while True: q.get_nowait()
1372
+ except queue.Empty:
1373
+ pass
1374
+
1375
+ # === 폴백 요약기 === 삭제 (8.20)
1376
+ # def _fallback_summarize(frame_bgr):
1377
+ # try:
1378
+ # r = easy_reader.readtext(frame_bgr, detail=1)
1379
+ # tokens = [re.sub(r"[^가-힣0-9A-Za-z]", "", t).strip() for (_b,t,_c) in r]
1380
+ # tokens = [t for t in tokens if t]
1381
+ # if not tokens:
1382
+ # return "눈에 띄는 텍스트가 없습니다."
1383
+ # top = ", ".join(tokens[:5])
1384
+ # return f"화면에서 텍스트가 보입니다: {top}"
1385
+ # except Exception:
1386
+ # return "장면을 요약할 수 없습니다."
1387
+
1388
+ # ===== Main loop =====
1389
+ print("실시간 시작. 'q' 종료 / 'o' OCR ON/OFF / 't' HUD / 's' TTS / 'y' YOLO 입력 / 'p' YOLO PNG 저장")
1390
+ print("모드 전환: '1' 조작 모드 / '2' 보기 모드(상황 설명)")# / '3' 안내 모드(목표로 이동 안내)") # <<< CHANGED
1391
+ print("ROI 조절: '[' 너비-, ']' 너비+, ';' 높이-, \"'\" 높이+ / 'r' 기본값 복원")
1392
+ #print("GUIDE: '3' 안내 모드 / 'v' 음성으로 목표 지정 / 'f' 문자 입력 / 'c' 목표 취소")
1393
+
1394
+ s_ema=1.0; theta_ema=0.0; tx_ema=0.0; ty_ema=0.0
1395
+ prev_gray_full=None; prev_gray_klt=None
1396
+ prev_gray_s=None; prev_pts=None
1397
+ # <실험2> 손가락 없을 때 TTS 잘못 안내
1398
+ # [CASE 1] KLT OFF & FINGER_STALE_MS 800 -> 2000 (손가락 잘 안 따라올 수 있음)
1399
+ # [CASE 2] KLT ON & (1초 내내 KLT만 썼으면 finger_is_fresh = False)
1400
+ # [CASE 3] YOLO 연속 n번 해야 KLT ON
1401
+
1402
+ # 수정7: KLT 단독 추적 시작 시간 기록 (위 상황의 CASE2에 해당)
1403
+ klt_only_start_ts = 0.0
1404
+
1405
+ finger_src="NONE"; yolo_last_conf=None; klt_draw_pts=None; yolo_box_count=None; yolo_last_in=None
1406
+
1407
+ while True:
1408
+ ret, frame_cap = cap.read()
1409
+ if not ret: break
1410
+
1411
+ frame_work = frame_cap if WORK_SCALE==1.0 else cv2.resize(frame_cap, None, fx=WORK_SCALE, fy=WORK_SCALE, interpolation=cv2.INTER_AREA)
1412
+ H,W = frame_work.shape[:2]
1413
+ frame_for_ocr=frame_work.copy()
1414
+ frame_disp=frame_work.copy()
1415
+
1416
+ # 안내용 최신 프레임 공유
1417
+ with _latest_frame_lock:
1418
+ _latest_frame_for_info = frame_work.copy()
1419
+
1420
+ gray=cv2.cvtColor(frame_work, cv2.COLOR_BGR2GRAY)
1421
+ gray_klt=_build_gray_for_klt(gray) if USE_KLT_FALLBACK else gray
1422
+
1423
+ # ---- Global SIM ----
1424
+ gray_s=cv2.resize(gray, None, fx=FLOW_DS, fy=FLOW_DS, interpolation=cv2.INTER_AREA)
1425
+ M_s=None; did_motion=False
1426
+ if prev_gray_s is not None:
1427
+ reseed=((frame_idx % RESEED_INTERVAL_FRAMES)==0)
1428
+ if reseed: prev_pts=None
1429
+ M_s, next_pts = estimate_similarity_small(prev_gray_s, gray_s, prev_pts)
1430
+ prev_pts=next_pts
1431
+ prev_gray_s=gray_s
1432
+
1433
+ if M_s is not None:
1434
+ s_step,R_step,t_step_s=project_to_similarity(M_s)
1435
+ s_step=max(1.0-MAX_SCALE_STEP, min(1.0+MAX_SCALE_STEP, s_step))
1436
+ theta_step=angle_from_R(R_step)
1437
+ theta_step=max(-math.radians(MAX_ROT_STEP_DEG), min(math.radians(MAX_ROT_STEP_DEG), theta_step))
1438
+ s_ema=(1-EMA_ALPHA_SIM)*s_ema+EMA_ALPHA_SIM*s_step
1439
+ theta_ema=(theta_ema+((theta_step-theta_ema+math.pi)%(2*math.pi)-math.pi)*EMA_ALPHA_SIM)
1440
+ tx_ema=(1-EMA_ALPHA_SIM)*tx_ema+EMA_ALPHA_SIM*float(t_step_s[0])
1441
+ ty_ema=(1-EMA_ALPHA_SIM)*ty_ema+EMA_ALPHA_SIM*float(t_step_s[1])
1442
+ transform_overlays_similarity(overlays, s_ema, theta_ema, (tx_ema,ty_ema))
1443
+ did_motion=True
1444
+ else:
1445
+ if USE_ORB_FALLBACK and (prev_gray_full is not None):
1446
+ M2=orb_similarity(prev_gray_full, gray)
1447
+ if M2 is not None:
1448
+ s2,R2,t2s=project_to_similarity(M2)
1449
+ s2=max(1.0-MAX_SCALE_STEP, min(1.0+MAX_SCALE_STEP, s2))
1450
+ theta2=angle_from_R(R2)
1451
+ theta2=max(-math.radians(MAX_ROT_STEP_DEG), min(math.radians(MAX_ROT_STEP_DEG), theta2))
1452
+ s_ema=(1-EMA_ALPHA_SIM)*s_ema+EMA_ALPHA_SIM*s2
1453
+ theta_ema=(theta_ema+((theta2-theta_ema+math.pi)%(2*math.pi)-math.pi)*EMA_ALPHA_SIM)
1454
+ tx_ema=(1-EMA_ALPHA_SIM)*tx_ema+EMA_ALPHA_SIM*float(t2s[0])*FLOW_DS
1455
+ ty_ema=(1-EMA_ALPHA_SIM)*ty_ema+EMA_ALPHA_SIM*float(t2s[1])*FLOW_DS
1456
+ transform_overlays_similarity(overlays, s_ema, theta_ema, (tx_ema,ty_ema))
1457
+ did_motion=True
1458
+
1459
+ if did_motion:
1460
+ M2_step=np.array([[math.cos(theta_ema)*s_ema, -math.sin(theta_ema)*s_ema, float(tx_ema)/FLOW_DS],
1461
+ [math.sin(theta_ema)*s_ema, math.cos(theta_ema)*s_ema, float(ty_ema)/FLOW_DS]], dtype=np.float32)
1462
+ else:
1463
+ M2_step=np.array([[1,0,0],[0,1,0]], dtype=np.float32)
1464
+ if frame_idx>0:
1465
+ sim_steps.append((frame_idx-1, frame_idx, np.vstack([M2_step, [0,0,1]]).astype(np.float32)))
1466
+ if len(sim_steps)>SIM_HIST_MAX: sim_steps.pop(0)
1467
+
1468
+ # ---- YOLO finger (ASYNC) ----
1469
+ now=time.time()
1470
+
1471
+ with mode_lock:
1472
+ in_op_or_guide = (mode_state == MODE_OP) or (mode_state == MODE_GUIDE) #보기 모드에서 YOLO가 비동기로 실행 안되게 막음 (8.21)
1473
+
1474
+ if in_op_or_guide: #보기 모드에서 YOLO가 비동기로 실행 안되게 막음(8.21)
1475
+ if yolo_in_q.empty():
1476
+ try: yolo_in_q.put_nowait(frame_work.copy())
1477
+ except queue.Full: pass
1478
+ try: det=yolo_out_q.get_nowait()
1479
+ except queue.Empty: det=None
1480
+ else:
1481
+ det=None
1482
+
1483
+ finger_is_fresh=False
1484
+ finger_src="NONE"
1485
+ yolo_last_conf=None
1486
+ klt_draw_pts=None
1487
+ yolo_box_count=None
1488
+
1489
+ if isinstance(det, dict):
1490
+ xy=det.get('xy')
1491
+ if xy is not None:
1492
+ fx,fy=int(xy[0]), int(xy[1])
1493
+ if last_finger_xy is None:
1494
+ filt=np.array([fx,fy], dtype=np.float32)
1495
+ else:
1496
+ filt=(1-EMA_ALPHA_FINGER)*np.array(last_finger_xy,dtype=np.float32)+EMA_ALPHA_FINGER*np.array([fx,fy],dtype=np.float32)
1497
+ last_finger_xy=(int(filt[0]), int(filt[1]))
1498
+ finger_last_seen=now; finger_is_fresh=True; finger_src="YOLO"
1499
+ yolo_last_conf=float(det.get('conf',0.0))
1500
+ if USE_KLT_FALLBACK:
1501
+ klt_pts_prev=_klt_seed_ring(last_finger_xy)
1502
+ klt_lost_frames=0; #frames_since_reseed=0 삭제(8.20)
1503
+ if isinstance(det, dict) and det.get('yolo_in') is not None:
1504
+ yolo_last_in=det['yolo_in']
1505
+ if YOLO_SHOW_INPUT: cv2.imshow(YOLO_INPUT_WIN, yolo_last_in)
1506
+ if YOLO_DEBUG:
1507
+ rb=det.get('raw_boxes',[])
1508
+ yolo_box_count=len(rb)
1509
+ if YOLO_DRAW_ALL:
1510
+ for (x1,y1,x2,y2,conf,cls_id) in rb:
1511
+ cv2.rectangle(frame_disp,(int(x1),int(y1)),(int(x2),int(y2)),(0,200,255),1)
1512
+ cv2.putText(frame_disp,f"{conf:.2f}/{cls_id}",(int(x1),max(0,int(y1)-3)),
1513
+ cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,200,255),1,cv2.LINE_AA)
1514
+ # 수정5: KLT execution
1515
+ if USE_KLT_FALLBACK and not finger_is_fresh and (prev_gray_klt is not None) and (klt_pts_prev is not None):
1516
+ klt_xy, klt_pts_next = klt_track_multi(prev_gray_klt, gray_klt, klt_pts_prev, W, H)
1517
+ if klt_xy is not None:
1518
+ # KLT 추적 성공
1519
+ last_finger_xy = klt_xy
1520
+ klt_pts_prev = klt_pts_next
1521
+ klt_draw_pts = klt_pts_next # 화면 표시용
1522
+
1523
+ finger_last_seen = now
1524
+ finger_is_fresh = True # KLT가 찾았어도 'fresh'로 간주하여 OCR 등 후속 로직 실행
1525
+ finger_src = "KLT"
1526
+ klt_lost_frames = 0
1527
+ else:
1528
+ # KLT 추적 실패
1529
+ klt_lost_frames += 1
1530
+ if klt_lost_frames > KLT_LOSS_GRACE:
1531
+ klt_pts_prev = None # 추적점이 너무 오래되었으므로 초기화
1532
+ #
1533
+ # 수정7: KLT 단독 추적 시간제한
1534
+ KLT_TIMEOUT_SEC = 1.0
1535
+ if finger_src == "YOLO":
1536
+ klt_only_start_ts = 0.0 # YOLO가 잡았으면 타이머 리셋
1537
+ elif finger_src == "KLT":
1538
+ if klt_only_start_ts == 0.0:
1539
+ klt_only_start_ts = now # KLT 추적 시작, 타이머 개시
1540
+ # KLT 추적이 1초 이상 지속되면 포인트 무효화
1541
+ elif (now - klt_only_start_ts) > KLT_TIMEOUT_SEC:
1542
+ last_finger_xy = None # 손가락 좌표 삭제
1543
+ finger_is_fresh = False # tts 방지
1544
+ klt_pts_prev = None # klt execution 방지
1545
+ klt_only_start_ts = 0.0 # 타이머 리셋
1546
+ finger_src = "NONE"
1547
+ else: # "NONE"
1548
+ klt_only_start_ts = 0.0 # 아무것도 못 잡았으면 타이머 리셋
1549
+
1550
+ # ---- 모드 분기 ----
1551
+ with mode_lock:
1552
+ mode_now = mode_state
1553
+
1554
+ # 제거1 --- 중복 키 입력 제거 ---
1555
+
1556
+ # ---- ROI & OCR + 근접 읽기 (OP 모드에서만) ----
1557
+ roi=None; protected_boxes=[]; protected_ids=[]
1558
+ if mode_now == MODE_OP and finger_is_fresh and (last_finger_xy is not None):
1559
+ fx, fy = last_finger_xy
1560
+ roi = clamp_rect(int(fx-ROI_W//2), int(fy-ROI_H//2), ROI_W, ROI_H, W, H)
1561
+ last_roi = roi
1562
+ last_roi_active_until = now + ROI_KEEPALIVE_GRACE_SEC
1563
+
1564
+ rx,ry,rw,rh=roi
1565
+ for it in overlays:
1566
+ c=poly_center(it['poly'])
1567
+ if (rx<=c[0]<=rx+rw) and (ry<=c[1]<=ry+rh):
1568
+ it['expiry']=max(it.get('expiry', now), now + BASE_TTL)# 시간 연장 방식 통일 (8.20)
1569
+
1570
+ for it in overlays:
1571
+ bx,by,bw,bh=rect_from_poly(it['poly'])
1572
+ if fingertip_overlaps_box((fx,fy),(bx,by,bw,bh)):
1573
+ protected_boxes.append((bx,by,bw,bh))
1574
+ protected_ids.append(it.get('id'))
1575
+ it['expiry']=max(it.get('expiry', now), now + BASE_TTL)
1576
+ it['pin_until']=now+PIN_GRACE_SEC
1577
+
1578
+ # 근접 읽기(TTS) - OP 모드에서만
1579
+ overlap_items=[]
1580
+ for it in overlays:
1581
+ bx,by,bw,bh=rect_from_poly(it['poly'])
1582
+ if fingertip_overlaps_box((fx,fy),(bx,by,bw,bh)):
1583
+ overlap_items.append(it)
1584
+ near=None; bestd=1e9
1585
+ for it in overlap_items:
1586
+ c=poly_center(it['poly']); d=np.hypot(c[0]-fx, c[1]-fy)
1587
+ if d<bestd: bestd=d; near=it
1588
+
1589
+ if near is not None:
1590
+ txt=str(near.get('text','')).strip()
1591
+ conf=float(near.get('conf',0.0))
1592
+ speak_ok=(conf>=TTS_CONF) or (_has_korean(txt) and (conf>=TTS_CONF_FALLBACK))
1593
+ note=""
1594
+ low_conf=(_has_korean(txt) and conf<TTS_CONF_FALLBACK) or (not _has_korean(txt) and conf<TTS_CONF)
1595
+ if low_conf: note=(note+f" | low-conf({conf:.2f})") if note else f"low-conf({conf:.2f})"
1596
+ say_txt=None
1597
+ if speak_ok and txt:
1598
+ dict_thr=DICT_THRESHOLD_LOWCONF if low_conf else DICT_THRESHOLD
1599
+ mapped, sc = map_to_dict_canon(txt, threshold=dict_thr)
1600
+ if mapped:
1601
+ say_txt=mapped; note=(note+f" | dict:{sc:.0f}") if note else f"dict:{sc:.0f}"
1602
+ elif not STRICT_DICT_ONLY:
1603
+ thr=JAMO_THRESHOLD_LOWCONF if low_conf else JAMO_THRESHOLD
1604
+ fixed, changed = correct_text(txt, threshold=thr)
1605
+ say_txt=fixed if changed else txt
1606
+ if changed: note=(note+" | spellfix") if note else "spellfix"
1607
+ if say_txt:
1608
+ set_tts_target(say_txt, note=note,force=True); tts_last_seen_target_ts=now #선점발화 (8.21)
1609
+ else:
1610
+ if STRICT_DICT_ONLY and (speak_ok and txt):
1611
+ note=(note+" | no-dict") if note else "no-dict"
1612
+ set_tts_target(None, note=note); tts_current_display=txt
1613
+ else:
1614
+ if (now - tts_last_seen_target_ts) > TTS_TARGET_STICKY_SEC:
1615
+ set_tts_target(None, note="")
1616
+
1617
+ # ---- OCR 스케줄 (OP 모드에서만) ----
1618
+ if OCR_ENABLED:
1619
+ want_period=BASE_OCR_PERIOD
1620
+ roi_labels=[it for it in overlays if (roi[0]<=poly_center(it['poly'])[0]<=roi[0]+roi[2]
1621
+ and roi[1]<=poly_center(it['poly'])[1]<=roi[1]+roi[3])]
1622
+ roi_moved_fast=(last_roi is None) or (iou(last_roi, roi) < 0.6)
1623
+ roi_empty=(len(roi_labels)==0)
1624
+ roi_stale=(len(roi_labels)>0 and all((now - it.get('time',now) > STALE_AGE_SEC) or
1625
+ (it.get('conf',0)<LOW_CONF_TH) for it in roi_labels))
1626
+ if roi_moved_fast or roi_empty or roi_stale:
1627
+ want_period=min(want_period, EXTRA_OCR_PERIOD)
1628
+
1629
+ if (now - last_ocr_time) >= want_period and task_q.qsize()==0:
1630
+ gx,gy,gw,gh=roi
1631
+ g_roi=gray[gy:gy+gh, gx:gx+gw]
1632
+ blur_ok=(variance_of_laplacian(g_roi)>=BLUR_VAR_THRESH) or roi_empty
1633
+ avg_step=math.hypot(tx_ema, ty_ema)/max(1e-6, FLOW_DS)
1634
+ if blur_ok and avg_step>MOTION_GATE_PX: blur_ok=False
1635
+ if blur_ok:
1636
+ rects_to_run=[roi]
1637
+ try:
1638
+ task_q.put_nowait({
1639
+ 'frame_work': frame_for_ocr.copy(),
1640
+ 'rects': rects_to_run,
1641
+ 'roi': roi,
1642
+ 'frame_idx': frame_idx,
1643
+ })
1644
+ last_ocr_time=now; last_roi=roi
1645
+ except queue.Full:
1646
+ pass
1647
+
1648
+ elif mode_now == MODE_OP and (last_roi is not None) and (now <= last_roi_active_until):
1649
+ # YOLO가 잠깐 끊겨도 최근 ROI 내부 항목들의 TTL을 유지/초기화
1650
+ rx,ry,rw,rh = last_roi
1651
+ for it in overlays:
1652
+ c = poly_center(it['poly'])
1653
+ if (rx<=c[0]<=rx+rw) and (ry<=c[1]<=ry+rh):
1654
+ it['expiry'] = max(it.get('expiry', now), now + BASE_TTL)
1655
+ else:
1656
+ # INFO 모드에서는 근접 읽기/ROI OCR 모두 비활성화
1657
+ if mode_now == MODE_OP:
1658
+ pass
1659
+
1660
+ # 손가락이 사라진 뒤에도 근접 읽기가 남아 반복되는 것 방지
1661
+ if mode_now == MODE_OP and not finger_is_fresh: #손가락이 사라진 뒤에도 근접 읽기가 남아 반복되는것을 제거 (8.21)
1662
+ if (time.time() - tts_last_seen_target_ts) > TTS_TARGET_STICKY_SEC:
1663
+ set_tts_target(None, note="")
1664
+
1665
+ # ---- OCR 결과 병합 (OP 모드에서만) ----
1666
+ if mode_now == MODE_OP:
1667
+ try:
1668
+ while True:
1669
+ res=result_q.get_nowait()
1670
+ if 'dt_ms' in res:
1671
+ if OCR_EMA is None: OCR_EMA=res['dt_ms']
1672
+ else: OCR_EMA=(1-OCR_EMA_ALPHA)*OCR_EMA + OCR_EMA_ALPHA*res['dt_ms']
1673
+ if res.get('new_items'):
1674
+ def _T_from_to(a,b):
1675
+ if b<=a: return np.eye(3,dtype=np.float32)
1676
+ T=np.eye(3,dtype=np.float32)
1677
+ for (src,dst,M3) in sim_steps:
1678
+ if a < dst <= b: T = M3 @ T
1679
+ return T
1680
+ T_cap2now=_T_from_to(res.get('frame_idx',frame_idx), frame_idx)
1681
+ def _apply(poly, M3):
1682
+ P=poly.astype(np.float32)
1683
+ return (P @ M3[:2,:2].T) + M3[:2,2]
1684
+ roi_now=_rect_aabb_after_M(res['roi'], T_cap2now, W, H)
1685
+ new_items=[]
1686
+ for ni in res['new_items']:
1687
+ ni['poly']=_apply(ni['poly'], T_cap2now)
1688
+ bx,by,bw,bh=bbox_of_poly(ni['poly'])
1689
+ cx,cy=bx+bw/2, by+bh/2
1690
+ gx,gy,gw,gh=roi_now
1691
+ if gx<=cx<=gx+gw and gy<=cy<=gy+gh:
1692
+ new_items.append(ni)
1693
+ overlays=merge_update_overlays(overlays, new_items, roi_now, now_ts=time.time(),
1694
+ iou_th=MERGE_IOU_TH, center_dist_th=MERGE_CENTER_DIST)
1695
+ except queue.Empty:
1696
+ pass
1697
+
1698
+ # ---- GUIDE MODE tick (OP 모드에서만) ----
1699
+ if mode_now == MODE_OP:
1700
+ guide_tick(now, last_finger_xy if finger_is_fresh else None, overlays)
1701
+
1702
+ # ---- Prune & render ----
1703
+ now2=time.time()
1704
+ if (now2-last_prune) >= PRUNE_TIMEOUT_SEC: #prune 주기 변수화 (8.20)
1705
+ overlays = dedupe_same_text_overlays(overlays)
1706
+ active_roi = None
1707
+ if mode_now == MODE_OP:
1708
+ if finger_is_fresh and roi is not None:
1709
+ active_roi = roi
1710
+ elif (last_roi is not None) and (now2 <= last_roi_active_until):
1711
+ active_roi = last_roi
1712
+ overlays = prune_overlays(overlays, now2, active_roi=active_roi)
1713
+ last_prune=now2
1714
+
1715
+ if roi is not None and (mode_now == MODE_OP) and finger_is_fresh and last_finger_xy is not None:
1716
+ cv2.rectangle(frame_disp, (roi[0],roi[1]), (roi[0]+roi[2], roi[1]+roi[3]), (120,120,255), 1)
1717
+
1718
+ if last_finger_xy is not None:
1719
+ color=(0,255,0) if finger_src=="YOLO" else ((255,0,255) if finger_src=="KLT" else (160,160,160))
1720
+ cv2.circle(frame_disp, last_finger_xy, 9, color, -1)
1721
+ if finger_src=="KLT" and YOLO_DEBUG and klt_draw_pts is not None:
1722
+ for p in klt_draw_pts[:60]:
1723
+ cv2.circle(frame_disp, (int(p[0,0]), int(p[0,1])), 2, (180,0,180), -1)
1724
+
1725
+ # 안내 목표 강조(OP 모드에서만)
1726
+ if (mode_now == MODE_OP) and GUIDE_MODE and GUIDE_TARGET_ITEM is not None:
1727
+ highlight_guide_target(frame_disp, GUIDE_TARGET_ITEM)
1728
+
1729
+ # 오버레이 렌더링(OP 모드에서만)
1730
+ if mode_now == MODE_OP:
1731
+ draw_overlays(frame_disp, overlays, now2)
1732
+
1733
+ if SHOW_TTS_HINT:
1734
+ mode_txt = "MODE: OP" if mode_now == MODE_OP else f"MODE: INFO({int(INFO_PERIOD_SEC)}s)"
1735
+ l1=f"{mode_txt} | OCR: {'ON' if (OCR_ENABLED and mode_now==MODE_OP) else 'OFF'} | {OCR_ENGINE}"
1736
+ if OCR_EMA is not None and mode_now==MODE_OP: l1+=f" ~{int(OCR_EMA)} ms"
1737
+ l1+=f" TTS: {'ON' if TTS_ENABLE else 'OFF'}"
1738
+ src_txt=f"SRC: {finger_src}"
1739
+ if finger_src=="YOLO" and yolo_last_conf is not None: src_txt+=f" conf={yolo_last_conf:.2f}"
1740
+ if finger_src=="KLT" and klt_draw_pts is not None: src_txt+=f" klt_pts={len(klt_draw_pts)}"
1741
+ age_ms=int((now - finger_last_seen)*1000.0) if last_finger_xy is not None else -1
1742
+ if age_ms>=0: src_txt+=f" age={age_ms} ms"
1743
+ if yolo_box_count is not None: src_txt+=f" boxes={yolo_box_count}"
1744
+ roi_txt=f"ROI: {ROI_W}x{ROI_H} ([ ] width ; ' height)"
1745
+
1746
+ # <<< CHANGED: 현재 target 없더라도 마지막 발화 문구를 HUD에 유지
1747
+ say_txt = (tts_current_display.strip() or tts_last_spoken_text.strip())
1748
+ say_line=f"SAY: {say_txt}" if say_txt else "SAY: (none)"
1749
+ if tts_current_note: say_line+=f" [{tts_current_note}]"
1750
+
1751
+ guide_txt = f"GUIDE: {'ON' if (mode_now == MODE_OP and GUIDE_MODE) else 'OFF'}"
1752
+ if (mode_now == MODE_OP) and GUIDE_TARGET:
1753
+ guide_txt += f" target='{GUIDE_TARGET}'"
1754
+ if (mode_now == MODE_OP) and GUIDE_TARGET_ITEM is not None:
1755
+ cx, cy = map(int, _overlay_center(GUIDE_TARGET_ITEM))
1756
+ guide_txt += f" tgt@({cx},{cy})"
1757
+
1758
+ lines=[guide_txt, l1, src_txt, roi_txt, say_line]
1759
+
1760
+ img_rgb=cv2.cvtColor(frame_disp, cv2.COLOR_BGR2RGB)
1761
+ pil=Image.fromarray(img_rgb); draw=ImageDraw.Draw(pil)
1762
+ font_path=None
1763
+ for p in [r"C:\Windows\Fonts\malgun.ttf", r"C:\Windows\Fonts\NanumGothic.ttf",
1764
+ r"C:\Windows\Fonts\NotoSansCJKkr-Regular.otf",
1765
+ "/usr/share/fonts/truetype/noto/NotoSansCJKkr-Regular.ttc"]:
1766
+ if os.path.isfile(p): font_path=p; break
1767
+ font=ImageFont.truetype(font_path, 22) if font_path else ImageFont.load_default()
1768
+
1769
+ pad_x,pad_y,gap=10,8,4
1770
+ widths=[draw.textlength(s, font=font) for s in lines]
1771
+ tw=int(max(widths)) if widths else 0; lh=24
1772
+ th=lh*len(lines)+(len(lines)-1)*gap
1773
+ x0,y0=8,6
1774
+ bg=Image.new("RGBA",(tw+pad_x*2, th+pad_y*2),(0,0,0,180))
1775
+ pil.paste(bg,(x0,y0),bg)
1776
+ y=y0+pad_y
1777
+ for s in lines:
1778
+ draw.text((x0+pad_x,y), s, font=font, fill=(255,255,255), stroke_width=2, stroke_fill=(0,0,0))
1779
+ y+=lh+gap
1780
+ frame_disp[:]=cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
1781
+
1782
+ disp_scale=min(1.0, DISPLAY_MAX_W/float(W))
1783
+ vis=frame_disp if disp_scale==1.0 else cv2.resize(frame_disp, None, fx=disp_scale, fy=disp_scale, interpolation=cv2.INTER_AREA)
1784
+ cv2.imshow(WINDOW_NAME, vis)
1785
+
1786
+ key=cv2.waitKey(1)&0xFF
1787
+ if key==ord('q'): break
1788
+ elif key==ord('o'):
1789
+ OCR_ENABLED = not OCR_ENABLED
1790
+ drain_queue(task_q)
1791
+ last_ocr_time = 0.0 if OCR_ENABLED else time.time()
1792
+ print(f"[OCR] {'ENABLED' if OCR_ENABLED else 'DISABLED'}")
1793
+ elif key==ord('t'):
1794
+ SHOW_TTS_HINT = not SHOW_TTS_HINT
1795
+ print(f"[HUD] {'ON' if SHOW_TTS_HINT else 'OFF'}")
1796
+ elif key==ord('s'):
1797
+ TTS_ENABLE = not TTS_ENABLE
1798
+ set_tts_target(None, note="")
1799
+ print(f"[TTS] {'ENABLED' if TTS_ENABLE else 'DISABLED'}")
1800
+ elif key==ord('y'):
1801
+ YOLO_SHOW_INPUT = not YOLO_SHOW_INPUT
1802
+ if not YOLO_SHOW_INPUT:
1803
+ try: cv2.destroyWindow(YOLO_INPUT_WIN)
1804
+ except: pass
1805
+ print(f"[YOLO] INPUT PREVIEW {'ON' if YOLO_SHOW_INPUT else 'OFF'}")
1806
+ elif key==ord('p'):
1807
+ if yolo_last_in is not None:
1808
+ os.makedirs('yolo_inputs', exist_ok=True)
1809
+ fname=time.strftime("yolo_inputs/%Y%m%d_%H%M%S.png")
1810
+ cv2.imwrite(fname, yolo_last_in)
1811
+ print(f"[YOLO] saved input preview -> {fname}")
1812
+ else:
1813
+ print("[YOLO] no input to save yet")
1814
+ # --- ROI 크기 조절 ---
1815
+ elif key==ord('['): # width -
1816
+ ROI_W=max(MIN_ROI_W, ROI_W-40)
1817
+ elif key==ord(']'): # width +
1818
+ ROI_W=min(W, ROI_W+40)
1819
+ elif key==ord(';'): # height -
1820
+ ROI_H=max(MIN_ROI_H, ROI_H-30)
1821
+ elif key==ord("'"): # height +
1822
+ ROI_H=min(H, ROI_H+30)
1823
+ elif key==ord('r'): # reset ROI
1824
+ ROI_W, ROI_H = 420, 420
1825
+ print("[ROI] reset to 420x420")
1826
+
1827
+ # ---- 모드 전환 키 ----
1828
+ elif key == ord('1'):
1829
+ _enter_op_mode()
1830
+ print("[MODE] OP")
1831
+
1832
+ elif key == ord('2'):
1833
+ _enter_info_mode()
1834
+ print("[MODE] INFO")
1835
+
1836
+ elif key == ord('3'):
1837
+ _enter_guide_mode()
1838
+ print("[MODE] GUIDE")
1839
+
1840
+ # ---- GUIDE mode keys ----
1841
+ elif key == ord('c'):
1842
+ GUIDE_TARGET = None
1843
+ GUIDE_TARGET_ITEM = None
1844
+ set_tts_target("목표를 취소했습니다.", note="guide cancel")
1845
+ print("[GUIDE] target cleared")
1846
+
1847
+ elif key == ord('v'):
1848
+ if mode_state == MODE_GUIDE:
1849
+ text = stt_listen_once(timeout=4, phrase_time_limit=4) if (USE_STT and _STT_OK) else None
1850
+ if text:
1851
+ print(f"[STT] heard: {text}")
1852
+ set_guide_target_from_text(text)
1853
+ else:
1854
+ print("[STT] no text")
1855
+ else:
1856
+ set_tts_target("먼저 3번을 눌러 안내 모드를 켜 주세요.", note="guide")
1857
+
1858
+ elif key == ord('f'):
1859
+ if mode_state == MODE_GUIDE:
1860
+ try:
1861
+ print("\n[GUIDE] 입력 예시: '세탁', '건조맞춤' ...")
1862
+ user_in = input("[GUIDE] 목표 단어 입력: ").strip()
1863
+ if user_in:
1864
+ set_guide_target_from_text(user_in)
1865
+ except Exception:
1866
+ pass
1867
+ else:
1868
+ set_tts_target("먼저 3번을 눌러 안내 모드를 켜 주세요.", note="guide")
1869
+
1870
+
1871
+ frame_idx+=1
1872
+ prev_gray_full=gray.copy()
1873
+ prev_gray_klt=gray_klt.copy()
1874
+
1875
+ # cleanup
1876
+ task_q.put(None)
1877
+ _info_stop.set()
1878
+ if TTS_ENABLE and 'tts' in globals() and tts:
1879
+ tts_stop.set()
1880
+ # tts_q.put(None) 삭제 (8.20)
1881
+ try: tts.close()
1882
+ except Exception: pass
1883
+ yolo_stop.set()
1884
+ cap.release()
1885
+ cv2.destroyAllWindows()
tts_reader.py ADDED
@@ -0,0 +1,197 @@
1
+ # tts_reader.py — import해서 쓰는 모듈 버전
2
+ import os, time, threading, queue, hashlib
3
+ from typing import Optional, Iterable
4
+ import pygame
5
+ from google.cloud import texttospeech
6
+
7
+
8
+ def _is_korean(s: str) -> bool:
9
+ return any('가' <= ch <= '힣' for ch in (s or ""))
10
+
11
+
12
+ class TTSReader:
13
+ """
14
+ - say(text): 비동기 합성+재생 (메인 루프 non-blocking)
15
+ - 같은 문구 과도 반복 방지(cooldown_sec)
16
+ - 텍스트별 mp3 캐시(tts_cache/)로 재사용
17
+ - 한/영 자동 보이스 선택
18
+ - ignore/min_len로 노이즈 필터 가능
19
+ - credentials_path를 넘기지 않으면 GOOGLE_APPLICATION_CREDENTIALS 환경변수 사용
20
+ """
21
+ def __init__(
22
+ self,
23
+ *,
24
+ credentials_path: Optional[str] = None,
25
+ cache_dir: str = "tts_cache",
26
+ cooldown_sec: float = 1.2,
27
+ speaking_rate: float = 1.05,
28
+ pitch: float = 0.0,
29
+ ko_voice: str = "ko-KR-Standard-A",
30
+ en_voice: str = "en-US-Standard-C",
31
+ min_len: int = 2,
32
+ ignore: Optional[Iterable[str]] = None,
33
+ ):
34
+ # 인증
35
+ if credentials_path:
36
+ self.client = texttospeech.TextToSpeechClient.from_service_account_file(credentials_path)
37
+ else:
38
+ self.client = texttospeech.TextToSpeechClient()
39
+
40
+ # 기본 필터
41
+ self.ignore = set(["", None, "None", "hand not detected", "hand detected, but ocr doesn't exist"])
42
+ if ignore:
43
+ self.ignore |= set(ignore)
44
+ self.min_len = min_len
45
+
46
+ # 보이스/오디오 설정
47
+ self.ko_voice = ko_voice
48
+ self.en_voice = en_voice
49
+ self.speaking_rate = speaking_rate
50
+ self.pitch = pitch
51
+ self.cooldown_sec = cooldown_sec
52
+
53
+ # 캐시
54
+ self.cache_dir = cache_dir
55
+ os.makedirs(self.cache_dir, exist_ok=True)
56
+
57
+ # 상태
58
+ self.last_text = ""
59
+ self.last_time = 0.0
60
+ self._running = True
61
+
62
+ # 재생 스레드
63
+ self.q = queue.Queue()
64
+ if not pygame.mixer.get_init():
65
+ pygame.mixer.init()
66
+ target_fn = getattr(self, '_worker', None)
67
+ if target_fn is None:
68
+ # 안전장치: 동일 로직의 임시 워커 생성
69
+ def target_fn():
70
+ while self._running:
71
+ text = self.q.get()
72
+ if text is None:
73
+ break
74
+ try:
75
+ path = self._synth_if_needed(text)
76
+ self._play(path)
77
+ except Exception as e:
78
+ print(f"[TTS] error: {e}")
79
+ self.worker = threading.Thread(target=target_fn, daemon=True)
80
+ self.worker.start()
81
+
82
+ # 컨텍스트 매니저 지원 (선택)
83
+ def __enter__(self):
84
+ return self
85
+ def __exit__(self, exc_type, exc, tb):
86
+ self.close()
87
+
88
+ def close(self):
89
+ """앱 종료 시 호출(선택)."""
90
+ self._running = False
91
+ self.q.put(None)
92
+ try:
93
+ self.worker.join(timeout=2.0)
94
+ except Exception:
95
+ pass
96
+ # pygame.mixer.quit() # 앱 전체에서 mixer 공유 시 보통 유지
97
+
98
+ # ---------- public API ----------
99
+ def say(self, text: Optional[str]) -> bool:
100
+ """
101
+ 텍스트를 읽도록 큐에 추가. 스킵되면 False, 큐에 들어가면 True.
102
+ 디바운스/필터/길이 조건을 통과해야 읽음.
103
+ """
104
+ text = (text or "").strip()
105
+ # 추가 1: 한국어일 때만 읽기
106
+ if not _is_korean(text):
107
+ return False
108
+
109
+ if not text or text in self.ignore or len(text) < self.min_len:
110
+ return False
111
+
112
+ now = time.time()
113
+ if text == self.last_text and (now - self.last_time) < self.cooldown_sec:
114
+ return False
115
+
116
+ self.last_text = text
117
+ self.last_time = now
118
+ self.q.put(text)
119
+ return True
120
+
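+ # Usage sketch (comments only): reader.say("세탁") queues the phrase and returns True;
+ # repeating the same text within cooldown_sec returns False, and non-Korean input is
+ # skipped by the _is_korean() gate above.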
121
+ def say_if_close(self, text: Optional[str], distance: float, threshold: float = 100.0) -> bool:
122
+ """
123
+ 손가락-텍스트 거리가 threshold보다 가까울 때만 읽고 싶을 때 사용.
124
+ """
125
+ if distance is None or distance >= threshold:
126
+ return False
127
+ return self.say(text)
128
+ # 추가 2: 큐 비우기
129
+ def clear_queue(self):
130
+ """큐에 대기 중인 모든 TTS 요청을 비웁니다."""
131
+ with self.q.mutex:
132
+ self.q.queue.clear()
133
+
134
+ # ---------- internals ----------
135
+ def _voice(self, text: str):
136
+ if _is_korean(text):
137
+ return texttospeech.VoiceSelectionParams(language_code="ko-KR", name=self.ko_voice)
138
+ return texttospeech.VoiceSelectionParams(language_code="en-US", name=self.en_voice)
139
+
140
+ def _audio_cfg(self):
141
+ return texttospeech.AudioConfig(
142
+ audio_encoding=texttospeech.AudioEncoding.MP3,
143
+ speaking_rate=self.speaking_rate,
144
+ pitch=self.pitch,
145
+ )
146
+
147
+ def _cache_path(self, text: str) -> str:
148
+ h = hashlib.sha1(text.encode("utf-8")).hexdigest()
149
+ return os.path.join(self.cache_dir, f"{h}.mp3")
150
+
151
+ def _synth_if_needed(self, text: str) -> str:
152
+ path = self._cache_path(text)
153
+ if not os.path.exists(path):
154
+ req = texttospeech.SynthesisInput(text=text)
155
+ resp = self.client.synthesize_speech(input=req, voice=self._voice(text), audio_config=self._audio_cfg())
156
+ with open(path, "wb") as f:
157
+ f.write(resp.audio_content)
158
+ return path
159
+
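+ # Cache behaviour (sketch): the mp3 path is keyed by sha1(text), so each distinct sentence
+ # is synthesized once and replayed from cache_dir afterwards; deleting the file simply
+ # forces a re-synthesis on the next say().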
160
+ def _play(self, path: str):
161
+ pygame.mixer.music.load(path)
162
+ pygame.mixer.music.play()
163
+ while pygame.mixer.music.get_busy() and self._running:
164
+ time.sleep(0.03)
165
+
166
+ def _worker(self):
167
+ while self._running:
168
+ text = self.q.get()
169
+ if text is None:
170
+ break
171
+ try:
172
+ path = self._synth_if_needed(text)
173
+ self._play(path)
174
+ except Exception as e:
175
+ print(f"[TTS] error: {e}")
176
+
177
+ def stop(self):
178
+ try:
179
+ import pygame
180
+ pygame.mixer.music.stop()
181
+ except Exception:
182
+ pass
183
+
184
+ def cancel(self):
185
+ try: self.stop()
186
+ except Exception: pass
187
+
188
+ def flush(self):
189
+ try: self.stop()
190
+ except Exception: pass
191
+
192
+ def is_busy(self):
193
+ try:
194
+ import pygame
195
+ return pygame.mixer.music.get_busy()
196
+ except Exception:
197
+ return False
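+
+ # Minimal self-check (sketch): runs only when this module is executed directly and assumes
+ # GOOGLE_APPLICATION_CREDENTIALS points at a valid service-account key.
+ if __name__ == "__main__":
+ reader = TTSReader(cooldown_sec=1.0, min_len=2)
+ print("queued:", reader.say("안녕하세요"))   # True: first request is queued
+ print("queued:", reader.say("안녕하세요"))   # False: same text within the cooldown
+ time.sleep(3.0)                              # let the worker thread synthesize and play
+ reader.close()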