Hyungseoky commited on
Commit
4efdf15
ยท
verified ยท
1 Parent(s): 0bd2528

Upload 10 files

Browse files
nu/0.ingest_daily_LLS.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ingest_daily_lls_simple.py
2
+ import os
3
+ import sys
4
+ import warnings
5
+ import numpy as np
6
+ import pandas as pd
7
+ import urllib3
8
+ from datetime import datetime, timedelta
9
+ import trino
10
+ from trino.auth import BasicAuthentication
11
+
12
+ warnings.filterwarnings("ignore")
13
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
14
+
15
+ # ์ถœ๋ ฅ ๋””๋ ‰ํ„ฐ๋ฆฌ ์ƒ์„ฑ
16
+ os.makedirs("./daily", exist_ok=True)
17
+ sys.path.append(os.getcwd())
18
+
19
+
20
+ # ============================================================
21
+ # 1. ์„ค์ • ์ •๋ณด
22
+ # ============================================================
23
# Trino endpoint settings.  Each value can be overridden with an environment
# variable so a deployment does not have to edit this file; the literals are
# kept only as backward-compatible defaults.
conn_info = {
    'host': os.environ.get("TRINO_HOST", "aidp-trino-analysis.sksiltron.co.kr"),
    'port': int(os.environ.get("TRINO_PORT", "31085")),
    'catalog': os.environ.get("TRINO_CATALOG", "iceberg"),
    'schema': os.environ.get("TRINO_SCHEMA", "ibg_lake"),
}

# SECURITY(review): a real account and password are committed in source.
# Prefer TRINO_USER / TRINO_PASSWORD from the environment; the literal
# defaults below exist only so existing runs keep working and should be
# removed (and the password rotated) as soon as the environment is set up.
login_info = {
    'user': os.environ.get("TRINO_USER", '257285'),
    'pw': os.environ.get("TRINO_PASSWORD", 'tjrdlA1!'),
}

# Data extraction settings.
data_info = {
    'wafer_type': 'EPI',
}


# Collection period: 20240101 ~ today (evaluated once, at import time).
START_DATE = "20240101"
END_DATE = datetime.now().strftime("%Y%m%d")
44
+
45
+
46
+ # ============================================================
47
+ # 2. Trino ์—ฐ๊ฒฐ
48
+ # ============================================================
49
def get_trino_connection():
    """Open a new Trino DB-API connection from the module-level settings.

    Host, port, catalog and schema are read from ``conn_info``; the user name
    and password are read from ``login_info`` and sent with HTTP basic
    authentication over HTTPS.

    Returns:
        The connection object returned by ``trino.dbapi.connect``.
    """
    user_name = login_info['user']
    connect_kwargs = {
        "host": conn_info['host'],
        "port": conn_info['port'],
        "user": user_name,
        "catalog": conn_info['catalog'],
        "schema": conn_info["schema"],
        "http_scheme": "https",
        "auth": BasicAuthentication(user_name, login_info['pw']),
        # NOTE(review): TLS certificate verification is switched off here;
        # confirm this is intentional for the target cluster.
        "verify": False,
    }
    return trino.dbapi.connect(**connect_kwargs)
60
+
61
+ # ============================================================
62
+ # 3. ์ฟผ๋ฆฌ ์‹คํ–‰
63
+ # ============================================================
64
def make_df(conn, query):
    """Run ``query`` on ``conn`` and return the full result as a DataFrame.

    Args:
        conn: A DB-API connection (anything exposing ``cursor()``).
        query: SQL text to execute.

    Returns:
        pandas.DataFrame whose columns are named after the first element of
        each ``cursor.description`` entry, in result order.

    Raises:
        Whatever the driver raises from ``execute`` / ``fetchall``; the cursor
        is closed before the exception propagates.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(query)
        columns = [col[0] for col in cursor.description]
        rows = cursor.fetchall()
    finally:
        # Always release the cursor, including when the query fails.
        cursor.close()
    return pd.DataFrame(rows, columns=columns)
71
+
72
+ # ============================================================
73
+ # 4. roughbin_no ๋งคํ•‘ ํ•จ์ˆ˜ (์ „์ฒ˜๋ฆฌ์šฉ)
74
+ # ============================================================
75
# Exact roughbin_no code -> defect class name.  Built once at import time
# because map_roughbin_no is applied to every row of the result set.
_ROUGHBIN_MAPPING = {
    0: 'LPD', 100: 'LPD-N', 110: 'Micro-Scratch', 111: 'Void', 115: 'PID',
    120: 'LPD-E', 130: 'LPD-S', 140: 'LLPD', 141: 'Air Pocket', 150: 'DIC-Unique',
    160: 'Stain', 170: 'COP', 200: 'Cluster Area', 205: 'Extended Defects',
    210: 'Scratch', 220: 'Slipline', 230: 'Line', 231: 'Area', 233: 'Radial',
    234: 'Ring', 512: 'Residue', 520: 'Boat Mark', 902: 'Streak', 999: 'Nuisance',
    990: 'LPD Nuisance', 991: 'PPD Nuisance', 501: 'Haze Slipline', 502: 'Hazeline',
    600: 'Grid', 700: 'ROI', 800: 'X Section',
}

# Inclusive (low, high, name) code ranges, consulted only when the exact
# table above has no entry for the code.
_ROUGHBIN_RANGES = (
    (541, 548, 'Haze Slipline'),
    (531, 538, 'Hazeline'),
    (601, 609, 'Grid'),
    (701, 709, 'ROI'),
    (801, 809, 'X Section'),
)


def map_roughbin_no(roughbin):
    """Translate a raw ``roughbin_no`` code into its defect class name.

    Args:
        roughbin: Code as int, float or numeric string; may be missing.

    Returns:
        The class name for a known code or code range, ``'Unknown'`` for an
        integer code that matches nothing, and ``None`` when the value is
        missing (NaN/None) or cannot be converted to an integer.
    """
    if pd.isna(roughbin):
        return None
    try:
        code = int(roughbin)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric text, unsupported types, or +/-inf: no usable code.
        return None

    name = _ROUGHBIN_MAPPING.get(code)
    if name is not None:
        return name
    for low, high, range_name in _ROUGHBIN_RANGES:
        if low <= code <= high:
            return range_name
    return 'Unknown'
104
+
105
+
106
+ # ============================================================
107
+ # 5. ์ผ์ž๋ณ„ ์ฟผ๋ฆฌ ํ…œํ”Œ๋ฆฟ (์šด์˜ ๊ธฐ์ค€ 07:00 ๋ฐ˜์˜)
108
+ # ============================================================
109
# Daily query template (operational day boundary at 07:00).
# Placeholders filled in by ingest_single_day via str.format:
#   {search_min_date}, {search_max_date} - BASE_DT window (YYYYMMDD strings)
#   {start_timestamp}, {end_timestamp}   - half-open HIS_REGIST_DTTM window
# Innermost query C selects OPE_ID '8030' history rows, B joins them to the
# OPE_ID '8210' inspection rows by WAF_ID, and the outer query attaches the
# individual defects by SCANINDEX.
# The derived table carries no ORDER BY: row order of a sub-query is not
# preserved through the outer join, so sorting there only cost time.
LLS_DAILY_QUERY = """
SELECT
    B.SUBLOT_ID, B.CAR_ID, B.SCAN_TIME, B.EQP_ID AS EQP_ID_8210, B.EQP_ID_8030,
    B.ROUTE_ID, B.PROD_ID, B.HIS_REGIST_DTTM AS HIS_REGIST_DTTM_8030,
    HIS_REGIST_DTTM_Pcounter, B.WAF_ID, B.DEFECT_CNT, B.OPE_ID,
    A.coor_x, A.coor_y, A.d_size, A.test_no, A.roughbin_no
FROM
    iceberg.ibg_lake.DAS_DEFECT A,
    (
        SELECT
            B.HIS_REGIST_DTTM AS HIS_REGIST_DTTM_Pcounter,
            C.SUBLOT_ID, B.WAF_ID, B.SCANINDEX, B.EQP_ID, C.EQP_ID AS EQP_ID_8030,
            B.PT_D, C.BASE_DT, C.CAR_ID, C.SLOT_NO, B.RECIP_ID, B.DEFECT_CNT,
            B.PROD_ID, B.ROUTE_ID, C.HIS_REGIST_DTTM, B.SCAN_TIME, B.OPE_ID
        FROM
            iceberg.ibg_lake.das_inspection B,
            (
                SELECT
                    SUBLOT_ID, WAF_ID, EQP_ID, HIS_REGIST_DTTM, CAR_ID, SLOT_NO, BASE_DT
                FROM
                    iceberg.ibg_lake.odb_dwaf_ope_his
                WHERE 1=1
                    AND BASE_DT BETWEEN '{search_min_date}' AND '{search_max_date}'
                    AND OPE_ID = '8030'
                    AND SUBLOT_ID NOT LIKE '7V%'
                    AND SUBLOT_ID NOT LIKE '7G%'
                    AND HIS_CAT = 'OC'
            ) C
        WHERE 1=1
            AND C.WAF_ID = B.WAF_ID
            AND B.OPE_ID = '8210'
            AND (
                B.ROUTE_ID LIKE 'WF7EP%' OR
                B.ROUTE_ID LIKE '%RW%' OR
                B.ROUTE_ID = 'SUB-LLS-10EA' OR
                B.ROUTE_ID = 'SUB-LLS-T-OV' OR
                B.ROUTE_ID = 'SUB-SLIP-LLS'
            )
    ) B
WHERE 1=1
    AND A.SCANINDEX = B.SCANINDEX
    AND A.TEST_NO IN (1, 6)
    AND B.HIS_REGIST_DTTM >= '{start_timestamp}'
    AND B.HIS_REGIST_DTTM < '{end_timestamp}'
"""
155
+
156
+
157
+ # ============================================================
158
+ # 6. ์ผ์ž๋ณ„ ์ˆ˜์ง‘ ํ•จ์ˆ˜
159
+ # ============================================================
160
def _preprocess_lls_frame(df, d_size_min=0.037):
    """Clean the raw query result and add the derived defect columns.

    Works on an empty frame as long as it still carries the query columns.

    Args:
        df: Raw result of ``LLS_DAILY_QUERY``.
        d_size_min: Rows with ``d_size`` below this value (or non-numeric
            ``d_size``) are dropped.

    Returns:
        A new, re-indexed DataFrame with ``ANGLE``, ``DISTANCE`` and the two
        label columns added.
    """
    # Keep only the first 14 characters (YYYYMMDDHHMMSS) of both timestamps.
    df['HIS_REGIST_DTTM_8030'] = df['HIS_REGIST_DTTM_8030'].astype(str).str[:14]
    df['HIS_REGIST_DTTM_Pcounter'] = df['HIS_REGIST_DTTM_Pcounter'].astype(str).str[:14]
    df['d_size'] = pd.to_numeric(df['d_size'], errors='coerce')

    # Drop scans whose defect counter is 9999 or more.
    df = df[~(df['DEFECT_CNT'] >= 9999)].copy()

    # Rescale by 1/1000 and shift by 150 on both axes.
    # NOTE(review): presumably um -> mm with the origin moved to the wafer
    # centre - confirm against the inspection tool's coordinate convention.
    df['coor_x'] = df['coor_x'] / 1000 - 150
    df['coor_y'] = df['coor_y'] / 1000 - 150
    df['coor_x'] = df['coor_x'].astype(float)
    df['coor_y'] = df['coor_y'].astype(float)

    # Polar coordinates: angle in degrees within [0, 360), radius from (0, 0).
    df['ANGLE'] = (np.arctan2(df['coor_y'], df['coor_x']) / np.pi * 180 + 360) % 360
    df['DISTANCE'] = np.sqrt(df['coor_x'] ** 2 + df['coor_y'] ** 2)

    df['LLS ๊ตฌ๋ถ„'] = df['roughbin_no'].apply(map_roughbin_no)
    # Radius <= 105 vs > 105; the default only applies when DISTANCE is NaN.
    df['Particle ๋ถ„ํฌ'] = np.select(
        condlist=[df['DISTANCE'] <= 105, df['DISTANCE'] > 105],
        choicelist=["๋ฉด๋‚ด", "Edge"],
        default="๊ธฐํƒ€"
    )

    return df[df['d_size'] >= d_size_min].reset_index(drop=True)


def ingest_single_day(target_date: str) -> bool:
    """Fetch, preprocess and store one operational day of LLS defects.

    The operational day runs from ``target_date`` 07:00:00 (inclusive) to the
    next calendar day 07:00:00 (exclusive).  The result is written to
    ``./daily/{target_date}.parquet``; if that file already exists nothing is
    queried.

    Args:
        target_date: Day to ingest, formatted ``YYYYMMDD``.

    Returns:
        True when the file already existed or was written (also for a day
        without data, which yields an empty file), False when the query or
        the processing failed.

    Raises:
        ValueError: If ``target_date`` is not a valid ``YYYYMMDD`` date.
    """
    output_path = f"./daily/{target_date}.parquet"
    if os.path.exists(output_path):
        print(f"[SKIP] ์ด๋ฏธ ์กด์žฌํ•จ: {output_path}")
        return True

    # Operational-day window (07:00 cut-off).
    start_dt = datetime.strptime(target_date, "%Y%m%d") + timedelta(hours=7)  # YYYYMMDD 07:00:00
    end_dt = start_dt + timedelta(days=1)  # YYYYMMDD+1 07:00:00

    query = LLS_DAILY_QUERY.format(
        # BASE_DT window is widened by one day before the start.
        search_min_date=(start_dt - timedelta(days=1)).strftime("%Y%m%d"),
        search_max_date=end_dt.strftime("%Y%m%d"),
        start_timestamp=start_dt.strftime("%Y%m%d%H%M%S"),  # YYYYMMDD070000
        end_timestamp=end_dt.strftime("%Y%m%d%H%M%S"),  # YYYYMMDD+1 070000
    )

    conn = None
    try:
        conn = get_trino_connection()
        print(f"[{target_date}] ์ฟผ๋ฆฌ ์‹คํ–‰...")
        df = make_df(conn, query)
        print(f"[{target_date}] ์กฐํšŒ ์™„๋ฃŒ: {len(df):,}๊ฑด")

        if df.empty:
            # Keep the (empty) frame with its query columns: swapping it for
            # a column-less DataFrame made the preprocessing below raise
            # KeyError, so a day without data was reported as a failure.
            print(f"[{target_date}] ๋ฐ์ดํ„ฐ ์—†์Œ โ†’ ๋นˆ ํŒŒ์ผ ์ €์žฅ")

        df = _preprocess_lls_frame(df)
        print(f"[{target_date}] ์ „์ฒ˜๋ฆฌ ์™„๋ฃŒ: {len(df):,}๊ฑด")

        df.to_parquet(output_path, index=False)
        print(f"[{target_date}] ์ €์žฅ ์™„๋ฃŒ: {output_path}")
        return True

    except Exception as e:
        print(f"[ERROR] {target_date} ์‹คํŒจ: {str(e)}")
        # NOTE(review): the error record goes to a different tree than the
        # ./daily output - confirm this location is intended.
        error_path = f"../data/daily/daily/error_{target_date}.parquet"
        try:
            # The directory may not exist yet; without this the handler itself
            # raised and aborted the whole historical run.
            os.makedirs(os.path.dirname(error_path), exist_ok=True)
            error_df = pd.DataFrame([{"date": target_date, "error": str(e)}])
            error_df.to_parquet(error_path, index=False)
        except Exception as write_err:
            print(f"[ERROR] {target_date} could not write error record {error_path}: {write_err}")
        return False
    finally:
        if conn is not None:
            conn.close()
229
+
230
+
231
+ # ============================================================
232
+ # 7. ์ „์ฒด ๊ธฐ๊ฐ„ ์‹คํ–‰
233
+ # ============================================================
234
def ingest_historical_range():
    """Ingest every day from ``START_DATE`` to ``END_DATE`` inclusive.

    Calls ``ingest_single_day`` once per calendar day in ascending order and
    prints a success/failure summary at the end.  A failed day does not stop
    the loop; days whose output already exists count as successes.
    """
    start_dt = datetime.strptime(START_DATE, "%Y%m%d")
    end_dt = datetime.strptime(END_DATE, "%Y%m%d")
    total_days = (end_dt - start_dt).days + 1

    print("=== LLS ์ผ์ž๋ณ„ ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘ ์‹œ์ž‘ ===")
    print(f"๊ธฐ๊ฐ„: {START_DATE} ~ {END_DATE} ({total_days}์ผ)")
    # Plain "~" here: the former "\~" is not a valid escape sequence, so it
    # printed a stray backslash and triggers a SyntaxWarning on newer Pythons.
    print("์šด์˜ ๊ธฐ์ค€: 07:00 ๊ธฐ์ค€ (ex. 20240101 = 2024-01-01 07:00:00 ~ 2024-01-02 06:59:59)")
    print("์ €์žฅ ์œ„์น˜: ./daily/")

    success_count = 0
    fail_count = 0

    for offset in range(total_days):
        target_date = (start_dt + timedelta(days=offset)).strftime("%Y%m%d")
        if ingest_single_day(target_date):
            success_count += 1
        else:
            fail_count += 1

    print("=== ์ˆ˜์ง‘ ์™„๋ฃŒ ===")
    print(f"์„ฑ๊ณต: {success_count}, ์‹คํŒจ: {fail_count}, ์ด: {total_days}")
256
+
257
+
258
+ # ============================================================
259
+ # 8. ์‹คํ–‰
260
+ # ============================================================
261
# Run the full historical back-fill only when this file is executed as a
# script, so that loading it from other code does not start querying Trino.
if __name__ == "__main__":
    ingest_historical_range()
nu/README.md ADDED
@@ -0,0 +1,623 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLS 결함 패턴 자동 분석 시스템
2
+
3
+ > ๋ฐ˜๋„์ฒด ์›จ์ดํผ LLS(Laser Light Scattering) ๊ฒ€์‚ฌ ๋ฐ์ดํ„ฐ์—์„œ
4
+ > **환형 / 선형 / 군집** 결함 패턴을 자동 검출하고,
5
+ > 공정 설비 접촉 각도와 매칭해 **원인 후보 부위**를 추적하는 분석 파이프라인.
6
+
7
+ ---
8
+
9
+ ## ๋ชฉ์ฐจ
10
+ 1. [๊ฐœ์š” ๋ฐ ๋ชฉ์ ](#1-๊ฐœ์š”-๋ฐ-๋ชฉ์ )
11
+ 2. [์ „์ฒด ์•„ํ‚คํ…์ฒ˜](#2-์ „์ฒด-์•„ํ‚คํ…์ฒ˜)
12
+ 3. [๋ฐ์ดํ„ฐ ํ๋ฆ„](#3-๋ฐ์ดํ„ฐ-ํ๋ฆ„)
13
+ 4. [๋ชจ๋“ˆ๋ณ„ ์ƒ์„ธ](#4-๋ชจ๋“ˆ๋ณ„-์ƒ์„ธ)
14
+ 5. [์„ค์ • ํŒŒ์ผ](#5-์„ค์ •-ํŒŒ์ผ-lls_configjson)
15
+ 6. [์‚ฌ์šฉ๋ฒ•](#6-์‚ฌ์šฉ๋ฒ•)
16
+ 7. [์ถœ๋ ฅ ๊ตฌ์กฐ](#7-์ถœ๋ ฅ-๊ตฌ์กฐ)
17
+ 8. [ํŒจํ„ด ๋ถ„๋ฅ˜ ์•Œ๊ณ ๋ฆฌ์ฆ˜](#8-ํŒจํ„ด-๋ถ„๋ฅ˜-์•Œ๊ณ ๋ฆฌ์ฆ˜)
18
+ 9. [Contact ๋งคํ•‘ ๋กœ์ง](#9-contact-๋งคํ•‘-๋กœ์ง)
19
+ 10. [๊ฒ€์ฆ ๋ฐ ํ…Œ์ŠคํŠธ](#10-๊ฒ€์ฆ-๋ฐ-ํ…Œ์ŠคํŠธ)
20
+ 11. [ํ™•์žฅ/ํŠœ๋‹ ๊ฐ€์ด๋“œ](#11-ํ™•์žฅํŠœ๋‹-๊ฐ€์ด๋“œ)
21
+
22
+ ---
23
+
24
+ ## 1. ๊ฐœ์š” ๋ฐ ๋ชฉ์ 
25
+
26
+ ### ๋ฌธ์ œ ์ •์˜
27
+ ๋ฐ˜๋„์ฒด ์›จ์ดํผ๋Š” ๋‹ค์ˆ˜์˜ ๊ณต์ • ๋‹จ๊ณ„๋ฅผ ๊ฑฐ์น˜๋Š” ๋™์•ˆ ๋‹ค์–‘ํ•œ ์„ค๋น„ ๋ถ€์œ„์™€ ์ ‘์ด‰ํ•˜๋ฉฐ, ์ผ๋ถ€ ์ ‘์ด‰์€ ๊ฒฐํ•จ(particle, scratch ๋“ฑ)์„ ์œ ๋ฐœํ•ฉ๋‹ˆ๋‹ค. LLS ๊ฒ€์‚ฌ๊ธฐ๋Š” ์›จ์ดํผ ํ‘œ๋ฉด ๊ฒฐํ•จ์˜ ์ขŒํ‘œ/ํฌ๊ธฐ/ํƒ€์ž…์„ ์ธก์ •ํ•˜์ง€๋งŒ, **์–ด๋А ์„ค๋น„์˜ ์–ด๋А ๋ถ€์œ„๊ฐ€ ์›์ธ์ธ์ง€**๋Š” ์ž๋™์œผ๋กœ ์•Œ๋ ค์ฃผ์ง€ ์•Š์Šต๋‹ˆ๋‹ค.
28
+
29
+ ### ๋ณธ ์‹œ์Šคํ…œ์˜ ์—ญํ• 
30
+ 1. ์ผ์ž๋ณ„ LLS ๊ฒฐํ•จ ๋ฐ์ดํ„ฐ๋ฅผ ์ž๋™ ์ˆ˜์ง‘ยท์ „์ฒ˜๋ฆฌ
31
+ 2. ๋™์ผ ์บ๋ฆฌ์–ด(CAR_ID) ๋˜๋Š” ๋™์ผ ์ผ์ž์˜ ๊ฒฐํ•จ์„ ํ†ตํ•ฉํ•˜์—ฌ **๊ณตํ†ต ํŒจํ„ด**์„ ๊ฒ€์ถœ
32
+ 3. ๊ฒ€์ถœ๋œ ํŒจํ„ด์˜ ์œ„์น˜(centroid)๋ฅผ ์‚ฌ์ „ ์ •์˜๋œ **์„ค๋น„ ์ ‘์ด‰ ๊ฐ๋„ ํ…Œ์ด๋ธ”**๊ณผ ๋งค์นญํ•˜์—ฌ ์›์ธ ํ›„๋ณด ๋ถ€์œ„ ์‚ฐ์ถœ
33
+ 4. ๊ฒฐ๊ณผ๋ฅผ CSV + ์›จ์ดํผ ๋งต ์ด๋ฏธ์ง€๋กœ ์‹œ๊ฐํ™”ํ•˜์—ฌ ์šด์˜์ž๊ฐ€ ๋น ๋ฅด๊ฒŒ ๊ฒ€ํ†  ๊ฐ€๋Šฅํ•˜๋„๋ก ์ถœ๋ ฅ
34
+
35
+ ### ํ•ต์‹ฌ ๊ฐ€์ •
36
+ - **๊ณตํ†ต ์œ„์น˜์— ๋ฐ˜๋ณต๋˜๋Š” ๊ฒฐํ•จ = ์˜๋ฏธ ์žˆ๋Š” ํŒจํ„ด**: ๋‹จ๋ฐœ์„ฑ ๊ฒฐํ•จ์€ ๋…ธ์ด์ฆˆ๋กœ ๊ฐ„์ฃผ.
37
+ - **๊ฒฐํ•จ ๋ฐœ์ƒ ์œ„์น˜๋Š” ์„ค๋น„ ์ ‘์ด‰ ์œ„์น˜์™€ ๊ฐ•ํ•œ ์ƒ๊ด€๊ด€๊ณ„**: edge ์˜์—ญ ์ ‘์ด‰๋ถ€์™€ ๊ฒฐํ•จ centroid์˜ ๊ธฐํ•˜ํ•™์  ๊ทผ์ ‘๋„๋กœ ํ›„๋ณด ์ถ”์ •.
38
+
39
+ ---
40
+
41
+ ## 2. ์ „์ฒด ์•„ํ‚คํ…์ฒ˜
42
+
43
+ ```
44
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
45
+ โ”‚ Trino DB (iceberg.ibg_lake) โ”‚
46
+ โ”‚ das_inspection / das_defect / odb_dwaf_ope_his โ”‚
47
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
48
+ โ”‚
49
+ โ–ผ โ‘  ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘ (์ผ์ž๋ณ„)
50
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
51
+ โ”‚ 0.ingest_daily_LLS.py โ”‚
52
+ โ”‚ (Trino โ†’ parquet) โ”‚
53
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
54
+ โ”‚
55
+ โ–ผ
56
+ ./daily/YYYYMMDD.parquet
57
+ โ”‚
58
+ โ–ผ โ‘ก ํŒจํ„ด ๋ถ„์„ + Contact ๋งคํ•‘
59
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
60
+ โ”‚ pattern_analyzer.py โ”‚
61
+ โ”‚ LLSPatternAnalyzer โ”‚
62
+ โ”‚ โ”‚
63
+ โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚
64
+ โ”‚ โ”‚ utils.WaferUtils โ”‚ โ”‚ โ† ์ „์ฒ˜๋ฆฌ/์‹œ๊ฐํ™”
65
+ โ”‚ โ”‚ pattern_detection. โ”‚ โ”‚ โ† ํŒจํ„ด ๋ถ„๋ฅ˜
66
+ โ”‚ โ”‚ PatternDetector โ”‚ โ”‚
67
+ โ”‚ โ”‚ contact_mapper. โ”‚ โ”‚ โ† ์„ค๋น„ ๋ถ€์œ„ ๋งคํ•‘
68
+ โ”‚ โ”‚ ContactMapper โ”‚ โ”‚
69
+ โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚
70
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
71
+ โ”‚
72
+ โ–ผ โ‘ข ๊ฒฐ๊ณผ ์ถœ๋ ฅ
73
+ ./result_daily/
74
+ โ”œโ”€โ”€ by_cst/*.csv (Mode 2: CST ร— ์‹œ๊ฐ)
75
+ โ”œโ”€โ”€ daily_agg/*.csv (Mode 1: ์ผ์ž ํ†ตํ•ฉ)
76
+ โ”œโ”€โ”€ figures_by_cst/ (CST๋ณ„ wafer map)
77
+ โ”œโ”€โ”€ figures_daily/ (์ผ์ž๋ณ„ wafer map)
78
+ โ””โ”€โ”€ LLS_*_full_analysis.csv (ํ†ตํ•ฉ ๊ฒฐ๊ณผ)
79
+ ```
80
+
81
+ ### ๋ชจ๋“ˆ ์˜์กด์„ฑ ๊ทธ๋ž˜ํ”„
82
+
83
+ ```
84
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
85
+ โ”‚ lls_config.json โ”‚
86
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
87
+ โ”‚
88
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
89
+ โ”‚ โ–ผ โ”‚
90
+ โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚
91
+ โ”‚ โ”‚ pattern_analyzer.py โ”‚ โ”‚
92
+ โ”‚ โ”‚ LLSPatternAnalyzer โ”‚ โ”‚
93
+ โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚
94
+ โ”‚ โ”‚ โ”‚
95
+ โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚
96
+ โ”‚ โ–ผ โ–ผ โ–ผ โ”‚
97
+ โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚
98
+ โ”‚ โ”‚utils.pyโ”‚ โ”‚pattern_ โ”‚ โ”‚contact_mapper. โ”‚ โ”‚
99
+ โ”‚ โ”‚Wafer โ”‚ โ”‚ detection.py โ”‚ โ”‚py โ”‚ โ”‚
100
+ โ”‚ โ”‚Utils โ”‚ โ”‚PatternDetector โ”‚ โ”‚ContactMapper โ”‚ โ”‚
101
+ โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚
102
+ โ”‚ โ”‚
103
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
104
+ โ–ผ
105
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
106
+ โ”‚ contact_angle.csv โ”‚
107
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
108
+ ```
109
+
110
+ ---
111
+
112
+ ## 3. ๋ฐ์ดํ„ฐ ํ๋ฆ„
113
+
114
+ ### Stage 1: ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘ (`0.ingest_daily_LLS.py`)
115
+
116
+ | ๋‹จ๊ณ„ | ์ฒ˜๋ฆฌ |
117
+ |------|------|
118
+ | 1. Trino ์—ฐ๊ฒฐ | BasicAuth + HTTPS, verify=False |
119
+ | 2. ์šด์˜์ผ ์ •์˜ | 07:00 ๊ธฐ์ค€ (`YYYYMMDD 07:00:00` ~ `YYYYMMDD+1 06:59:59`) |
120
+ | 3. ์ฟผ๋ฆฌ ์‹คํ–‰ | OPE_ID 8030(๊ณต์ •) โ†” 8210(LLS ๊ฒ€์‚ฌ) JOIN, ROUTE_ID ํ•„ํ„ฐ๋ง |
121
+ | 4. ์ „์ฒ˜๋ฆฌ | ์ขŒํ‘œ ๋ณ€ํ™˜(`/1000 - 150`), d_size ํ•„ํ„ฐ(`>=0.037`), ANGLE/DISTANCE ๊ณ„์‚ฐ, roughbin ๋งคํ•‘, LLS ๊ตฌ๋ถ„, Particle ๋ถ„ํฌ ๋ผ๋ฒจ๋ง |
122
+ | 5. ์ €์žฅ | `./daily/YYYYMMDD.parquet` |
123
+
124
+ ### Stage 2: ํŒจํ„ด ๋ถ„์„ (`pattern_analyzer.py`)
125
+
126
+ ```
127
+ parquet ๋กœ๋“œ
128
+ โ”‚
129
+ โ–ผ
130
+ [for each date]
131
+ โ”‚
132
+ โ–ผ
133
+ ๋ชจ๋“œ ๋ถ„๊ธฐ
134
+ โ”‚
135
+ โ”œโ”€โ”€ Mode 2 (by_cst) โ”œโ”€โ”€ Mode 1 (daily aggregated)
136
+ โ”‚ [for each CAR_ID] โ”‚ ํ•˜๋ฃจ ์ „์ฒด ๊ฒฐํ•จ ํ†ตํ•ฉ
137
+ โ”‚ [for each HIS_REGIST_DTTM] โ”‚
138
+ โ”‚ โ”‚
139
+ โ–ผ โ–ผ
140
+ fine-grid ํ• ๋‹น (cell_size_mm) โ†โ”˜
141
+ โ”‚
142
+ โ–ผ
143
+ n1 ํ•„ํ„ฐ (cell๋‹น unique wafer โ‰ฅ n1_min_wafers)
144
+ โ”‚
145
+ โ–ผ
146
+ n2 ํ•„ํ„ฐ (cell๋‹น ๊ฒฐํ•จ ์ˆ˜ โ‰ฅ n2_min_cell_defects)
147
+ โ”‚
148
+ โ–ผ
149
+ PatternDetector.classify()
150
+ โ€ข HDBSCAN โ†’ DBSCAN fallback (1์ฐจ ํด๋Ÿฌ์Šคํ„ฐ๋ง)
151
+ โ€ข LOF outlier ์ œ๊ฑฐ (2์ฐจ)
152
+ โ€ข ํŒจํ„ด ํŒ์ •: ํ™˜ํ˜• โ†’ ์„ ํ˜• โ†’ ๊ตฐ์ง‘(์„œ๋ธŒ๋ถ„๋ฅ˜)
153
+ โ”‚
154
+ โ–ผ
155
+ ContactMapper.map_pattern()
156
+ โ€ข ํ™˜ํ˜• โ†’ Line ์ œ์™ธ ์ „์ฒด (๊ฐ๋„ ๋ฌด๊ด€)
157
+ โ€ข ์„ ํ˜• โ†’ Line ๋งˆํ‚น, |P.x - centroid_x|
158
+ โ€ข ๊ตฐ์ง‘ โ†’ Line ์ œ์™ธ, dist(P, centroid)
159
+ โ”‚
160
+ โ–ผ
161
+ record ์ƒ์„ฑ + CSV ์ €์žฅ + wafer map ์ด๋ฏธ์ง€ ์ƒ์„ฑ
162
+ ```
163
+
164
+ ---
165
+
166
+ ## 4. ๋ชจ๋“ˆ๋ณ„ ์ƒ์„ธ
167
+
168
+ ### 4.1 `utils.py` โ€” `WaferUtils`
169
+
170
+ ์›จ์ดํผ ๊ฒฐํ•จ ์ฒ˜๋ฆฌ ๊ณต์šฉ ์œ ํ‹ธ๋ฆฌํ‹ฐ (๋ชจ๋“  ๋ฉ”์„œ๋“œ static).
171
+
172
+ | ๋ฉ”์„œ๋“œ | ์—ญํ•  |
173
+ |--------|------|
174
+ | `setup_korean_font()` | matplotlib์— ํ•œ๊ธ€ ํฐํŠธ ๋“ฑ๋ก (Malgun Gothic ์šฐ์„ ) |
175
+ | `load_config(path)` | JSON ์„ค์ • ๋กœ๋“œ |
176
+ | `map_roughbin_no(rb)` | ๊ฒ€์‚ฌ๊ธฐ raw ์ฝ”๋“œ โ†’ ์šด์˜ ๊ฒฐํ•จ ๋ถ„๋ฅ˜๋ช… |
177
+ | `add_zone_labels(df, inner_radius)` | `{Inner\|Outer}_{์‹œ๊ณ„์œ„์น˜}` ๋ผ๋ฒจ ๋ถ€์—ฌ |
178
+ | `assign_fine_grid(df, cell_size_mm)` | ๊ฒฐํ•จ์„ ๊ฒฉ์ž cell์— ํ• ๋‹น |
179
+ | `get_cell_wafer_counts(df)` | cell๋ณ„ unique wafer/๊ฒฐํ•จ ์ˆ˜ ์ง‘๊ณ„ |
180
+ | `filter_by_cell_wafer_count(df, n1_min_wafer)` | n1 ํ•„ํ„ฐ ์ ์šฉ |
181
+ | `summarize_filtering_result(orig, filt)` | ํ•„ํ„ฐ ์ „ํ›„ ์š”์•ฝ ํ†ต๊ณ„ |
182
+ | `plot_wafer_map(...)` | ์›จ์ดํผ ๋งต ์‹œ๊ฐํ™” (์‚ฐ์ ๋„ + ๋™์‹ฌ์› + ์‹œ๊ณ„ ๊ทธ๋ฆฌ๋“œ + centroid ๋งˆํ‚น) |
183
+
184
+ **์ƒ์ˆ˜**:
185
+ - `ROUGHBIN_MAPPING`: roughbin_no โ†’ ๊ฒฐํ•จ๋ช… dict
186
+ - `CLOCK_LABELS`: `["12", "01", ..., "11"]`
187
+ - `PATTERN_COLORS`: ํŒจํ„ด๋ณ„ ์‹œ๊ฐํ™” ์ƒ‰์ƒ
188
+
189
+ ### 4.2 `pattern_detection.py` โ€” `PatternDetector`
190
+
191
+ LLS ๊ฒฐํ•จ ํŒจํ„ด ์ž๋™ ๋ถ„๋ฅ˜๊ธฐ. config๋ฅผ ์ฃผ์ž…๋ฐ›์•„ ์‚ฌ์šฉ.
192
+
193
+ **ํ•ต์‹ฌ API**: `classify(df) โ†’ (result_df, dominant_zone, pattern_list, centroid)`
194
+
195
+ **๋ถ„๋ฅ˜ ํŒŒ์ดํ”„๋ผ์ธ**:
196
+ 1. **1์ฐจ ํด๋Ÿฌ์Šคํ„ฐ๋ง** (`_cluster_hdbscan`) โ€” HDBSCAN์œผ๋กœ outlier(-1) ์ œ๊ฑฐ
197
+ - ๋ชจ๋‘ outlier๋ฉด DBSCAN fallback (`_cluster_dbscan_fallback`)
198
+ 2. **2์ฐจ outlier ์ œ๊ฑฐ** (`_apply_lof`) โ€” LOF๋กœ ์ง€์—ญ ๋ฐ€๋„ ๊ธฐ๋ฐ˜ ์ถ”๊ฐ€ ์ œ๊ฑฐ
199
+ 3. **패턴 판정** (우선순위 순):
200
+ - **ํ™˜ํ˜•** (`_is_ring`): PCA ์„ ํ˜•์„ฑ ๊ฑฐ๋ถ€ โ†’ main r-band ์ถ”์ถœ โ†’ ๊ฐ๋„/sector ์ปค๋ฒ„๋ฆฌ์ง€ โ†’ ์› ํ”ผํŒ… RMSE โ†’ ์ค‘์‹ฌ ์›์  ๊ทผ์ ‘
201
+ - **์„ ํ˜•** (`_is_linear_set`): PCA eigenvalue ratio โ†’ ์ง์„  ํŽธ์ฐจ โ†’ gap ratio
202
+ - **๊ตฐ์ง‘** (`_classify_cluster_or_sub_linear`): DBSCAN ์„œ๋ธŒํด๋Ÿฌ์Šคํ„ฐ โ†’ compactness/PCA๋กœ ์žฌํŒ์ •
203
+ 4. **dominant_zone** (`_dominant_zone`) โ€” inlier ์ค‘ ๊ฐ€์žฅ ๋นˆ๋ฒˆํ•œ zone_label
204
+ 5. **centroid** (`_zone_centroid`) โ€” dominant zone ์ ๋“ค์˜ ํ‰๊ท 
205
+
206
+ ### 4.3 `contact_mapper.py` โ€” `ContactMapper`
207
+
208
+ ํŒจํ„ด centroid๋ฅผ contact_angle.csv์™€ ๋งค์นญ.
209
+
210
+ **ํ•ต์‹ฌ API**: `map_pattern(pattern, centroid_x, centroid_y) โ†’ DataFrame`
211
+
212
+ **์ขŒํ‘œ๊ณ„**: ๋ชจ๋“  ๊ฐ๋„๋Š” **์ˆ˜ํ•™(atan2) ๋ฐ˜์‹œ๊ณ„**, 0ยฐ=3์‹œ ๋ฐฉํ–ฅ.
213
+
214
+ `Contact_Angle ฮธ` โ†’ edge ์ขŒํ‘œ ๋ณ€ํ™˜: `P = (150ยทcos ฮธ, 150ยทsin ฮธ)`
215
+
216
+ | ํŒจํ„ด | ํ›„๋ณด ํ–‰ | ๊ฑฐ๋ฆฌ ์ •์˜ |
217
+ |------|---------|-----------|
218
+ | ์„ ํ˜• | `Defect_Direction == "Line"` | `\|P.x - centroid_x\|` |
219
+ | ๊ตฐ์ง‘ | `Defect_Direction != "Line"` | `โˆš((P.x-cx)ยฒ + (P.y-cy)ยฒ)` |
220
+ | ํ™˜ํ˜• | `Defect_Direction != "Line"` | ๊ฑฐ๋ฆฌ ๊ณ„์‚ฐ ์•ˆ ํ•จ (์ „์ฒด ๋ฐ˜ํ™˜) |
221
+
222
+ `tolerance_mm` ์ดํ•˜ ํ†ต๊ณผ ํ›„๋ณด๋งŒ ๊ฐ€๊นŒ์šด ์ˆœ ์ •๋ ฌ ๋ฐ˜ํ™˜.
223
+
224
+ ### 4.4 `pattern_analyzer.py` โ€” `LLSPatternAnalyzer`
225
+
226
+ ์œ„ 3๊ฐœ ํด๋ž˜์Šค๋ฅผ ์กฐํ•ฉํ•˜๋Š” ์ผ์ž๋ณ„ ์ผ๊ด„ ์ฒ˜๋ฆฌ ์˜ค์ผ€์ŠคํŠธ๋ ˆ์ดํ„ฐ.
227
+
228
+ **ํ•ต์‹ฌ API**: `run(mode) โ†’ DataFrame`
229
+
230
+ **๋‘ ๊ฐ€์ง€ ๋ชจ๋“œ**:
231
+ - `"by_cst"`: CAR_ID ร— HIS_REGIST_DTTM ๊ทธ๋ฃน๋ณ„ (์„ธ๋ฐ€)
232
+ - `"daily"`: ํ•˜๋ฃจ ์ „์ฒด ํ†ตํ•ฉ (ํŠธ๋ Œ๋“œ)
233
+
234
+ **Mode 1๋งŒ์˜ ํŠน์ง•**:
235
+ - `is_significant` ํ”Œ๋ž˜๊ทธ๋กœ ์œ ์˜/๋น„์œ ์˜ ๊ตฌ๋ถ„
236
+ - ๋ถ„๋ฅ˜ ์‹คํŒจํ•ด๋„ ํ•„ํ„ฐ๋ง๋œ ๊ฒฐํ•จ์€ `filtered_defects/*.parquet`์— ํ•ญ์ƒ ์ €์žฅ
237
+ - ์‹œ๊ฐํ™”๋Š” `significant/` vs `others/` ํด๋” ๋ถ„๋ฆฌ
238
+
239
+ ---
240
+
241
+ ## 5. ์„ค์ • ํŒŒ์ผ (`lls_config.json`)
242
+
243
+ ```jsonc
244
+ {
245
+ "preprocessing": {
246
+ "d_size_min": 0.037, // d_size ํ•„ํ„ฐ ์ตœ์†Œ๊ฐ’ (mm)
247
+ "inner_radius_mm": 105.0, // Inner/Outer zone ๊ฒฝ๊ณ„
248
+ "cell_size_mm": 3.0, // fine-grid cell ํฌ๊ธฐ
249
+ "n1_min_wafers": 2, // cell๋‹น ์ตœ์†Œ unique wafer ์ˆ˜
250
+ "n2_min_zone_defects": 3 // cell๋‹น ์ตœ์†Œ ๊ฒฐํ•จ ์ˆ˜
251
+ // (๊ตฌ๋ฒ„์ „ ํ˜ธํ™˜: n2_min_cell_defects๋„ ์ธ์‹)
252
+ },
253
+ "clustering": {
254
+ "min_cluster_size": 3, // HDBSCAN ์ตœ์†Œ ํด๋Ÿฌ์Šคํ„ฐ ํฌ๊ธฐ
255
+ "min_samples": 2,
256
+ "cluster_selection_method": "leaf",
257
+ "dbscan_eps": 20.0, // 1์ฐจ fallback DBSCAN eps
258
+ "cluster_dbscan_eps": 15.0 // ์„œ๋ธŒํด๋Ÿฌ์Šคํ„ฐ DBSCAN eps
259
+ },
260
+ "lof": {
261
+ "lof_min_points": 5, // LOF ์ ์šฉ ์ตœ์†Œ inlier ์ˆ˜
262
+ "lof_n_neighbors": 13,
263
+ "lof_contamination": 0.07
264
+ },
265
+ "cluster": {
266
+ "cluster_compactness_radius": 25.0 // ๊ตฐ์ง‘ compactness ํ•œ๊ณ„
267
+ },
268
+ "linear": {
269
+ "linear_pca_ratio_min": 2.5, // ์„ ํ˜• ํŒ์ • PCA โˆšeig_ratio ์ตœ์†Œ
270
+ "linear_max_deviation": 30.0, // ์ง์„  ํ‰๊ท  ํŽธ์ฐจ ํ•œ๊ณ„ (mm)
271
+ "linear_min_length": 5.0, // ์„ ํ˜• ์ตœ์†Œ ๊ธธ์ด (mm)
272
+ "linear_max_gap_ratio": 0.5,
273
+ "centroid_linear_min_length": 20.0,
274
+ "centroid_linear_pca_min": 2.0,
275
+ "centroid_linear_dev_max": 25.0
276
+ },
277
+ "ring": {
278
+ "ring_min_points": 10,
279
+ "ring_band_width": 3.0, // r-ํžˆ์Šคํ† ๊ทธ๋žจ bin ํญ (mm)
280
+ "ring_r_absolute_tolerance": 10.0,
281
+ "ring_min_angular_coverage": 200.0,
282
+ "ring_min_sectors": 7, // 12 sector ์ค‘ ์ตœ์†Œ ๋ช‡ ๊ฐœ
283
+ "ring_fit_rmse_max": 10
284
+ // (์„ ํƒ) "ring_pca_ratio_max": 2.5 // ํ™˜ํ˜• ์„ ํ˜•์„ฑ ๊ฑฐ๋ถ€ ์ž„๊ณ„
285
+ },
286
+ "misc": {
287
+ "min_points_for_clustering": 3
288
+ },
289
+ // (์„ ํƒ) Mode 1 ์œ ์˜ ํŒจํ„ด ํŒ์ •
290
+ "mode_daily": {
291
+ "min_defect_count": 30,
292
+ "min_wafer_count": 3,
293
+ "excluded_patterns": ["Others", "์ •์ƒ/๋ฏธ๋‹ฌ", "๋ฐ์ดํ„ฐ ์—†์Œ", "None"]
294
+ },
295
+ // (์„ ํƒ) Contact ๋งคํ•‘
296
+ "contact_mapping": {
297
+ "tolerance_mm": 30.0,
298
+ "top_n": 5
299
+ }
300
+ }
301
+ ```
302
+
303
+ ---
304
+
305
+ ## 6. ์‚ฌ์šฉ๋ฒ•
306
+
307
+ ### CLI ์‹คํ–‰
308
+
309
+ ```bash
310
+ # ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘
311
+ python 0.ingest_daily_LLS.py
312
+
313
+ # ํŒจํ„ด ๋ถ„์„ (Mode 2: ๊ธฐ๋ณธ)
314
+ python pattern_analyzer.py
315
+ python pattern_analyzer.py by_cst
316
+
317
+ # ํŒจํ„ด ๋ถ„์„ (Mode 1: daily ํ†ตํ•ฉ)
318
+ python pattern_analyzer.py daily
319
+ ```
320
+
321
+ ### Python API
322
+
323
+ ```python
324
+ from pattern_analyzer import LLSPatternAnalyzer
325
+
326
+ analyzer = LLSPatternAnalyzer(
327
+ config_path="./lls_config.json",
328
+ daily_input_dir="./daily",
329
+ output_dir="./result_daily",
330
+ contact_csv="./contact_angle.csv",
331
+ )
332
+ df_daily = analyzer.run(mode="daily") # ์ผ์ž๋ณ„ ํ†ตํ•ฉ
333
+ df_by_cst = analyzer.run(mode="by_cst") # CST ร— ์Šค์บ”์‹œ๊ฐ
334
+ ```
335
+
336
+ ### ๋‹จ๋… ํด๋ž˜์Šค ์‚ฌ์šฉ
337
+
338
+ ```python
339
+ # ํŒจํ„ด ๋ถ„๋ฅ˜๋งŒ
340
+ from pattern_detection import PatternDetector
341
+ from utils import WaferUtils
342
+
343
+ config = WaferUtils.load_config("./lls_config.json")
344
+ detector = PatternDetector(config)
345
+ result_df, dominant_zone, pattern_list, centroid = detector.classify(df_sample)
346
+
347
+ # Contact ๋งคํ•‘๋งŒ
348
+ from contact_mapper import ContactMapper
349
+
350
+ cm = ContactMapper("./contact_angle.csv", tolerance_mm=30.0)
351
+ candidates = cm.map_pattern("๊ตฐ์ง‘", centroid_x=130, centroid_y=0)
352
+
353
+ # ์ „์ฒ˜๋ฆฌ๋งŒ
354
+ df_grid = WaferUtils.assign_fine_grid(df_raw, cell_size_mm=3.0)
355
+ df_zone = WaferUtils.add_zone_labels(df_grid, inner_radius=105.0)
356
+ df_filt = WaferUtils.filter_by_cell_wafer_count(df_zone, n1_min_wafer=2)
357
+ ```
358
+
359
+ ์ž์„ธํ•œ ์˜ˆ์ œ๋Š” `demo_LLS_pattern_analysis.ipynb` ์ฐธ์กฐ.
360
+
361
+ ---
362
+
363
+ ## 7. ์ถœ๋ ฅ ๊ตฌ์กฐ
364
+
365
+ ```
366
+ ./result_daily/
367
+ โ”œโ”€โ”€ by_cst/ # Mode 2 ์ผ์ž๋ณ„ CST ๊ฒฐ๊ณผ
368
+ โ”‚ โ””โ”€โ”€ YYYYMMDD_LLS_CST_analysis.csv
369
+ โ”‚
370
+ โ”œโ”€โ”€ daily_agg/ # Mode 1 ์ผ์ž๋ณ„ ํ†ตํ•ฉ ๊ฒฐ๊ณผ
371
+ โ”‚ โ”œโ”€โ”€ YYYYMMDD_LLS_daily_analysis.csv
372
+ โ”‚ โ””โ”€โ”€ filtered_defects/
373
+ โ”‚ โ””โ”€โ”€ YYYYMMDD_filtered.parquet # ํ•„ํ„ฐ ํ†ต๊ณผ ๊ฒฐํ•จ (๋ถ„๋ฅ˜ ๋ฌด๊ด€)
374
+ โ”‚
375
+ โ”œโ”€โ”€ figures_by_cst/ # Mode 2 ์‹œ๊ฐํ™”
376
+ โ”‚ โ””โ”€โ”€ YYYYMMDD/
377
+ โ”‚ โ””โ”€โ”€ {CST_ID}_{HIS_REGIST_DTTM}.jpg
378
+ โ”‚
379
+ โ”œโ”€โ”€ figures_daily/ # Mode 1 ์‹œ๊ฐํ™”
380
+ โ”‚ โ”œโ”€โ”€ significant/DAILY_YYYYMMDD.jpg # ์œ ์˜ ํŒจํ„ด
381
+ โ”‚ โ””โ”€โ”€ others/DAILY_YYYYMMDD.jpg # ๋น„์œ ์˜
382
+ โ”‚
383
+ โ”œโ”€โ”€ config_used/ # ์žฌํ˜„์„ฑ์šฉ config ๋ฐฑ์—…
384
+ โ”‚ โ””โ”€โ”€ YYYYMMDD_HHMMSS_config.json
385
+ โ”‚
386
+ โ”œโ”€โ”€ LLS_by_cst_full_analysis.csv # ์ „์ฒด ๊ธฐ๊ฐ„ ํ†ตํ•ฉ ๊ฒฐ๊ณผ (Mode 2)
387
+ โ””โ”€โ”€ LLS_daily_agg_full_analysis.csv # ์ „์ฒด ๊ธฐ๊ฐ„ ํ†ตํ•ฉ ๊ฒฐ๊ณผ (Mode 1)
388
+ ```
389
+
390
+ ### CSV ์ปฌ๋Ÿผ (record ๋‹จ์œ„)
391
+
392
+ | ์ปฌ๋Ÿผ | ์˜๋ฏธ |
393
+ |------|------|
394
+ | `status` | ์ฒ˜๋ฆฌ ์ƒํƒœ ("Success") |
395
+ | `mode` | "by_cst" / "daily" |
396
+ | `is_significant` | (Mode 1๋งŒ) ์œ ์˜ ํŒจํ„ด ์—ฌ๋ถ€ |
397
+ | `CST_ID` | ์บ๋ฆฌ์–ด ID (Mode 1์€ "ALL") |
398
+ | `HIS_REGIST_DTTM` | 8030 ๊ณต์ • ์‹œ๊ฐ (Mode 1์€ ์ผ์ž) |
399
+ | `EQP_NM_8030` | 8030 ๊ณต์ • ์„ค๋น„๋ช… (mode ๊ฐ’) |
400
+ | `analysis_date` | ๋ถ„์„ ์ผ์ž |
401
+ | `wafer_count` | ๊ทธ๋ฃน ๋‚ด unique wafer ์ˆ˜ |
402
+ | `defect_count` | ๋ถ„๋ฅ˜ ๋Œ€์ƒ ๊ฒฐํ•จ ์ˆ˜ |
403
+ | `overall_pattern` | ๊ฒ€์ถœ๋œ ํŒจํ„ด ("ํ™˜ํ˜•"/"์„ ํ˜•"/"๊ตฐ์ง‘"/"Others") |
404
+ | `overall_dominant_zone` | ์ฃผ์š” zone ๋ผ๋ฒจ |
405
+ | `main_centroid_x`, `_y` | ํŒจํ„ด ์ค‘์‹ฌ ์ขŒํ‘œ (mm) |
406
+ | `main_centroid_Angle` | ์ค‘์‹ฌ ๊ฐ๋„ (math ๋ฐ˜์‹œ๊ณ„, 0~360ยฐ) |
407
+ | `main_centroid_Distance` | ์›์ ์—์„œ ์ค‘์‹ฌ๊นŒ์ง€ ๊ฑฐ๋ฆฌ |
408
+ | `contact_candidate_count` | ๋งค์นญ๋œ ํ›„๋ณด ์ด ๊ฐœ์ˆ˜ |
409
+ | `contact_candidates` | "EQP_TYPE:Part \| ..." top-N ์š”์•ฝ |
410
+
411
+ ---
412
+
413
+ ## 8. ํŒจํ„ด ๋ถ„๋ฅ˜ ์•Œ๊ณ ๋ฆฌ์ฆ˜
414
+
415
+ ### 8.1 ์ „์ฒ˜๋ฆฌ (n1, n2 ํ•„ํ„ฐ)
416
+
417
+ ```
418
+ [์›๋ณธ ๊ฒฐํ•จ]
419
+ โ”‚
420
+ โ–ผ
421
+ fine-grid ํ• ๋‹น: ์ขŒํ‘œ โ†’ cell_id ("cx_cy" ํ˜•์‹)
422
+ โ”‚
423
+ โ–ผ
424
+ n1 ํ•„ํ„ฐ: ๊ฐ™์€ cell์—์„œ ๊ฒฐํ•จ์ด ๋ฐœ์ƒํ•œ wafer ์ˆ˜ โ‰ฅ n1_min_wafers
425
+ โ†ณ ์˜๋„: "์—ฌ๋Ÿฌ wafer์— ๊ณตํ†ต ๋ฐœ์ƒํ•œ ์œ„์น˜๋งŒ ์˜๋ฏธ ์žˆ๋‹ค"
426
+ โ”‚
427
+ โ–ผ
428
+ n2 ํ•„ํ„ฐ: cell๋‹น ๊ฒฐํ•จ ์ˆ˜ โ‰ฅ n2_min_cell_defects
429
+ โ†ณ ์˜๋„: "์ถฉ๋ถ„ํ•œ ๊ฒฐํ•จ์ด ์‘์ง‘๋œ cell๋งŒ ๋ถ„๋ฅ˜ ๋Œ€์ƒ"
430
+ ```
431
+
432
+ ### 8.2 ํŒจํ„ด ํŒ์ • ์šฐ์„ ์ˆœ์œ„
433
+
434
+ ```
435
+ โ”Œโ”€ HDBSCAN inlier ์ถ”์ถœ
436
+ โ”‚
437
+ โ–ผ
438
+ LOF๋กœ 2์ฐจ outlier ์ œ๊ฑฐ
439
+ โ”‚
440
+ โ–ผ
441
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
442
+ โ”‚ ํ™˜ํ˜• ๊ฒ€์‚ฌ โ”‚ (PCA ์„ ํ˜•์„ฑ ๊ฑฐ๋ถ€ โ†’ main band โ†’ ์› ํ”ผํŒ…)
443
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
444
+ โ”‚ โ”€yesโ”€โ†’ ["ํ™˜ํ˜•"], centroid=inlier ํ‰๊ท 
445
+ โ”‚
446
+ โ”‚ no
447
+ โ–ผ
448
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
449
+ โ”‚ ์„ ํ˜• ๊ฒ€์‚ฌ โ”‚ (์ „์ฒด inlier PCA + ํŽธ์ฐจ + gap)
450
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
451
+ โ”‚ โ”€yesโ”€โ†’ ["์„ ํ˜•"], centroid=dominant zone ํ‰๊ท 
452
+ โ”‚
453
+ โ”‚ no
454
+ โ–ผ
455
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
456
+ โ”‚ ๊ตฐ์ง‘ ๋ถ„์„ โ”‚ (DBSCAN ์„œ๋ธŒ โ†’ ์ค‘์‹ฌ์  ์ผ์ง์„ ? / ๊ฐ์ž ๋ถ„๋ฅ˜)
457
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
458
+ โ–ผ
459
+ ["๊ตฐ์ง‘"] ๋˜๋Š” sub-clusters ๋‹ค์ˆ˜๊ฒฐ
460
+ ```
461
+
462
+ ### 8.3 ํ™˜ํ˜• ๊ฒ€์ถœ โ€” 5๋‹จ๊ณ„ ๊ฒŒ์ดํŠธ
463
+
464
+ 1. **ํฌ์ธํŠธ ์ˆ˜**: `n >= ring_min_points`
465
+ 2. **PCA ์„ ํ˜•์„ฑ ๊ฑฐ๋ถ€**: `โˆš(eig_ratio) < ring_pca_ratio_max` (์›์  ํ†ต๊ณผ ์„ ํ˜• false-positive ๋ฐฉ์ง€)
466
+ 3. **r-band ์‘์ง‘**: top r-bin ์ ๋“ค์˜ `r.max - r.min โ‰ค ring_r_absolute_tolerance`
467
+ 4. **๊ฐ๋„ ์ปค๋ฒ„๋ฆฌ์ง€**: `circular_range >= ring_min_angular_coverage` AND `sector >= ring_min_sectors`
468
+ 5. **์› ํ”ผํŒ…**: `RMSE <= ring_fit_rmse_max` AND `์ค‘์‹ฌ์  ์›์  ๊ฑฐ๋ฆฌ <= 10mm`
469
+
470
+ ### 8.4 ์„ ํ˜• ๊ฒ€์ถœ
471
+
472
+ - PCA 1์ฐจ eigenvalue / 2์ฐจ eigenvalue ๋น„์œจ์˜ sqrt โ‰ฅ `linear_pca_ratio_min`
473
+ - ์ฃผ์ถ• ์ง๊ฐ๋ฐฉํ–ฅ ํ‰๊ท  ํŽธ์ฐจ โ‰ค `linear_max_deviation`
474
+ - ์ฃผ์ถ• ํˆฌ์˜ ํ›„ ์ตœ๋Œ€ gap / ์ „์ฒด ๊ธธ์ด โ‰ค `linear_max_gap_ratio`
475
+ - ์ „์ฒด ๊ธธ์ด โ‰ฅ `linear_min_length`
476
+
477
+ ### 8.5 ๊ตฐ์ง‘ ๊ฒ€์ถœ
478
+
479
+ - ํ™˜ํ˜•/์„ ํ˜• ๋ชจ๋‘ ์•„๋‹Œ ๊ฒฝ์šฐ default
480
+ - ์„œ๋ธŒ DBSCAN์œผ๋กœ ๋ถ„ํ• 
481
+ - ์„œ๋ธŒํด๋Ÿฌ์Šคํ„ฐ โ‰ฅ2 + ์ค‘์‹ฌ์ ๋“ค์ด ์ผ์ง์„  โ†’ ์„ ํ˜• ์žฌ๋ถ„๋ฅ˜
482
+ - 그 외: 각 서브를 군집/선형으로 라벨링 후 다수결
483
+
484
+ ---
485
+
486
+ ## 9. Contact ๋งคํ•‘ ๋กœ์ง
487
+
488
+ ### 9.1 ์ขŒํ‘œ ๋ณ€ํ™˜
489
+
490
+ ```
491
+ Contact_Angle ฮธ (math ๋ฐ˜์‹œ๊ณ„ ๊ธฐ์ค€)
492
+ โ”‚
493
+ โ–ผ
494
+ edge ์ขŒํ‘œ: P = (150ยทcos ฮธ, 150ยทsin ฮธ)
495
+ ```
496
+
497
+ ์˜ˆ์‹œ:
498
+ - ฮธ=0ยฐ โ†’ P=(150, 0) (3์‹œ)
499
+ - ฮธ=90ยฐ โ†’ P=(0, 150) (12์‹œ)
500
+ - ฮธ=180ยฐ โ†’ P=(-150, 0) (9์‹œ)
501
+ - ฮธ=270ยฐ โ†’ P=(0, -150) (6์‹œ)
502
+
503
+ ### 9.2 ํŒจํ„ด๋ณ„ ๊ฑฐ๋ฆฌ
504
+
505
+ | ํŒจํ„ด | ๊ฑฐ๋ฆฌ | ์˜๋ฏธ |
506
+ |------|------|------|
507
+ | **์„ ํ˜•** | `\|P.x - centroid_x\|` | Y์ถ• ํ‰ํ–‰์„ ๊ณผ contact point์˜ ์ˆ˜ํ‰ ๊ฑฐ๋ฆฌ |
508
+ | **๊ตฐ์ง‘** | `โˆš((P.x-cx)ยฒ + (P.y-cy)ยฒ)` | centroid์™€ contact point์˜ 2D ๊ฑฐ๋ฆฌ |
509
+ | **ํ™˜ํ˜•** | ๊ณ„์‚ฐ ์•ˆ ํ•จ | ๊ฐ๋„ ๋ฌด๊ด€, ์ „์ฒด ํ›„๋ณด ๋ฐ˜ํ™˜ |
510
+
511
+ ### 9.3 ํ›„๋ณด ํ•„ํ„ฐ๋ง/์ •๋ ฌ
512
+
513
+ 1. tolerance ํ†ต๊ณผ: `distance_mm <= tolerance_mm`
514
+ 2. ๊ฐ€๊นŒ์šด ์ˆœ ์ •๋ ฌ
515
+ 3. top-N ์š”์•ฝ: `"EQP_TYPE:Part | EQP_TYPE:Part | ..."`
516
+
517
+ ### 9.4 `contact_angle.csv` ํ˜•์‹
518
+
519
+ | ์ปฌ๋Ÿผ | ๋‚ด์šฉ | ๋น„๊ณ  |
520
+ |------|------|------|
521
+ | `EQP_TYPE` | ์„ค๋น„ ํƒ€์ž… | EPI_REACTOR, EBIS, AVIS, SP3, Wafersight, Wafersight2 |
522
+ | `Part` | ์„ค๋น„ ๋‚ด ๋ถ€์œ„ | FI Robot, Aligner, Chuck, Edge Stage ๋“ฑ |
523
+ | `Contact_Angle` | ๊ฐ๋„ (0~360ยฐ) | math ๋ฐ˜์‹œ๊ณ„ ๊ธฐ์ค€ |
524
+ | `Defect_Direction` | "Line" ๋˜๋Š” ๊ณต๋ž€ | Line=์„ ํ˜• ๋งค์นญ์šฉ, ๊ณต๋ž€=๊ตฐ์ง‘/ํ™˜ํ˜•์šฉ |
525
+
526
+ ์›๋ณธ CSV 4๋ฒˆ์งธ ์ปฌ๋Ÿผ ํ—ค๋”๊ฐ€ ์ธ์ฝ”๋”ฉ ๊นจ์ง€๋Š” ๊ฒฝ์šฐ ์œ„์น˜ ๊ธฐ๋ฐ˜์œผ๋กœ ๊ฐ•์ œ rename.
527
+
528
+ ---
529
+
530
+ ## 10. ๊ฒ€์ฆ ๋ฐ ํ…Œ์ŠคํŠธ
531
+
532
+ ### 10.1 ํ•ฉ์„ฑ ๋ฐ์ดํ„ฐ ๊ฒ€์ฆ (`test_synthetic_7days.py`)
533
+
534
+ 7์ผ์น˜ ์˜๋„์  ํŒจํ„ด์„ ๊ฐ€์ง„ ํ•ฉ์„ฑ ๋ฐ์ดํ„ฐ๋กœ ๋ถ„์„๊ธฐ ๋™์ž‘ ํ™•์ธ.
535
+
536
+ | ์ผ์ž | ์˜๋„ ํŒจํ„ด | ๊ธฐ๋Œ€ contact ๋งค์นญ |
537
+ |------|----------|-------------------|
538
+ | 20260101 | ํ™˜ํ˜• r=85 | Line ์ œ์™ธ ์ „์ฒด |
539
+ | 20260102 | 선형 x=38 | EPI:FI Robot 285°, T/C Blade 75°/285° 등 (cos≈0.26) |
540
+ | 20260103 | ๊ตฐ์ง‘ (135, 0) | EPI:FI FOUP 0ยฐ, EBIS:Edge Stage 0ยฐ |
541
+ | 20260104 | ๊ตฐ์ง‘ (0, 140) | EBIS:Edge Stage 90ยฐ, Back Stage 90ยฐ |
542
+ | 20260105 | ๊ตฐ์ง‘ (-135, 0) | EPI:FI FOUP 180ยฐ, EBIS:Edge Stage 180ยฐ |
543
+ | 20260106 | ์„ ํ˜• x=-141 | AVIS:Flipper 158ยฐ/197ยฐ |
544
+ | 20260107 | ๊ตฐ์ง‘ (50, 50) | **๋งค์นญ ์—†์Œ** (negative test) |
545
+
546
+ ์‹คํ–‰:
547
+ ```bash
548
+ python test_synthetic_7days.py
549
+ ```
550
+
551
+ ### 10.2 ์‹ค์Šต ๋…ธํŠธ๋ถ (`demo_LLS_pattern_analysis.ipynb`)
552
+
553
+ ๊ฐ ํด๋ž˜์Šค ๋‹จ๋… ์‚ฌ์šฉ๋ฒ• + ์ „์ฒด ํŒŒ์ดํ”„๋ผ์ธ ์‹œ์—ฐ.
554
+
555
+ ```bash
556
+ jupyter notebook demo_LLS_pattern_analysis.ipynb
557
+ ```
558
+
559
+ ---
560
+
561
+ ## 11. ํ™•์žฅ/ํŠœ๋‹ ๊ฐ€์ด๋“œ
562
+
563
+ ### 11.1 ์ž„๊ณ„์น˜ ํŠœ๋‹ ์ ˆ์ฐจ
564
+
565
+ 1. **ํ•ฉ์„ฑ ๋ฐ์ดํ„ฐ๋กœ baseline ํ™•๋ณด** โ€” `test_synthetic_7days.py` ๊ฒฐ๊ณผ๊ฐ€ ์˜๋„์™€ ์ผ์น˜ํ•˜๋Š”์ง€ ํ™•์ธ
566
+ 2. **์‹ค์ œ ๋ฐ์ดํ„ฐ sample ๋ถ„์„** โ€” `LLS_by_cst_full_analysis.csv`์˜ ํŒจํ„ด ๋ถ„ํฌ ๊ฒ€ํ† 
567
+ 3. **์˜ค๋ถ„๋ฅ˜ ์ผ€์ด์Šค ๊ฒ€ํ† ** โ€” `figures_by_cst/`์—์„œ ์˜์‹ฌ ์‚ฌ๋ก€ ์‹œ๊ฐ ํ™•์ธ
568
+ 4. **config ์กฐ์ • ํ›„ ์žฌ๋ถ„์„** โ€” ํ•œ ๋ฒˆ์— ํ•˜๋‚˜์˜ ํŒŒ๋ผ๋ฏธํ„ฐ๋งŒ ๋ณ€๊ฒฝ ๊ถŒ์žฅ
569
+
570
+ ### 11.2 ์ž์ฃผ ์กฐ์ •ํ•˜๋Š” ํŒŒ๋ผ๋ฏธํ„ฐ
571
+
572
+ | ์ฆ์ƒ | ์กฐ์ • |
573
+ |------|------|
574
+ | ๋„ˆ๋ฌด ๋งŽ์€ noise๊ฐ€ inlier๋กœ ์žกํž˜ | `lof_contamination` โ†‘, `n1_min_wafers` โ†‘ |
575
+ | ํ™˜ํ˜• false positive (์„ ํ˜•์ด ํ™˜ํ˜•์œผ๋กœ) | `ring_pca_ratio_max` โ†“ |
576
+ | ์„ ํ˜• underdetect | `linear_pca_ratio_min` โ†“, `linear_max_deviation` โ†‘ |
577
+ | ๊ตฐ์ง‘์ด ๋„ˆ๋ฌด ์ž˜๊ฒŒ ์ชผ๊ฐœ์ง | `cluster_dbscan_eps` โ†‘ |
578
+ | Contact ํ›„๋ณด๊ฐ€ ๋„ˆ๋ฌด ๋งŽ์Œ | `contact_mapping.tolerance_mm` โ†“ |
579
+ | Contact ํ›„๋ณด๊ฐ€ ์•ˆ ์žกํž˜ | `contact_mapping.tolerance_mm` โ†‘ |
580
+
581
+ ### 11.3 ์ƒˆ ํŒจํ„ด ํƒ€์ž… ์ถ”๊ฐ€
582
+
583
+ 1. `pattern_detection.PatternDetector`์— `_is_my_pattern()` ๋ฉ”์„œ๋“œ ์ถ”๊ฐ€
584
+ 2. `classify()`์˜ ํŒ์ • ์šฐ์„ ์ˆœ์œ„์— ๋ผ์›Œ ๋„ฃ๊ธฐ
585
+ 3. `utils.PATTERN_COLORS`์— ์ƒ‰์ƒ ์ถ”๊ฐ€
586
+ 4. `contact_mapper`์˜ ๋งค์นญ ๊ทœ์น™ ํ™•์žฅ (ํ•„์š” ์‹œ)
587
+ 5. `mode_daily.excluded_patterns`์— ์ถ”๊ฐ€ (ํ•„์š” ์‹œ)
588
+
589
+ ### 11.4 ์ƒˆ ์„ค๋น„ ๋ถ€์œ„ ์ถ”๊ฐ€
590
+
591
+ 1. `contact_angle.csv`์— ํ–‰ ์ถ”๊ฐ€ (EQP_TYPE, Part, Contact_Angle, Defect_Direction)
592
+ 2. ๋ณ„๋„ ์ฝ”๋“œ ์ˆ˜์ • ์—†์ด ์ž๋™ ๋ฐ˜์˜๋จ
593
+
594
+ ---
595
+
596
+ ## ๋ถ€๋ก A. ํŒŒ์ผ ๋ชฉ๋ก
597
+
598
+ | ํŒŒ์ผ | ์ข…๋ฅ˜ | ์„ค๋ช… |
599
+ |------|------|------|
600
+ | `lls_config.json` | ์„ค์ • | ๋ถ„์„ ํŒŒ๋ผ๋ฏธํ„ฐ |
601
+ | `contact_angle.csv` | ๋ฐ์ดํ„ฐ | ์„ค๋น„ ์ ‘์ด‰ ๊ฐ๋„ ํ…Œ์ด๋ธ” |
602
+ | `utils.py` | ์ฝ”๋“œ | `WaferUtils` ํด๋ž˜์Šค |
603
+ | `pattern_detection.py` | ์ฝ”๋“œ | `PatternDetector` ํด๋ž˜์Šค |
604
+ | `contact_mapper.py` | ์ฝ”๋“œ | `ContactMapper` ํด๋ž˜์Šค |
605
+ | `pattern_analyzer.py` | ์ฝ”๋“œ | `LLSPatternAnalyzer` ํด๋ž˜์Šค (๋ฉ”์ธ) |
606
+ | `0.ingest_daily_LLS.py` | ์ฝ”๋“œ | Trino โ†’ parquet ์ˆ˜์ง‘ |
607
+ | `test_synthetic_7days.py` | ํ…Œ์ŠคํŠธ | ํ•ฉ์„ฑ ๋ฐ์ดํ„ฐ ๊ฒ€์ฆ |
608
+ | `demo_LLS_pattern_analysis.ipynb` | ๋ฌธ์„œ | ์‹ค์Šต ๋…ธํŠธ๋ถ |
609
+ | `README.md` | ๋ฌธ์„œ | ๋ณธ ๋ฌธ์„œ |
610
+
611
+ ## ๋ถ€๋ก B. ์ขŒํ‘œยท๊ฐ๋„ ๊ทœ์•ฝ ์ •๋ฆฌ
612
+
613
+ - **์ขŒํ‘œ ๋‹จ์œ„**: mm
614
+ - **์›จ์ดํผ ๋ฐ˜์ง€๋ฆ„**: 150 mm (0.0 ~ 150.0)
615
+ - **์›์ **: ์›จ์ดํผ ์ค‘์‹ฌ
616
+ - **๊ฐ๋„ (analyzer ๋‚ด๋ถ€)**: math ๋ฐ˜์‹œ๊ณ„, 0ยฐ=3์‹œ(+x), 90ยฐ=12์‹œ(+y)
617
+ - **๊ฐ๋„ (์‹œ๊ฐํ™” ๋ผ๋ฒจ)**: ์‹œ๊ณ„๋ฐฉํ–ฅ, 12์‹œ=0ยฐ, 1์‹œ=30ยฐ, ..., 11์‹œ=330ยฐ
618
+ - **๊ฐ๋„ (contact_angle.csv)**: math ๋ฐ˜์‹œ๊ณ„ (analyzer์™€ ๋™์ผ)
619
+ - **Zone ๋ผ๋ฒจ ํ˜•์‹**: `{Inner|Outer}_{์‹œ๊ณ„์œ„์น˜}` (์˜ˆ: `Inner_03` = 3์‹œ ๋ฐฉํ–ฅ ๋‚ด์ธก)
620
+
621
+ ---
622
+
623
+ *Last updated: 2026-05-12*
nu/contact_angle.csv ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EQP_TYPE,Part,Contact_Angle,Defect_Direction
2
+ EPI_REACTOR,FI FOUP,180,
3
+ EPI_REACTOR,FI FOUP,0,
4
+ EPI_REACTOR,FI Robot,255,Line
5
+ EPI_REACTOR,FI Robot,130,Line
6
+ EPI_REACTOR,FI Robot,50,Line
7
+ EPI_REACTOR,FI Robot,285,Line
8
+ EPI_REACTOR,LDLK Indexer Pin,205,Line
9
+ EPI_REACTOR,LDLK Indexer Pin,155,Line
10
+ EPI_REACTOR,LDLK Indexer Pin,25,Line
11
+ EPI_REACTOR,LDLK Indexer Pin,335,Line
12
+ EPI_REACTOR,T/C Blade,255,Line
13
+ EPI_REACTOR,T/C Blade,285,Line
14
+ EPI_REACTOR,T/C Blade,105,Line
15
+ EPI_REACTOR,T/C Blade,75,Line
16
+ Wafersight,Robot,125,Line
17
+ Wafersight,Robot,55,Line
18
+ Wafersight,Robot,270,Line
19
+ Wafersight,Robot,250,Line
20
+ Wafersight,Robot,290,Line
21
+ Wafersight,Aligner,194,
22
+ Wafersight,Aligner,167,
23
+ Wafersight,Aligner,13,
24
+ Wafersight,Aligner,346,
25
+ Wafersight,Buffer,195,Line
26
+ Wafersight,Buffer,90,Line
27
+ Wafersight,Buffer,345,Line
28
+ Wafersight,Pallet,225,Line
29
+ Wafersight,Pallet,135,Line
30
+ Wafersight,Pallet,0,Line
31
+ Wafersight2,Robot,119,Line
32
+ Wafersight2,Robot,64,Line
33
+ Wafersight2,Robot,270,Line
34
+ Wafersight2,Robot,250,Line
35
+ Wafersight2,Robot,290,Line
36
+ Wafersight2,Aligner,194,
37
+ Wafersight2,Aligner,167,
38
+ Wafersight2,Aligner,13,
39
+ Wafersight2,Aligner,346,
40
+ Wafersight2,Buffer,195,Line
41
+ Wafersight2,Buffer,90,Line
42
+ Wafersight2,Buffer,345,Line
43
+ Wafersight2,Pallet,225,Line
44
+ Wafersight2,Pallet,135,Line
45
+ Wafersight2,Pallet,0,Line
46
+ AVIS,Robot,105,Line
47
+ AVIS,Robot,75,Line
48
+ AVIS,Robot,270,Line
49
+ AVIS,Robot,255,Line
50
+ AVIS,Robot,285,Line
51
+ AVIS,Flipper,197,Line
52
+ AVIS,Flipper,158,Line
53
+ AVIS,Flipper,19,Line
54
+ AVIS,Flipper,339,Line
55
+ AVIS,Inspection,234,Line
56
+ AVIS,Inspection,226,Line
57
+ AVIS,Inspection,131,Line
58
+ AVIS,Inspection,124,Line
59
+ AVIS,Inspection,54,Line
60
+ AVIS,Inspection,45,Line
61
+ AVIS,Inspection,319,Line
62
+ AVIS,Inspection,313,Line
63
+ EBIS,Robot,261,Line
64
+ EBIS,Robot,248,Line
65
+ EBIS,Robot,235,Line
66
+ EBIS,Robot,131,Line
67
+ EBIS,Robot,41,Line
68
+ EBIS,Robot,305,Line
69
+ EBIS,Robot,292,Line
70
+ EBIS,Robot,278,Line
71
+ EBIS,Edge Stage,240,
72
+ EBIS,Edge Stage,210,
73
+ EBIS,Edge Stage,180,
74
+ EBIS,Edge Stage,120,
75
+ EBIS,Edge Stage,90,
76
+ EBIS,Edge Stage,60,
77
+ EBIS,Edge Stage,0,
78
+ EBIS,Edge Stage,330,
79
+ EBIS,Edge Stage,240,
80
+ EBIS,Back Stage,209,
81
+ EBIS,Back Stage,90,
82
+ EBIS,Back Stage,329,
83
+ SP3,Robot,270,Line
84
+ SP3,Robot,251,Line
85
+ SP3,Robot,119,Line
86
+ SP3,Robot,64,Line
87
+ SP3,Robot,291,Line
88
+ SP3,Aligner,227,Line
89
+ SP3,Aligner,211,Line
90
+ SP3,Aligner,157,Line
91
+ SP3,Aligner,151,Line
92
+ SP3,Aligner,107,Line
93
+ SP3,Aligner,47,Line
94
+ SP3,Aligner,31,Line
95
+ SP3,Aligner,347,Line
96
+ SP3,Aligner,331,Line
97
+ SP3,Aligner,287,Line
98
+ SP3,Chuck,232,Line
99
+ SP3,Chuck,193,Line
100
+ SP3,Chuck,112,Line
101
+ SP3,Chuck,73,Line
102
+ SP3,Chuck,352,Line
103
+ SP3,Chuck,313,Line
nu/contact_mapper.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # contact_mapper.py
2
+ """
3
+ LLS ํŒจํ„ด(centroid + ํŒจํ„ด ํƒ€์ž…) โ†’ ์„ค๋น„ ๋ถ€์œ„ ํ›„๋ณด ๋งคํ•‘.
4
+
5
+ ๊ฒ€์ถœ๋œ ํŒจํ„ด์˜ ๋ฐœ์ƒ ์œ„์น˜๋ฅผ `contact_angle.csv` (๊ฐ ์„ค๋น„/Part์˜ ์ ‘์ด‰ ๊ฐ๋„ ํ…Œ์ด๋ธ”)์—
6
+ ์กฐํšŒํ•˜์—ฌ, ๊ฒฐํ•จ์„ ์œ ๋ฐœํ–ˆ์„ ๊ฐ€๋Šฅ์„ฑ์ด ์žˆ๋Š” ์„ค๋น„ ๋ถ€์œ„ ํ›„๋ณด ๋ฆฌ์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•œ๋‹ค.
7
+
8
+ ์ขŒํ‘œ๊ณ„ / ๊ฐ๋„ ๊ทœ์•ฝ
9
+ ------------------
10
+ - ๋ชจ๋“  ๊ฐ๋„๋Š” **์ˆ˜ํ•™(atan2) ๋ฐ˜์‹œ๊ณ„ ๊ธฐ์ค€**, 0ยฐ=3์‹œ ๋ฐฉํ–ฅ(+x).
11
+ - `Contact_Angle ฮธ`๋ฅผ ์›จ์ดํผ edge (r=150mm) ์ขŒํ‘œ๋กœ ๋ณ€ํ™˜:
12
+ P_contact = (150ยทcos ฮธ, 150ยทsin ฮธ)
13
+
14
+ ํŒจํ„ด๋ณ„ ๋งค์นญ ๊ทœ์น™
15
+ ----------------
16
+ +--------+--------------------+-------------------------------------------+
17
+ | ํŒจํ„ด | ํ›„๋ณด ํ–‰ (CSV) | ๊ฑฐ๋ฆฌ ์ •์˜ |
18
+ +========+====================+===========================================+
19
+ | ์„ ํ˜• | Defect_Direction | |P.x - centroid_x| (Y์ถ• ํ‰ํ–‰์„ ๊ณผ P์˜ ์ˆ˜ํ‰๊ฑฐ๋ฆฌ) |
20
+ | | == "Line" ๋งŒ | |
21
+ +--------+--------------------+-------------------------------------------+
22
+ | ๊ตฐ์ง‘ | Defect_Direction | โˆš((P.x-cx)ยฒ + (P.y-cy)ยฒ) (์œ ํด๋ฆฌ๋“œ) |
23
+ | | != "Line" ๋งŒ | |
24
+ +--------+--------------------+-------------------------------------------+
25
+ | ํ™˜ํ˜• | Defect_Direction | ๊ฑฐ๋ฆฌ ๊ณ„์‚ฐ ์•ˆ ํ•จ (๊ฐ๋„ ๋ฌด๊ด€, ์ „์ฒด ๋ฐ˜ํ™˜) |
26
+ | | != "Line" ๋งŒ | |
27
+ +--------+--------------------+-------------------------------------------+
28
+
29
+ CSV ํ˜•์‹ (`contact_angle.csv`)
30
+ ------------------------------
31
+ 1์—ด: EQP_TYPE (์˜ˆ: EPI_REACTOR, EBIS, AVIS, SP3, Wafersight...)
32
+ 2์—ด: Part (์˜ˆ: FI Robot, Aligner, Chuck, Edge Stage...)
33
+ 3์—ด: Contact_Angle (0~360ยฐ, math ๊ธฐ์ค€)
34
+ 4์—ด: Defect_Direction (Line | ๊ณต๋ž€)
35
+
36
+ * ์›๋ณธ CSV์˜ 4์—ด ํ—ค๋”๊ฐ€ ์ธ์ฝ”๋”ฉ ๊นจ์ง(์˜ˆ: "Defect ๏ฟฝ๏ฟฝ๏ฟฝ๏ฟฝ ๏ฟฝ๏ฟฝ๏ฟฝ๏ฟฝ") ๊ฐ€๋Šฅํ•˜๋ฏ€๋กœ
37
+ ``_load_contact_table()``์—์„œ ์ปฌ๋Ÿผ ์œ„์น˜ ๊ธฐ๋ฐ˜์œผ๋กœ ๊ฐ•์ œ renameํ•œ๋‹ค.
38
+ """
39
+ from __future__ import annotations
40
+
41
+ import os
42
+ from typing import Optional
43
+
44
+ import numpy as np
45
+ import pandas as pd
46
+
47
+
48
+ # ----------------------------------------------------------------------
49
+ # ์ƒ์ˆ˜
50
+ # ----------------------------------------------------------------------
51
+ # ์›จ์ดํผ ๋ฐ˜์ง€๋ฆ„ (mm). edge contact point ์ขŒํ‘œ ์‚ฐ์ถœ ์‹œ ์‚ฌ์šฉ.
52
+ WAFER_RADIUS_MM = 150.0
53
+
54
+ # CSV 4๋ฒˆ์งธ ์ปฌ๋Ÿผ (์›๋ณธ ํ—ค๋”๊ฐ€ ๊นจ์งˆ ์ˆ˜ ์žˆ์–ด ๋‚ด๋ถ€์—์„œ ๊ฐ•์ œ ์‚ฌ์šฉํ•  ๋ณ„์นญ).
55
+ CONTACT_COL_DIRECTION = "Defect_Direction"
56
+
57
+
58
+ # ----------------------------------------------------------------------
59
+ # ๋‚ด๋ถ€ ํ—ฌํผ
60
+ # ----------------------------------------------------------------------
61
def _load_contact_table(csv_path: str) -> pd.DataFrame:
    """
    Load ``contact_angle.csv`` and normalise it for matching.

    Processing steps:

    - The first four columns are renamed **by position** to ``EQP_TYPE``,
      ``Part``, ``Contact_Angle`` and ``Defect_Direction``; whatever text the
      header carries is ignored. Columns after the fourth keep their names.
    - ``Contact_Angle`` is converted to numeric; rows whose angle cannot be
      parsed are dropped and the index is reset.
    - ``Defect_Direction`` has missing values replaced with ``""`` and
      surrounding whitespace stripped.

    Parameters
    ----------
    csv_path : str
        Path to the contact-angle CSV (first row is the header).

    Returns
    -------
    pd.DataFrame
        The normalised table.

    Raises
    ------
    ValueError
        If the file has fewer than four columns.
    """
    # The header is overwritten positionally below, so a header that is not
    # valid UTF-8 must not abort the load: undecodable bytes are replaced with
    # U+FFFD instead of raising UnicodeDecodeError.
    # NOTE: the same replacement applies to any undecodable byte in data rows.
    with open(csv_path, encoding="utf-8", errors="replace") as fh:
        df = pd.read_csv(fh, header=0)

    expected = ["EQP_TYPE", "Part", "Contact_Angle", CONTACT_COL_DIRECTION]
    if df.shape[1] < len(expected):
        raise ValueError(
            f"contact CSV needs at least {len(expected)} columns "
            f"(got {df.shape[1]}): {csv_path}"
        )
    # Positional rename; any extra trailing columns keep their original names.
    df.columns = expected + list(df.columns[len(expected):])

    df["Contact_Angle"] = pd.to_numeric(df["Contact_Angle"], errors="coerce")
    df = df.dropna(subset=["Contact_Angle"]).reset_index(drop=True)
    df[CONTACT_COL_DIRECTION] = (
        df[CONTACT_COL_DIRECTION].fillna("").astype(str).str.strip()
    )
    return df
85
+
86
+
87
+ # ======================================================================
88
+ # ContactMapper
89
+ # ======================================================================
90
class ContactMapper:
    """
    Map a detected pattern to candidate equipment parts via a contact-angle table.

    At construction every ``Contact_Angle`` in the table is converted to a
    point on the wafer edge, ``(WAFER_RADIUS_MM * cos(theta),
    WAFER_RADIUS_MM * sin(theta))``, stored in the ``contact_x`` /
    ``contact_y`` columns. The matching methods then compare a pattern
    centroid against those points.

    Parameters
    ----------
    csv_path : str
        Path to ``contact_angle.csv``.
    tolerance_mm : float
        Maximum accepted distance (mm) between the pattern and an edge contact
        point. Applied to the x-axis distance for linear patterns and to the
        2-D Euclidean distance for cluster patterns.

    Attributes
    ----------
    df : pd.DataFrame
        Normalised contact table including ``contact_x`` and ``contact_y``.

    Raises
    ------
    FileNotFoundError
        If ``csv_path`` does not exist.
    """

    def __init__(self, csv_path: str, tolerance_mm: float = 30.0):
        if not os.path.exists(csv_path):
            raise FileNotFoundError(f"contact CSV ์—†์Œ: {csv_path}")
        self.tolerance_mm = tolerance_mm
        self.df = _load_contact_table(csv_path)

        # Pre-compute the edge contact point of every row once, so each match
        # is a plain vectorised distance computation.
        theta_rad = np.deg2rad(self.df["Contact_Angle"].values)
        self.df["contact_x"] = WAFER_RADIUS_MM * np.cos(theta_rad)
        self.df["contact_y"] = WAFER_RADIUS_MM * np.sin(theta_rad)

    # ------------------------------------------------------------------
    # Row filters
    # ------------------------------------------------------------------
    def _line_rows(self) -> pd.DataFrame:
        """Return a copy of the rows whose Defect_Direction is "line" (case-insensitive)."""
        return self.df[self.df[CONTACT_COL_DIRECTION].str.lower() == "line"].copy()

    def _non_line_rows(self) -> pd.DataFrame:
        """Return a copy of the rows whose Defect_Direction is not "line" (e.g. blank)."""
        return self.df[self.df[CONTACT_COL_DIRECTION].str.lower() != "line"].copy()

    def _all_rows(self) -> pd.DataFrame:
        """Return a copy of every row (not called within this class)."""
        return self.df.copy()

    def _empty_result(self) -> pd.DataFrame:
        """Return a zero-row frame with the same columns as a real match result."""
        return self.df.iloc[0:0].assign(distance_mm=np.nan)

    # ------------------------------------------------------------------
    # Per-pattern matching
    # ------------------------------------------------------------------
    def match_linear(self, centroid_x: float) -> pd.DataFrame:
        """
        Match a linear pattern against the "Line" rows.

        The pattern is treated as a line parallel to the Y axis passing through
        ``x = centroid_x``; the distance of a contact point ``P`` is
        ``|P.x - centroid_x|``.

        Parameters
        ----------
        centroid_x : float
            x coordinate of the pattern centroid.

        Returns
        -------
        pd.DataFrame
            Rows within ``tolerance_mm``, sorted by ascending ``distance_mm``.
            The ``distance_mm`` column is present even when nothing matches.
        """
        rows = self._line_rows()
        # No early return for an empty table: the vectorised steps below work
        # on zero rows and keep the result schema identical to match_cluster.
        rows = rows.assign(distance_mm=np.abs(rows["contact_x"] - centroid_x))
        return rows[rows["distance_mm"] <= self.tolerance_mm].sort_values("distance_mm")

    def match_cluster(self, centroid_x: float, centroid_y: float) -> pd.DataFrame:
        """
        Match a cluster pattern against the non-"Line" rows.

        The pattern is treated as concentrated at ``(centroid_x, centroid_y)``;
        the distance of a contact point ``P`` is the 2-D Euclidean distance
        ``sqrt((P.x - cx)**2 + (P.y - cy)**2)``.

        Parameters
        ----------
        centroid_x, centroid_y : float
            Coordinates of the pattern centroid.

        Returns
        -------
        pd.DataFrame
            Rows within ``tolerance_mm``, sorted by ascending ``distance_mm``.
        """
        rows = self._non_line_rows()
        rows = rows.assign(
            distance_mm=np.sqrt(
                (rows["contact_x"] - centroid_x) ** 2
                + (rows["contact_y"] - centroid_y) ** 2
            )
        )
        return rows[rows["distance_mm"] <= self.tolerance_mm].sort_values("distance_mm")

    def match_ring(self) -> pd.DataFrame:
        """
        Match a ring pattern.

        No position is compared: every non-"Line" row is returned regardless of
        angle, with ``distance_mm`` set to NaN.
        """
        return self._non_line_rows().assign(distance_mm=np.nan)

    # ------------------------------------------------------------------
    # Dispatcher
    # ------------------------------------------------------------------
    def map_pattern(
        self,
        pattern: str,
        centroid_x: Optional[float] = None,
        centroid_y: Optional[float] = None,
    ) -> pd.DataFrame:
        """
        Dispatch to the matching method selected by the pattern label.

        Parameters
        ----------
        pattern : str
            Pattern label (ring / linear / cluster, spelled as in the
            comparisons below). A comma-joined string is accepted and only its
            first token is used; a list or tuple of labels is accepted and
            only its first element is used.
        centroid_x, centroid_y : float, optional
            Pattern centroid. Both are required for every label except ring.

        Returns
        -------
        pd.DataFrame
            Columns of the contact table (``EQP_TYPE``, ``Part``,
            ``Contact_Angle``, ``Defect_Direction``, ``contact_x``,
            ``contact_y``) plus ``distance_mm``. When nothing can be matched
            (``None`` or unknown label, missing centroid) the frame has zero
            rows but still carries those columns.
        """
        if pattern is None:
            return self._empty_result()
        if isinstance(pattern, (list, tuple)):
            # A sequence of labels follows the same "first one wins" rule as a
            # comma-joined string instead of being stringified as a whole.
            if not pattern:
                return self._empty_result()
            pattern = pattern[0]
            if pattern is None:
                return self._empty_result()
        if isinstance(pattern, str):
            pat = pattern.split(",")[0].strip()
        else:
            pat = str(pattern)

        if pat == "ํ™˜ํ˜•":  # ring: needs no centroid
            return self.match_ring()
        if centroid_x is None or centroid_y is None:
            return self._empty_result()
        if pat == "์„ ํ˜•":  # linear
            return self.match_linear(centroid_x)
        if pat == "๊ตฐ์ง‘":  # cluster
            return self.match_cluster(centroid_x, centroid_y)
        return self._empty_result()

    # ------------------------------------------------------------------
    # Summary helper
    # ------------------------------------------------------------------
    def summarize_candidates(
        self, candidates: pd.DataFrame, top_n: int = 5
    ) -> str:
        """
        Render a candidate frame as ``"EQP_TYPE:Part | EQP_TYPE:Part | ..."``.

        Parameters
        ----------
        candidates : pd.DataFrame
            Frame returned by ``map_pattern()``; rows are taken in their
            existing order.
        top_n : int
            Number of leading rows to include.

        Returns
        -------
        str
            The joined summary, or ``""`` when ``candidates`` is ``None`` or
            has no rows.
        """
        if candidates is None or candidates.empty:
            return ""
        head = candidates.head(top_n)
        return " | ".join(
            f"{eqp}:{part}" for eqp, part in zip(head["EQP_TYPE"], head["Part"])
        )
nu/demo_LLS_pattern_analysis.ipynb ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# LLS ๊ฒฐํ•จ ํŒจํ„ด ๋ถ„์„ ์‹ค์Šต ๋…ธํŠธ๋ถ\n",
8
+ "\n",
9
+ "์ด ๋…ธํŠธ๋ถ์€ `LLSPatternAnalyzer`, `PatternDetector`, `ContactMapper`, `WaferUtils` 4๊ฐœ ํด๋ž˜์Šค ์‚ฌ์šฉ๋ฒ•์„ ๋‹จ๊ณ„๋ณ„๋กœ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค.\n",
10
+ "\n",
11
+ "## ๋ชฉ์ฐจ\n",
12
+ "1. ํ™˜๊ฒฝ ์ค€๋น„ ๋ฐ import\n",
13
+ "2. ํ•ฉ์„ฑ ๊ฒฐํ•จ ๋ฐ์ดํ„ฐ ์ƒ์„ฑ (7์ผ์น˜)\n",
14
+ "3. `LLSPatternAnalyzer` ์ดˆ๊ธฐํ™”\n",
15
+ "4. Mode 1 (daily aggregated) ์‹คํ–‰\n",
16
+ "5. Mode 2 (by_cst) ์‹คํ–‰\n",
17
+ "6. ๊ฒฐ๊ณผ CSV ์กฐํšŒ\n",
18
+ "7. ์‹œ๊ฐํ™” ์ด๋ฏธ์ง€ ํ‘œ์‹œ\n",
19
+ "8. `PatternDetector` ๋‹จ๋… ์‚ฌ์šฉ\n",
20
+ "9. `ContactMapper` ๋‹จ๋… ์‚ฌ์šฉ\n",
21
+ "10. `WaferUtils` ์œ ํ‹ธ ๋‹จ๋… ํ™œ์šฉ"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "markdown",
26
+ "metadata": {},
27
+ "source": [
28
+ "## 1. ํ™˜๊ฒฝ ์ค€๋น„ ๋ฐ import\n",
29
+ "\n",
30
+ "ํ˜„์žฌ ์ž‘์—… ๋””๋ ‰ํ„ฐ๋ฆฌ์— `utils.py`, `pattern_detection.py`, `contact_mapper.py`, `pattern_analyzer.py`, `lls_config.json`, `contact_angle.csv` ๊ฐ€ ์žˆ์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค."
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": null,
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "import os\n",
40
+ "import sys\n",
41
+ "import shutil\n",
42
+ "import numpy as np\n",
43
+ "import pandas as pd\n",
44
+ "import matplotlib.pyplot as plt\n",
45
+ "from IPython.display import Image, display\n",
46
+ "\n",
47
+ "# ์ž‘์—… ๋””๋ ‰ํ„ฐ๋ฆฌ: ์ด ๋…ธํŠธ๋ถ์ด ์œ„์น˜ํ•œ ํด๋”(scr)๋กœ ์ด๋™\n",
48
+ "if not os.path.exists('./lls_config.json'):\n",
49
+ " os.chdir(os.path.dirname(os.path.abspath('demo_LLS_pattern_analysis.ipynb')))\n",
50
+ "sys.path.append(os.getcwd())\n",
51
+ "\n",
52
+ "from utils import WaferUtils\n",
53
+ "from pattern_detection import PatternDetector, classify_wafer_patterns\n",
54
+ "from contact_mapper import ContactMapper\n",
55
+ "from pattern_analyzer import LLSPatternAnalyzer\n",
56
+ "\n",
57
+ "print('โœ… ๋ชจ๋“ˆ import ์™„๋ฃŒ')\n",
58
+ "print(' cwd =', os.getcwd())"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "markdown",
63
+ "metadata": {},
64
+ "source": [
65
+ "## 2. ํ•ฉ์„ฑ ๊ฒฐํ•จ ๋ฐ์ดํ„ฐ ์ƒ์„ฑ (7์ผ์น˜)\n",
66
+ "\n",
67
+ "์‹ค์ œ Trino DB ์—ฐ๊ฒฐ ์—†์ด๋„ ๋ถ„์„๊ธฐ๋ฅผ ์‹œ์—ฐํ•˜๊ธฐ ์œ„ํ•ด ์˜๋„์  ํŒจํ„ด์„ ๊ฐ€์ง„ 7์ผ์น˜ parquet์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.\n",
68
+ "๊ฐ ์ผ์ž๋งˆ๋‹ค ๋‹ค๋ฅธ contact angle๊ณผ ๋งค์นญ๋˜๋„๋ก ์ขŒํ‘œ๋ฅผ ๋ฐฐ์น˜ํ•ฉ๋‹ˆ๋‹ค."
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": null,
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "# ์ž‘์—… ๋””๋ ‰ํ„ฐ๋ฆฌ ์ค€๋น„\n",
78
+ "WORK_ROOT = './_demo_run'\n",
79
+ "DAILY_DIR = os.path.join(WORK_ROOT, 'daily')\n",
80
+ "OUTPUT_DIR = os.path.join(WORK_ROOT, 'result')\n",
81
+ "if os.path.exists(WORK_ROOT):\n",
82
+ " shutil.rmtree(WORK_ROOT)\n",
83
+ "os.makedirs(DAILY_DIR, exist_ok=True)\n",
84
+ "\n",
85
+ "\n",
86
+ "def base_record(car_id, waf_id, dttm, eqp='EQP_A'):\n",
87
+ " return {\n",
88
+ " 'SUBLOT_ID': f'S{car_id}', 'CAR_ID': car_id, 'SCAN_TIME': dttm,\n",
89
+ " 'EQP_ID_8210': 'L01', 'EQP_ID_8030': eqp,\n",
90
+ " 'ROUTE_ID': 'WF7EP01', 'PROD_ID': 'P001',\n",
91
+ " 'HIS_REGIST_DTTM_8030': dttm, 'HIS_REGIST_DTTM_Pcounter': dttm,\n",
92
+ " 'WAF_ID': waf_id, 'DEFECT_CNT': 100, 'OPE_ID': '8210',\n",
93
+ " 'test_no': 1, 'roughbin_no': 0, 'd_size': 0.05,\n",
94
+ " 'LLS ๊ตฌ๋ถ„': 'LPD', 'Particle ๋ถ„ํฌ': '๋ฉด๋‚ด',\n",
95
+ " }\n",
96
+ "\n",
97
+ "\n",
98
+ "def make_ring(n=400, radius=85, noise=3.0):\n",
99
+ " th = np.random.uniform(0, 2*np.pi, n)\n",
100
+ " r = np.random.normal(radius, noise, n)\n",
101
+ " return r*np.cos(th), r*np.sin(th)\n",
102
+ "\n",
103
+ "\n",
104
+ "def make_vertical_line(n=300, x_center=38, y_range=(-60, 60), noise=3.0):\n",
105
+ " y = np.random.uniform(*y_range, n)\n",
106
+ " x = np.full(n, x_center) + np.random.normal(0, noise, n)\n",
107
+ " return x, y\n",
108
+ "\n",
109
+ "\n",
110
+ "def make_cluster(n=300, center=(130, 0), spread=10):\n",
111
+ " x = np.random.normal(center[0], spread, n)\n",
112
+ " y = np.random.normal(center[1], spread, n)\n",
113
+ " return x, y\n",
114
+ "\n",
115
+ "\n",
116
+ "def build_day(day_idx, date_str, pattern_specs, wafers_per_cst=5, n_csts=2):\n",
117
+ " rows = []\n",
118
+ " for cst_i in range(n_csts):\n",
119
+ " car_id = f'C{day_idx:02d}{cst_i}'\n",
120
+ " dttm = f'2026010{day_idx}120000'\n",
121
+ " px, py = pattern_specs[cst_i % len(pattern_specs)]\n",
122
+ " for w in range(wafers_per_cst):\n",
123
+ " waf_id = f'{car_id}W{w:02d}'\n",
124
+ " jx = px + np.random.normal(0, 1.5, len(px))\n",
125
+ " jy = py + np.random.normal(0, 1.5, len(py))\n",
126
+ " for x, y in zip(jx, jy):\n",
127
+ " rec = base_record(car_id, waf_id, dttm)\n",
128
+ " rec['coor_x'] = float(x); rec['coor_y'] = float(y)\n",
129
+ " rec['ANGLE'] = (np.arctan2(y, x)/np.pi*180 + 360) % 360\n",
130
+ " rec['DISTANCE'] = float(np.sqrt(x**2 + y**2))\n",
131
+ " rows.append(rec)\n",
132
+ " return pd.DataFrame(rows)\n",
133
+ "\n",
134
+ "\n",
135
+ "np.random.seed(42)\n",
136
+ "day_configs = {\n",
137
+ " '20260101': [make_ring(400, 85), make_ring(400, 85)], # ํ™˜ํ˜•\n",
138
+ " '20260102': [make_vertical_line(300, 38), make_vertical_line(300, 38)], # ์„ ํ˜• x=38\n",
139
+ " '20260103': [make_cluster(300, (135, 0), 8), make_cluster(300, (135, 0), 8)], # ๊ตฐ์ง‘ @ 3์‹œ edge\n",
140
+ " '20260104': [make_cluster(300, (0, 140), 8), make_cluster(300, (0, 140), 8)], # ๊ตฐ์ง‘ @ 12์‹œ edge\n",
141
+ " '20260105': [make_cluster(300, (-135, 0), 8), make_cluster(300, (-135, 0), 8)], # ๊ตฐ์ง‘ @ 9์‹œ edge\n",
142
+ " '20260106': [make_vertical_line(300, -141), make_vertical_line(300, -141)], # ์„ ํ˜• x=-141\n",
143
+ " '20260107': [make_cluster(300, (50, 50), 10), make_cluster(300, (50, 50), 10)], # ๊ตฐ์ง‘ (๋งค์นญ ์—†์Œ)\n",
144
+ "}\n",
145
+ "for i, (date, specs) in enumerate(day_configs.items(), start=1):\n",
146
+ " df = build_day(i, date, specs, wafers_per_cst=5, n_csts=2)\n",
147
+ " df.to_parquet(os.path.join(DAILY_DIR, f'{date}.parquet'), index=False)\n",
148
+ " print(f' ์ƒ์„ฑ {date}: {len(df):>5}๊ฑด, CST {df[\"CAR_ID\"].nunique()}, WAF {df[\"WAF_ID\"].nunique()}')"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "markdown",
153
+ "metadata": {},
154
+ "source": [
155
+ "## 3. `LLSPatternAnalyzer` ์ดˆ๊ธฐํ™”\n",
156
+ "\n",
157
+ "- `config_path` : ๋ถ„์„ ํŒŒ๋ผ๋ฏธํ„ฐ๊ฐ€ ๋‹ด๊ธด JSON\n",
158
+ "- `daily_input_dir` : ์ผ์ž๋ณ„ parquet ํด๋”\n",
159
+ "- `output_dir` : ๊ฒฐ๊ณผ ์ €์žฅ ๋ฃจํŠธ\n",
160
+ "- `contact_csv` : contact ๋งคํ•‘ ํ™œ์„ฑํ™” (None์ด๋ฉด ๋น„ํ™œ์„ฑ)"
161
+ ]
162
+ },
163
+ {
164
+ "cell_type": "code",
165
+ "execution_count": null,
166
+ "metadata": {},
167
+ "outputs": [],
168
+ "source": [
169
+ "analyzer = LLSPatternAnalyzer(\n",
170
+ " config_path='./lls_config.json',\n",
171
+ " daily_input_dir=DAILY_DIR,\n",
172
+ " output_dir=OUTPUT_DIR,\n",
173
+ " contact_csv='./contact_angle.csv',\n",
174
+ " setup_font=True,\n",
175
+ ")\n",
176
+ "print('cell_size_mm =', analyzer.cell_size_mm)\n",
177
+ "print('n1_min_wafers =', analyzer.n1_min_wafers)\n",
178
+ "print('n2_min_cell_def =', analyzer.n2_min_cell_defects)\n",
179
+ "print('inner_radius_mm =', analyzer.inner_radius_mm)\n",
180
+ "print('contact_tol_mm =', analyzer.contact_tolerance_mm)"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "markdown",
185
+ "metadata": {},
186
+ "source": [
187
+ "## 4. Mode 1 (daily aggregated) ์‹คํ–‰\n",
188
+ "\n",
189
+ "ํ•˜๋ฃจ ์ „์ฒด ๊ฒฐํ•จ์„ 1ํšŒ ๋ถ„๋ฅ˜ + contact ๋งคํ•‘."
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": null,
195
+ "metadata": {},
196
+ "outputs": [],
197
+ "source": [
198
+ "df_daily = analyzer.run(mode='daily')\n",
199
+ "cols = ['analysis_date', 'overall_pattern', 'is_significant',\n",
200
+ " 'wafer_count', 'defect_count',\n",
201
+ " 'main_centroid_x', 'main_centroid_y',\n",
202
+ " 'contact_candidate_count', 'contact_candidates']\n",
203
+ "with pd.option_context('display.max_colwidth', 120, 'display.width', 180):\n",
204
+ " display(df_daily[cols])"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "markdown",
209
+ "metadata": {},
210
+ "source": [
211
+ "## 5. Mode 2 (by_cst) ์‹คํ–‰\n",
212
+ "\n",
213
+ "CAR_ID ร— HIS_REGIST_DTTM ๊ทธ๋ฃน๋ณ„ ์„ธ๋ฐ€ ๋ถ„๋ฅ˜."
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "code",
218
+ "execution_count": null,
219
+ "metadata": {},
220
+ "outputs": [],
221
+ "source": [
222
+ "df_by_cst = analyzer.run(mode='by_cst')\n",
223
+ "cols = ['analysis_date', 'CST_ID', 'overall_pattern',\n",
224
+ " 'main_centroid_x', 'main_centroid_y',\n",
225
+ " 'contact_candidate_count', 'contact_candidates']\n",
226
+ "with pd.option_context('display.max_colwidth', 120, 'display.width', 180):\n",
227
+ " display(df_by_cst[cols])"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "markdown",
232
+ "metadata": {},
233
+ "source": [
234
+ "## 6. ๊ฒฐ๊ณผ CSV/parquet ํŒŒ์ผ ํ™•์ธ"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": null,
240
+ "metadata": {},
241
+ "outputs": [],
242
+ "source": [
243
+ "for root, _, files in os.walk(OUTPUT_DIR):\n",
244
+ " rel = os.path.relpath(root, OUTPUT_DIR)\n",
245
+ " for f in sorted(files):\n",
246
+ " print(f' {rel}/{f}')"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "markdown",
251
+ "metadata": {},
252
+ "source": [
253
+ "## 7. ์‹œ๊ฐํ™” ์ด๋ฏธ์ง€ ํ‘œ์‹œ\n",
254
+ "\n",
255
+ "Mode 1 ์ผ์ž๋ณ„ ํ†ตํ•ฉ ์ด๋ฏธ์ง€๋ฅผ ๋ชจ๋‘ ํ‘œ์‹œ."
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": null,
261
+ "metadata": {},
262
+ "outputs": [],
263
+ "source": [
264
+ "sig_dir = os.path.join(OUTPUT_DIR, 'figures_daily', 'significant')\n",
265
+ "imgs = sorted(f for f in os.listdir(sig_dir) if f.endswith('.jpg'))\n",
266
+ "for f in imgs:\n",
267
+ " print(f)\n",
268
+ " display(Image(filename=os.path.join(sig_dir, f), width=500))"
269
+ ]
270
+ },
271
+ {
272
+ "cell_type": "markdown",
273
+ "metadata": {},
274
+ "source": [
275
+ "## 8. `PatternDetector` ๋‹จ๋… ์‚ฌ์šฉ\n",
276
+ "\n",
277
+ "๋ถ„์„๊ธฐ ์—†์ด ๋‹จ์ผ wafer ๊ทธ๋ฃน์„ ์ง์ ‘ ๋ถ„๋ฅ˜ํ•ด๋ณด๊ธฐ."
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": null,
283
+ "metadata": {},
284
+ "outputs": [],
285
+ "source": [
286
+ "config = WaferUtils.load_config('./lls_config.json')\n",
287
+ "detector = PatternDetector(config)\n",
288
+ "\n",
289
+ "# ํ•˜๋ฃจ์น˜ ๋ฐ์ดํ„ฐ ๋กœ๋“œ ํ›„ ํ•œ CAR_ID๋งŒ ์‚ฌ์šฉ\n",
290
+ "sample_path = os.path.join(DAILY_DIR, '20260103.parquet')\n",
291
+ "df_sample = pd.read_parquet(sample_path)\n",
292
+ "df_sample = df_sample[df_sample['CAR_ID'] == df_sample['CAR_ID'].iloc[0]]\n",
293
+ "\n",
294
+ "result_df, dominant_zone, pattern_list, centroid = detector.classify(df_sample)\n",
295
+ "print('ํŒจํ„ด :', pattern_list)\n",
296
+ "print('Dominant :', dominant_zone)\n",
297
+ "print('Centroid :', centroid)\n",
298
+ "print('Inlier ์ˆ˜ :', int(result_df['inlier'].sum()))"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "markdown",
303
+ "metadata": {},
304
+ "source": [
305
+ "## 9. `ContactMapper` ๋‹จ๋… ์‚ฌ์šฉ\n",
306
+ "\n",
307
+ "์ž„์˜ centroid ์ขŒํ‘œ๋ฅผ ์ž…๋ ฅํ•ด์„œ ๊ฐ€๋Šฅํ•œ ์„ค๋น„ ๋ถ€์œ„ ํ›„๋ณด ์กฐํšŒ."
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": null,
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": [
316
+ "cm = ContactMapper('./contact_angle.csv', tolerance_mm=30.0)\n",
317
+ "\n",
318
+ "# ๊ตฐ์ง‘ ํŒจํ„ด: (130, 0) ์— ๊ฐ€๊นŒ์šด ๋น„-Line ํ›„๋ณด๋“ค\n",
319
+ "print('โ–ถ ๊ตฐ์ง‘ ๋งค์นญ โ€” centroid=(130, 0)')\n",
320
+ "display(cm.match_cluster(130, 0))\n",
321
+ "\n",
322
+ "# ์„ ํ˜• ํŒจํ„ด: x=38 vertical line\n",
323
+ "print('โ–ถ ์„ ํ˜• ๋งค์นญ โ€” centroid_x=38')\n",
324
+ "display(cm.match_linear(38).head(10))\n",
325
+ "\n",
326
+ "# ํ™˜ํ˜• ํŒจํ„ด: ๊ฐ๋„ ๋ฌด๊ด€ ์ „์ฒด\n",
327
+ "print('โ–ถ ํ™˜ํ˜• ๋งค์นญ (๊ฐ๋„ ๋ฌด๊ด€)')\n",
328
+ "display(cm.match_ring().head(10))"
329
+ ]
330
+ },
331
+ {
332
+ "cell_type": "markdown",
333
+ "metadata": {},
334
+ "source": [
335
+ "## 10. `WaferUtils` ์œ ํ‹ธ ๋‹จ๋… ํ™œ์šฉ\n",
336
+ "\n",
337
+ "ํŒจํ„ด ๋ถ„๋ฅ˜ ์—†์ด ์ „์ฒ˜๋ฆฌ/์‹œ๊ฐํ™”๋งŒ ์ˆ˜ํ–‰."
338
+ ]
339
+ },
340
+ {
341
+ "cell_type": "code",
342
+ "execution_count": null,
343
+ "metadata": {},
344
+ "outputs": [],
345
+ "source": [
346
+ "df_raw = pd.read_parquet(os.path.join(DAILY_DIR, '20260101.parquet'))\n",
347
+ "\n",
348
+ "# Fine-grid ํ• ๋‹น + Zone ๋ผ๋ฒจ๋ง\n",
349
+ "df_grid = WaferUtils.assign_fine_grid(df_raw, cell_size_mm=3.0)\n",
350
+ "df_zone = WaferUtils.add_zone_labels(df_grid, inner_radius=105.0)\n",
351
+ "\n",
352
+ "print('์ด ๊ฒฐํ•จ :', len(df_zone))\n",
353
+ "print('Unique cells :', df_zone['cell_id'].nunique())\n",
354
+ "print('Zone ๋ถ„ํฌ :')\n",
355
+ "display(df_zone['zone_label'].value_counts().head())\n",
356
+ "\n",
357
+ "# n1 ํ•„ํ„ฐ\n",
358
+ "df_filtered = WaferUtils.filter_by_cell_wafer_count(df_zone, n1_min_wafer=2, cell_size_mm=3.0)\n",
359
+ "summary = WaferUtils.summarize_filtering_result(df_zone, df_filtered)\n",
360
+ "print('\\nํ•„ํ„ฐ๋ง ์š”์•ฝ:', summary)\n",
361
+ "\n",
362
+ "# ์‹œ๊ฐํ™” (์ง์ ‘ ํ˜ธ์ถœ)\n",
363
+ "save_path = os.path.join(WORK_ROOT, 'manual_plot.jpg')\n",
364
+ "WaferUtils.plot_wafer_map(\n",
365
+ " result_df=df_filtered.assign(inlier=True),\n",
366
+ " key='manual_demo',\n",
367
+ " pattern_list=['ํ™˜ํ˜•'],\n",
368
+ " dominant_zone='Inner_03',\n",
369
+ " meta={'wafer_count': df_filtered['WAF_ID'].nunique(),\n",
370
+ " 'EQP_NM_8030': 'EQP_A',\n",
371
+ " 'main_centroid_x': 0, 'main_centroid_y': 0},\n",
372
+ " save_path=save_path,\n",
373
+ ")\n",
374
+ "display(Image(filename=save_path, width=500))"
375
+ ]
376
+ },
377
+ {
378
+ "cell_type": "markdown",
379
+ "metadata": {},
380
+ "source": [
381
+ "---\n",
382
+ "## ์š”์•ฝ\n",
383
+ "\n",
384
+ "| ํด๋ž˜์Šค | ์—ญํ•  |\n",
385
+ "|--------|------|\n",
386
+ "| `WaferUtils` | ํ•œ๊ธ€ ํฐํŠธ, config ๋กœ๋“œ, fine-grid/zone ๋ผ๋ฒจ๋ง, n1 ํ•„ํ„ฐ, ์‹œ๊ฐํ™” |\n",
387
+ "| `PatternDetector` | HDBSCAN + LOF ๊ธฐ๋ฐ˜ ํ™˜ํ˜•/์„ ํ˜•/๊ตฐ์ง‘ ๋ถ„๋ฅ˜ |\n",
388
+ "| `ContactMapper` | centroid ์ขŒํ‘œ โ†’ contact_angle.csv์˜ ์„ค๋น„ ๋ถ€์œ„ ๋งคํ•‘ |\n",
389
+ "| `LLSPatternAnalyzer` | ์œ„ 3๊ฐœ ํด๋ž˜์Šค๋ฅผ ์กฐํ•ฉํ•œ ์ผ์ž๋ณ„ ์ผ๊ด„ ์ฒ˜๋ฆฌ (Mode 1/2) |\n",
390
+ "\n",
391
+ "ํ”„๋กœ๋•์…˜ ์šด์˜ ์‹œ:\n",
392
+ "1. `0.ingest_daily_LLS.py` ๋กœ Trino โ†’ `./daily/*.parquet` ์ˆ˜์ง‘ \n",
393
+ "2. `python pattern_analyzer.py daily` ๋˜๋Š” `by_cst` ๋กœ ๋ถ„์„ \n",
394
+ "3. ๊ฒฐ๊ณผ CSV/์ด๋ฏธ์ง€ ๊ฒ€ํ† "
395
+ ]
396
+ }
397
+ ],
398
+ "metadata": {
399
+ "kernelspec": {
400
+ "display_name": "Python 3",
401
+ "language": "python",
402
+ "name": "python3"
403
+ },
404
+ "language_info": {
405
+ "name": "python",
406
+ "version": "3.9"
407
+ }
408
+ },
409
+ "nbformat": 4,
410
+ "nbformat_minor": 4
411
+ }
nu/lls_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment": {
3
+ "name": "LLS_CST_Pattern_Analysis",
4
+ "version": "2026.04.09",
5
+ "description": "CST 단위 LLS 결함 맵 자동 분류 (환형/선형/군집)"
6
+ },
7
+ "preprocessing": {
8
+ "d_size_min": 0.037,
9
+ "inner_radius_mm": 105.0,
10
+ "cell_size_mm": 3.0,
11
+ "n1_min_wafers": 2,
12
+ "n2_min_zone_defects": 3
13
+ },
14
+ "clustering": {
15
+ "min_cluster_size": 3,
16
+ "min_samples": 2,
17
+ "cluster_selection_method": "leaf",
18
+ "dbscan_eps": 20.0,
19
+ "cluster_dbscan_eps": 15.0
20
+ },
21
+ "lof": {
22
+ "lof_min_points": 5,
23
+ "lof_n_neighbors": 13,
24
+ "lof_contamination": 0.07
25
+ },
26
+ "cluster": {
27
+ "cluster_compactness_radius": 25.0
28
+ },
29
+ "linear": {
30
+ "linear_pca_ratio_min": 2.5,
31
+ "linear_max_deviation": 30.0,
32
+ "linear_min_length": 5.0,
33
+ "linear_max_gap_ratio": 0.5,
34
+ "centroid_linear_min_length": 20.0,
35
+ "centroid_linear_pca_min": 2.0,
36
+ "centroid_linear_dev_max": 25.0
37
+ },
38
+ "ring": {
39
+ "ring_min_points": 10,
40
+ "ring_band_width": 3.0,
41
+ "ring_r_absolute_tolerance": 10.0,
42
+ "ring_min_angular_coverage": 200.0,
43
+ "ring_min_sectors": 7,
44
+ "ring_fit_rmse_max": 10
45
+ },
46
+ "misc": {
47
+ "min_points_for_clustering": 3
48
+ }
49
+ }
nu/pattern_analyzer.py ADDED
@@ -0,0 +1,561 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pattern_analyzer.py
2
+ """
3
+ LLS ์ผ์ž๋ณ„ ๋ฐ์ดํ„ฐ ๋ถ„์„ ์˜ค์ผ€์ŠคํŠธ๋ ˆ์ดํ„ฐ.
4
+
5
+ `./daily/YYYYMMDD.parquet` ํ˜•ํƒœ์˜ ์ผ์ž๋ณ„ ๊ฒฐํ•จ ๋ฐ์ดํ„ฐ๋ฅผ ์ผ๊ด„ ์ฒ˜๋ฆฌํ•˜์—ฌ
6
+ ํŒจํ„ด ๋ถ„๋ฅ˜ + Contact ๋งคํ•‘ + ์‹œ๊ฐํ™”๊นŒ์ง€ ํ•œ ๋ฒˆ์— ์ˆ˜ํ–‰ํ•œ๋‹ค.
7
+
8
+ ๋‘ ๊ฐ€์ง€ ์‹คํ–‰ ๋ชจ๋“œ ์ง€์›
9
+ ----------------------
10
+ - ``"by_cst"`` : CAR_ID(์บ๋ฆฌ์–ด) ร— HIS_REGIST_DTTM(์Šค์บ”์‹œ๊ฐ) ๋‹จ์œ„๋กœ ๋ถ„๋ฅ˜.
11
+ ๋™์ผ ์บ๋ฆฌ์–ด ๋‚ด ๋™์ผ ์‹œ๊ฐ ๊ทธ๋ฃน๋ณ„ ํŒจํ„ด ๋ฐœ์ƒ ์ถ”์ ์— ์‚ฌ์šฉ.
12
+ - ``"daily"`` : ํ•˜๋ฃจ ์ „์ฒด ๊ฒฐํ•จ์„ ํ•œ ๊ทธ๋ฃน์œผ๋กœ ํ•ฉ์ณ 1ํšŒ ๋ถ„๋ฅ˜.
13
+ ์ผ์ž๋ณ„ ๊ณต์ • ํŠธ๋ Œ๋“œ/์žฅ๋น„ ์ด์ƒ ์ถ”์ ์— ์‚ฌ์šฉ. ์œ ์˜ ํŒจํ„ด ์—ฌ๋ถ€์™€
14
+ ๋ฌด๊ด€ํ•˜๊ฒŒ ํ•„ํ„ฐ๋ง๋œ ๊ฒฐํ•จ์€ ํ•ญ์ƒ ๋ณด์กด.
15
+
16
+ ๋‚ด๋ถ€ ์˜์กด์„ฑ
17
+ -----------
18
+ - :class:`utils.WaferUtils` : ์ „์ฒ˜๋ฆฌยท์‹œ๊ฐํ™” ์œ ํ‹ธ
19
+ - :func:`pattern_detection.classify_wafer_patterns` : ํŒจํ„ด ๋ถ„๋ฅ˜
20
+ - :class:`contact_mapper.ContactMapper` : ์„ค๋น„ ๋ถ€์œ„ ๋งคํ•‘
21
+
22
+ ์ถœ๋ ฅ ๊ตฌ์กฐ
23
+ ---------
24
+ output_dir/
25
+ โ”œโ”€โ”€ by_cst/{date}_LLS_CST_analysis.csv # Mode 2
26
+ โ”œโ”€โ”€ daily_agg/{date}_LLS_daily_analysis.csv # Mode 1
27
+ โ”œโ”€โ”€ daily_agg/filtered_defects/{date}_filtered.parquet
28
+ โ”œโ”€โ”€ figures_by_cst/{date}/{CST_ID}_{dttm}.jpg
29
+ โ”œโ”€โ”€ figures_daily/{significant|others}/DAILY_{date}.jpg
30
+ โ”œโ”€โ”€ config_used/{ts}_config.json
31
+ โ””โ”€โ”€ LLS_{by_cst|daily_agg}_full_analysis.csv
32
+ """
33
+ from __future__ import annotations
34
+
35
+ import os
36
+ import sys
37
+ import shutil
38
+ import glob
39
+ import warnings
40
+ from datetime import datetime
41
+ from typing import Optional, Literal, List
42
+
43
+ import numpy as np
44
+ import pandas as pd
45
+ import urllib3
46
+ from tqdm import tqdm
47
+
48
+ from utils import (
49
+ setup_korean_font, load_config, add_zone_labels, plot_wafer_map,
50
+ assign_fine_grid, filter_by_cell_wafer_count,
51
+ )
52
+ from pattern_detection import classify_wafer_patterns
53
+ from contact_mapper import ContactMapper
54
+
55
+ warnings.filterwarnings("ignore")
56
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
57
+
58
+ # ์‹คํ–‰ ๋ชจ๋“œ ํƒ€์ž….
59
+ Mode = Literal["by_cst", "daily"]
60
+
61
+ # Mode 1(daily aggregated)์—์„œ '์œ ์˜ ํŒจํ„ด' ํŒ์ •์‹œ ๊ธฐ๋ณธ ์ œ์™ธ ๋ผ๋ฒจ.
62
+ EXCLUDED_PATTERNS_DEFAULT = {"Others", "์ •์ƒ/๋ฏธ๋‹ฌ", "๋ฐ์ดํ„ฐ ์—†์Œ", "None"}
63
+
64
+
65
+ class LLSPatternAnalyzer:
66
+ """
67
+ LLS ๊ฒฐํ•จ ์ผ์ž๋ณ„ ๋ถ„์„ ์˜ค์ผ€์ŠคํŠธ๋ ˆ์ดํ„ฐ.
68
+
69
+ Parameters
70
+ ----------
71
+ config_path : str
72
+ ``lls_config.json`` ๊ฒฝ๋กœ.
73
+ daily_input_dir : str
74
+ ์ผ์ž๋ณ„ parquet ํŒŒ์ผ ๋””๋ ‰ํ„ฐ๋ฆฌ. ํŒŒ์ผ๋ช…์€ ``YYYYMMDD.parquet`` ํ˜•์‹์ด์–ด์•ผ ํ•จ.
75
+ output_dir : str
76
+ ๋ชจ๋“  ๊ฒฐ๊ณผ(CSV, parquet, ์ด๋ฏธ์ง€)๊ฐ€ ์ €์žฅ๋  ๋ฃจํŠธ ๋””๋ ‰ํ„ฐ๋ฆฌ.
77
+ contact_csv : str, optional
78
+ ``contact_angle.csv`` ๊ฒฝ๋กœ. None ๋˜๋Š” ํŒŒ์ผ ๋ถ€์žฌ ์‹œ contact ๋งคํ•‘ ๋น„ํ™œ์„ฑํ™”.
79
+ setup_font : bool
80
+ True ๋ฉด ์‹œ์ž‘ ์‹œ ํ•œ๊ธ€ ํฐํŠธ ๋“ฑ๋ก.
81
+
82
+ Attributes
83
+ ----------
84
+ config : dict
85
+ ``lls_config.json`` ํŠธ๋ฆฌ.
86
+ contact_mapper : ContactMapper | None
87
+ contact ๋งคํ•‘ ํ™œ์„ฑํ™” ์‹œ ์ธ์Šคํ„ด์Šค, ์•„๋‹ˆ๋ฉด None.
88
+
89
+ Examples
90
+ --------
91
+ >>> analyzer = LLSPatternAnalyzer(
92
+ ... config_path="./lls_config.json",
93
+ ... daily_input_dir="./daily",
94
+ ... output_dir="./result_daily",
95
+ ... )
96
+ >>> df_daily = analyzer.run(mode="daily") # Mode 1
97
+ >>> df_by_cst = analyzer.run(mode="by_cst") # Mode 2
98
+ """
99
+
100
+ # ------------------------------------------------------------------
101
+ # ์ƒ์„ฑ์ž + ์ดˆ๊ธฐํ™”
102
+ # ------------------------------------------------------------------
103
def __init__(
    self,
    config_path: str = "./lls_config.json",
    daily_input_dir: str = "./daily",
    output_dir: str = "./result_daily",
    contact_csv: Optional[str] = "./contact_angle.csv",
    setup_font: bool = True,
):
    """
    Load the configuration, wire the optional contact mapper and create output folders.

    Parameters
    ----------
    config_path : str
        Path handed to ``load_config``; the same file is later copied by
        ``_backup_config``.
    daily_input_dir : str
        Directory that ``run`` scans for ``*.parquet`` files.
    output_dir : str
        Root directory under which every result folder is created.
    contact_csv : str, optional
        Contact table path. The mapper is only built when the value is
        truthy and the file exists; otherwise ``contact_mapper`` stays None.
    setup_font : bool
        When True, ``setup_korean_font()`` is called before anything else.
    """
    if setup_font:
        setup_korean_font()

    self.config_path = config_path
    self.config = load_config(config_path)
    self.daily_input_dir = daily_input_dir
    self.output_dir = output_dir

    # --- Optional contact mapper (section "contact_mapping", defaults if absent) ---
    contact_section = self.config.get("contact_mapping", {})
    self.contact_tolerance_mm = contact_section.get("tolerance_mm", 30.0)
    self.contact_top_n = contact_section.get("top_n", 5)
    self.contact_mapper: Optional[ContactMapper] = None
    mapper_available = bool(contact_csv) and os.path.exists(contact_csv)
    if mapper_available:
        self.contact_mapper = ContactMapper(
            csv_path=contact_csv,
            tolerance_mm=self.contact_tolerance_mm,
        )
        print(f"โœ… Contact mapper ํ™œ์„ฑํ™”: {contact_csv} (tolerance={self.contact_tolerance_mm}mm)")

    # --- Pre-processing parameters (section "preprocessing" is mandatory) ---
    prep = self.config["preprocessing"]
    self.cell_size_mm = prep["cell_size_mm"]
    self.n1_min_wafers = prep["n1_min_wafers"]
    # Accept both the current key and the older "n2_min_zone_defects" spelling.
    legacy_n2 = prep.get("n2_min_zone_defects", 3)
    self.n2_min_cell_defects = prep.get("n2_min_cell_defects", legacy_n2)
    self.inner_radius_mm = prep["inner_radius_mm"]

    # --- Thresholds for the daily "significant pattern" decision ---
    daily_section = self.config.get("mode_daily", {})
    self.daily_min_defect_count = daily_section.get("min_defect_count", 30)
    self.daily_min_wafer_count = daily_section.get("min_wafer_count", 3)
    excluded = daily_section.get("excluded_patterns", list(EXCLUDED_PATTERNS_DEFAULT))
    self.daily_excluded_patterns = set(excluded)

    self._prepare_output_dirs()
    self._backup_config()
151
+
152
+ def _prepare_output_dirs(self) -> None:
153
+ """์ถœ๋ ฅ ๋””๋ ‰ํ„ฐ๋ฆฌ ์ผ๊ด„ ์ƒ์„ฑ."""
154
+ self.by_cst_dir = os.path.join(self.output_dir, "by_cst")
155
+ self.daily_agg_dir = os.path.join(self.output_dir, "daily_agg")
156
+ self.figures_by_cst_dir = os.path.join(self.output_dir, "figures_by_cst")
157
+ self.figures_daily_dir = os.path.join(self.output_dir, "figures_daily")
158
+ self.config_used_dir = os.path.join(self.output_dir, "config_used")
159
+ for d in [
160
+ self.output_dir, self.by_cst_dir, self.daily_agg_dir,
161
+ self.figures_by_cst_dir, self.figures_daily_dir, self.config_used_dir,
162
+ ]:
163
+ os.makedirs(d, exist_ok=True)
164
+
165
+ def _backup_config(self) -> None:
166
+ """ํ˜„์žฌ ์‚ฌ์šฉ๋œ config๋ฅผ ํƒ€์ž„์Šคํƒฌํ”„ ํŒŒ์ผ๋ช…์œผ๋กœ ๋ฐฑ์—… (์žฌํ˜„์„ฑ ํ™•๋ณด)."""
167
+ ts = datetime.now().strftime("%Y%m%d_%H%M%S")
168
+ backup_path = os.path.join(self.config_used_dir, f"{ts}_config.json")
169
+ shutil.copy(self.config_path, backup_path)
170
+ print(f"โœ… ์„ค์ • ํŒŒ์ผ ๋ฐฑ์—… ์™„๋ฃŒ: {backup_path}")
171
+
172
+ # ------------------------------------------------------------------
173
+ # ๊ณต์šฉ ์ „์ฒ˜๋ฆฌ ํ—ฌํผ
174
+ # ------------------------------------------------------------------
175
+ def _load_parquet(self, file_path: str) -> Optional[pd.DataFrame]:
176
+ """
177
+ Parquet ๋กœ๋“œ + HIS_REGIST_DTTM_8030 ์ •๊ทœํ™”.
178
+
179
+ Returns
180
+ -------
181
+ Optional[pd.DataFrame]
182
+ ๋กœ๋“œ ์‹คํŒจ ๋˜๋Š” ๋นˆ ๋ฐ์ดํ„ฐ ์‹œ None.
183
+ """
184
+ try:
185
+ df = pd.read_parquet(file_path)
186
+ except Exception as e:
187
+ print(f"โŒ ํŒŒ์ผ ์ฝ๊ธฐ ์‹คํŒจ: {file_path}, ์˜ค๋ฅ˜: {e}")
188
+ return None
189
+ if df.empty:
190
+ return None
191
+ # ํƒ€์ž„์Šคํƒฌํ”„ 14์ž๋ฆฌ(YYYYMMDDHHMMSS)๋กœ ์ž๋ฅด๊ธฐ โ€” ๊ทธ๋ฃนํ™” ํ‚ค ์ผ๊ด€์„ฑ ํ™•๋ณด
192
+ if "HIS_REGIST_DTTM_8030" in df.columns:
193
+ df["HIS_REGIST_DTTM_8030"] = df["HIS_REGIST_DTTM_8030"].astype(str).str[:14]
194
+ return df
195
+
196
def _apply_grid_and_n1(self, df: pd.DataFrame) -> pd.DataFrame:
    """Assign fine-grid cells, then apply the n1 filter via ``filter_by_cell_wafer_count``."""
    gridded = assign_fine_grid(df, cell_size_mm=self.cell_size_mm)
    return filter_by_cell_wafer_count(
        gridded, self.n1_min_wafers, cell_size_mm=self.cell_size_mm
    )
201
+
202
+ def _apply_n2(self, df: pd.DataFrame) -> pd.DataFrame:
203
+ """n2 ํ•„ํ„ฐ: cell๋‹น ์ตœ์†Œ ๊ฒฐํ•จ ์ˆ˜ ๋ฏธ๋งŒ์ธ cell ์ œ๊ฑฐ."""
204
+ if df.empty:
205
+ return df
206
+ cell_counts = df["cell_id"].value_counts()
207
+ valid_cells = cell_counts[cell_counts >= self.n2_min_cell_defects].index
208
+ return df[df["cell_id"].isin(valid_cells)].copy()
209
+
210
+ def _classify(self, df_group: pd.DataFrame) -> Optional[dict]:
211
+ """
212
+ ํ•œ ๊ทธ๋ฃน์˜ ๊ฒฐํ•จ์„ ํŒจํ„ด ๋ถ„๋ฅ˜ + centroid ์‚ฐ์ถœ.
213
+
214
+ Returns
215
+ -------
216
+ Optional[dict]
217
+ ์„ฑ๊ณต ์‹œ result_df / pattern_list / centroid ๋“ฑ์„ ๋‹ด์€ dict.
218
+ ๊ฒฐํ•จ์ด 0๊ฑด์ด๋ฉด None.
219
+ """
220
+ coords = df_group[["coor_x", "coor_y"]].dropna()
221
+ if len(coords) == 0:
222
+ return None
223
+ df_for_classify = df_group.loc[coords.index].copy()
224
+ result_df, dominant_zone, pattern_list, centroid = classify_wafer_patterns(
225
+ df_for_classify, self.config
226
+ )
227
+ if centroid:
228
+ cx, cy = centroid
229
+ angle = (np.arctan2(cy, cx) / np.pi * 180 + 360) % 360
230
+ distance = round(float(np.sqrt(cx ** 2 + cy ** 2)), 4)
231
+ else:
232
+ angle = distance = None
233
+ return {
234
+ "result_df": result_df,
235
+ "dominant_zone": dominant_zone,
236
+ "pattern_list": pattern_list,
237
+ "centroid": centroid,
238
+ "main_centroid_x": round(centroid[0], 4) if centroid else None,
239
+ "main_centroid_y": round(centroid[1], 4) if centroid else None,
240
+ "main_centroid_Angle": angle,
241
+ "main_centroid_Distance": distance,
242
+ "defect_count": len(coords),
243
+ }
244
+
245
+ @staticmethod
246
+ def _pattern_str(pattern_list) -> str:
247
+ """ํŒจํ„ด ๋ฆฌ์ŠคํŠธ๋ฅผ ์‰ผํ‘œ ๊ฒฐํ•ฉ ๋ฌธ์ž์—ด๋กœ ์ •๊ทœํ™”."""
248
+ if isinstance(pattern_list, list):
249
+ return ", ".join(pattern_list)
250
+ return str(pattern_list)
251
+
252
+ def _attach_contact_candidates(self, record: dict) -> dict:
253
+ """
254
+ record์— contact ๋งคํ•‘ ๊ฒฐ๊ณผ ์ปฌ๋Ÿผ ์ถ”๊ฐ€.
255
+
256
+ ์ถ”๊ฐ€๋˜๋Š” ์ปฌ๋Ÿผ
257
+ - contact_candidate_count : ๋งค์นญ ํ›„๋ณด ์ด ๊ฐœ์ˆ˜
258
+ - contact_candidates : "EQP:Part | ..." ํ˜•์‹ top-N ์š”์•ฝ ๋ฌธ์ž์—ด
259
+ """
260
+ if self.contact_mapper is None:
261
+ return record
262
+ pat = record.get("overall_pattern", "")
263
+ cx = record.get("main_centroid_x")
264
+ cy = record.get("main_centroid_y")
265
+ candidates = self.contact_mapper.map_pattern(pat, centroid_x=cx, centroid_y=cy)
266
+ record["contact_candidate_count"] = int(len(candidates))
267
+ record["contact_candidates"] = self.contact_mapper.summarize_candidates(
268
+ candidates, top_n=self.contact_top_n
269
+ )
270
+ return record
271
+
272
+ def _is_significant(
273
+ self, pattern_list, defect_count: int, wafer_count: int
274
+ ) -> bool:
275
+ """
276
+ Mode 1 '์œ ์˜ ํŒจํ„ด' ํŒ์ •.
277
+
278
+ ์„ธ ์กฐ๊ฑด ๋ชจ๋‘ ์ถฉ์กฑํ•ด์•ผ ์œ ์˜:
279
+ (a) pattern_list๊ฐ€ ์ œ์™ธ ๋ผ๋ฒจ(Others ๋“ฑ)๋กœ๋งŒ ๊ตฌ์„ฑ๋˜์ง€ ์•Š์„ ๊ฒƒ
280
+ (b) defect_count >= daily_min_defect_count
281
+ (c) wafer_count >= daily_min_wafer_count
282
+ """
283
+ patterns = pattern_list if isinstance(pattern_list, list) else [pattern_list]
284
+ if all(p in self.daily_excluded_patterns for p in patterns):
285
+ return False
286
+ if defect_count < self.daily_min_defect_count:
287
+ return False
288
+ if wafer_count < self.daily_min_wafer_count:
289
+ return False
290
+ return True
291
+
292
+ # ------------------------------------------------------------------
293
+ # Mode 2 : by CST ร— scan-time
294
+ # ------------------------------------------------------------------
295
def run_by_cst(self, df: pd.DataFrame, date_str: str) -> List[dict]:
    """
    Process one day in "by_cst" mode: classify each CAR_ID x scan-time group.

    For every carrier the n1 filter and zone labelling are applied once,
    then each ``HIS_REGIST_DTTM_8030`` group is n2-filtered and classified.
    When at least one group succeeds, a per-day CSV is written under
    ``by_cst_dir`` and one figure per group under
    ``figures_by_cst_dir/<date_str>/``.

    Parameters
    ----------
    df : pd.DataFrame
        Defect rows of a single day. Must contain ``CAR_ID``,
        ``HIS_REGIST_DTTM_8030`` and ``WAF_ID``; ``EQP_ID_8030`` is optional.
    date_str : str
        Date label used in output file and folder names.

    Returns
    -------
    List[dict]
        One record per classified group; empty when nothing was classified.
    """
    daily_results: List[dict] = []
    # key -> (classified frame, record). Pairing them here avoids scanning
    # every record again for each figure.
    plot_jobs: dict = {}
    figures_dir = os.path.join(self.figures_by_cst_dir, date_str)
    os.makedirs(figures_dir, exist_ok=True)

    for car_id in tqdm(df["CAR_ID"].unique(), desc=f"{date_str} CST", leave=False):
        df_cst = df[df["CAR_ID"] == car_id].copy()
        if df_cst.empty:
            continue
        df_cst = self._apply_grid_and_n1(df_cst)
        if df_cst.empty:
            continue
        df_cst = add_zone_labels(df_cst, inner_radius=self.inner_radius_mm)

        for dttm, df_group in df_cst.groupby("HIS_REGIST_DTTM_8030"):
            df_group = self._apply_n2(df_group)
            if df_group.empty:
                continue

            # Same guard as run_daily: a missing equipment column yields
            # "Unknown" instead of a KeyError.
            if "EQP_ID_8030" in df_group.columns:
                eqp_series = df_group["EQP_ID_8030"].dropna()
            else:
                eqp_series = pd.Series([], dtype=object)
            eqp_nm = eqp_series.mode().iloc[0] if not eqp_series.empty else "Unknown"

            cls = self._classify(df_group)
            if cls is None:
                continue

            key = f"{car_id}_{dttm}"
            rec = {
                "status": "Success",
                "mode": "by_cst",
                "CST_ID": car_id,
                "HIS_REGIST_DTTM": dttm,
                "EQP_NM_8030": eqp_nm,
                "analysis_date": date_str,
                "wafer_count": df_group["WAF_ID"].nunique(),
                "defect_count": cls["defect_count"],
                "overall_pattern": self._pattern_str(cls["pattern_list"]),
                "overall_dominant_zone": cls["dominant_zone"],
                "main_centroid_x": cls["main_centroid_x"],
                "main_centroid_y": cls["main_centroid_y"],
                "main_centroid_Angle": cls["main_centroid_Angle"],
                "main_centroid_Distance": cls["main_centroid_Distance"],
            }
            rec = self._attach_contact_candidates(rec)
            daily_results.append(rec)
            plot_jobs[key] = (cls["result_df"], rec)

    if daily_results:
        pd.DataFrame(daily_results).to_csv(
            os.path.join(self.by_cst_dir, f"{date_str}_LLS_CST_analysis.csv"),
            index=False, encoding="utf-8-sig",
        )
        for key, (result_df, meta) in tqdm(plot_jobs.items(),
                                           desc=f"{date_str} ์‹œ๊ฐํ™”", leave=False):
            plot_wafer_map(
                result_df=result_df,
                key=key,
                pattern_list=meta["overall_pattern"],
                dominant_zone=meta["overall_dominant_zone"],
                meta=meta,
                show_mode=False,
                save_path=os.path.join(figures_dir, f"{key}.jpg"),
            )
    return daily_results
382
+
383
+ # ------------------------------------------------------------------
384
+ # Mode 1 : daily aggregated
385
+ # ------------------------------------------------------------------
386
def run_daily(self, df: pd.DataFrame, date_str: str) -> List[dict]:
    """
    Process one day in "daily" mode: classify the whole day as a single group.

    After the n1 filter, zone labelling and n2 filter, the remaining rows
    are classified once. The record is written to
    ``daily_agg/<date>_LLS_daily_analysis.csv``, the defect frame to
    ``daily_agg/filtered_defects/<date>_filtered.parquet`` (also when
    classification returned nothing), and the figure to
    ``figures_daily/significant`` or ``figures_daily/others``.

    Returns
    -------
    List[dict]
        A one-element list with the day's record, or [] when either filter
        removed every row.
    """
    day_df = self._apply_grid_and_n1(df.copy())
    if day_df.empty:
        print(f"๐ŸŸก {date_str} n1 ํ•„ํ„ฐ ํ†ต๊ณผ ๊ฒฐํ•จ ์—†์Œ โ†’ ์Šคํ‚ต")
        return []

    day_df = add_zone_labels(day_df, inner_radius=self.inner_radius_mm)
    day_df = self._apply_n2(day_df)
    if day_df.empty:
        print(f"๐ŸŸก {date_str} n2 ํ•„ํ„ฐ ํ†ต๊ณผ ๊ฒฐํ•จ ์—†์Œ โ†’ ์Šคํ‚ต")
        return []

    wafer_count = day_df["WAF_ID"].nunique()
    cls = self._classify(day_df)

    centroid_keys = (
        "main_centroid_x", "main_centroid_y",
        "main_centroid_Angle", "main_centroid_Distance",
    )
    if cls is not None:
        result_df = cls["result_df"]
        pattern_list = cls["pattern_list"]
        dominant_zone = cls["dominant_zone"]
        defect_count = cls["defect_count"]
        centroid_fields = {name: cls[name] for name in centroid_keys}
    else:
        # Nothing to classify: keep the filtered rows anyway, all flagged
        # as non-inliers, with placeholder labels.
        result_df = day_df.assign(inlier=False)
        pattern_list = ["None"]
        dominant_zone = "N/A"
        defect_count = len(day_df)
        centroid_fields = dict.fromkeys(centroid_keys)

    is_significant = self._is_significant(pattern_list, defect_count, wafer_count)

    # Most frequent equipment id of the day, or "Unknown" when unavailable.
    if "EQP_ID_8030" in day_df.columns:
        eqp_values = day_df["EQP_ID_8030"].dropna()
    else:
        eqp_values = pd.Series([], dtype=object)
    eqp_nm = "Unknown" if eqp_values.empty else eqp_values.mode().iloc[0]

    key = f"DAILY_{date_str}"
    record = {
        "status": "Success",
        "mode": "daily",
        "is_significant": is_significant,
        "CST_ID": "ALL",
        "HIS_REGIST_DTTM": date_str,
        "EQP_NM_8030": eqp_nm,
        "analysis_date": date_str,
        "wafer_count": wafer_count,
        "defect_count": defect_count,
        "overall_pattern": self._pattern_str(pattern_list),
        "overall_dominant_zone": dominant_zone,
        **centroid_fields,
    }
    record = self._attach_contact_candidates(record)

    # Per-day record CSV.
    csv_path = os.path.join(self.daily_agg_dir, f"{date_str}_LLS_daily_analysis.csv")
    pd.DataFrame([record]).to_csv(csv_path, index=False, encoding="utf-8-sig")

    # Defect frame is stored regardless of the classification outcome.
    defects_dir = os.path.join(self.daily_agg_dir, "filtered_defects")
    os.makedirs(defects_dir, exist_ok=True)
    parquet_path = os.path.join(defects_dir, f"{date_str}_filtered.parquet")
    result_df.to_parquet(parquet_path, index=False)

    # Figures are split into "significant" and "others" folders.
    save_dir = os.path.join(
        self.figures_daily_dir, "significant" if is_significant else "others"
    )
    os.makedirs(save_dir, exist_ok=True)
    plot_wafer_map(
        result_df=result_df,
        key=key,
        pattern_list=record["overall_pattern"],
        dominant_zone=record["overall_dominant_zone"],
        meta=record,
        show_mode=False,
        save_path=os.path.join(save_dir, f"{key}.jpg"),
    )
    return [record]
487
+
488
+ # ------------------------------------------------------------------
489
+ # Dispatcher / ์ง„์ž…์ 
490
+ # ------------------------------------------------------------------
491
def run(self, mode: Mode = "by_cst") -> pd.DataFrame:
    """
    Process every daily parquet file in the selected mode.

    Parameters
    ----------
    mode : {"by_cst", "daily"}
        "by_cst" dispatches each file to ``run_by_cst``; "daily" to ``run_daily``.

    Returns
    -------
    pd.DataFrame
        All records combined (also written to
        ``output_dir/LLS_<by_cst|daily_agg>_full_analysis.csv``), or an
        empty frame when no file produced a record.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the two accepted values.
    FileNotFoundError
        If ``daily_input_dir`` contains no ``*.parquet`` file.
    """
    if mode not in ("by_cst", "daily"):
        raise ValueError(f"mode๋Š” 'by_cst' ๋˜๋Š” 'daily' ์—ฌ์•ผ ํ•ฉ๋‹ˆ๋‹ค. got={mode}")

    search_pattern = os.path.join(self.daily_input_dir, "*.parquet")
    parquet_files = sorted(glob.glob(search_pattern))
    if not parquet_files:
        raise FileNotFoundError(
            f"โŒ {self.daily_input_dir} ํด๋”์— parquet ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค."
        )
    print(f"โœ… ์ด {len(parquet_files)}๊ฐœ์˜ ์ผ์ž๋ณ„ ํŒŒ์ผ ๋ฐœ๊ฒฌ (mode={mode})")

    all_results: List[dict] = []
    for file_path in tqdm(parquet_files, desc=f"๐Ÿ“… ์ผ์ž๋ณ„ ์ฒ˜๋ฆฌ ({mode})"):
        # The date label is the file name up to its first dot; only
        # 8-digit names are processed.
        date_str = os.path.basename(file_path).partition(".")[0]
        name_ok = len(date_str) == 8 and date_str.isdigit()
        if not name_ok:
            print(f"๐ŸŸก ๊ฑด๋„ˆ๋œ€ (ํŒŒ์ผ๋ช… ํ˜•์‹ ์˜ค๋ฅ˜): {file_path}")
            continue

        df = self._load_parquet(file_path)
        if df is None:
            print(f"๐ŸŸก ๋ฐ์ดํ„ฐ ์—†์Œ: {file_path}")
            continue

        if mode == "by_cst":
            day_records = self.run_by_cst(df, date_str)
        else:
            day_records = self.run_daily(df, date_str)
        all_results.extend(day_records)

    if not all_results:
        print("โŒ ๋ถ„์„๋œ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
        return pd.DataFrame()

    final_df = pd.DataFrame(all_results)
    suffix = "daily_agg" if mode != "by_cst" else "by_cst"
    final_path = os.path.join(self.output_dir, f"LLS_{suffix}_full_analysis.csv")
    final_df.to_csv(final_path, index=False, encoding="utf-8-sig")
    print(f"โœ… ์ „์ฒด ๋ถ„์„ ์™„๋ฃŒ: {len(all_results)}๊ฑด โ†’ {final_path}")
    return final_df
552
+
553
+
554
+ # ----------------------------------------------------------------------
555
+ # CLI ์ง„์ž…์ : `python pattern_analyzer.py [by_cst|daily]`
556
+ # ----------------------------------------------------------------------
557
if __name__ == "__main__":
    # CLI entry point: `python pattern_analyzer.py [by_cst|daily]`.
    # The first argument selects the mode; without one, "by_cst" is used.
    sys.path.append(os.getcwd())
    cli_args = sys.argv[1:]
    mode: Mode = cli_args[0] if cli_args else "by_cst"
    analyzer = LLSPatternAnalyzer()
    analyzer.run(mode=mode)
nu/pattern_detection.py ADDED
@@ -0,0 +1,460 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pattern_detection.py
2
+ """
3
+ LLS ๊ฒฐํ•จ ํŒจํ„ด ์ž๋™ ๋ถ„๋ฅ˜ ๋ชจ๋“ˆ.
4
+
5
+ ์›จ์ดํผ ํ•œ ์žฅ(๋˜๋Š” ํ•œ ๊ทธ๋ฃน) ์œ„์˜ ๊ฒฐํ•จ ์ขŒํ‘œ ์ง‘ํ•ฉ์„ ์ž…๋ ฅ์œผ๋กœ ๋ฐ›์•„
6
+ ํ™˜ํ˜•(Ring) / ์„ ํ˜•(Linear) / ๊ตฐ์ง‘(Cluster) / Others ์ค‘ ํ•˜๋‚˜๋กœ ๋ถ„๋ฅ˜ํ•œ๋‹ค.
7
+
8
+ ๋ถ„๋ฅ˜ ํŒŒ์ดํ”„๋ผ์ธ
9
+ ----------------
10
+ 1. HDBSCAN์œผ๋กœ 1์ฐจ ํด๋Ÿฌ์Šคํ„ฐ๋ง โ†’ outlier(-1) ์ œ๊ฑฐ
11
+ โ”” ์‹คํŒจ ์‹œ DBSCAN fallback
12
+ 2. LOF๋กœ 2์ฐจ outlier ์ œ๊ฑฐ (์ง€์—ญ ๋ฐ€๋„ ๊ธฐ๋ฐ˜)
13
+ 3. inlier ์ง‘ํ•ฉ์— ๋Œ€ํ•ด ํŒจํ„ด ํ›„๋ณด ํ‰๊ฐ€ (์šฐ์„ ์ˆœ์œ„ ์ˆœ)
14
+ (a) ํ™˜ํ˜• ๊ฒ€์ถœ : ์› ํ”ผํŒ… RMSE + ๊ฐ๋„ ์ปค๋ฒ„๋ฆฌ์ง€ + ์‹œ๊ณ„ sector ์ปค๋ฒ„๋ฆฌ์ง€
15
+ + PCA ์„ ํ˜•์„ฑ ๊ฑฐ๋ถ€(์›์  ํ†ต๊ณผ ์„ ํ˜• false-positive ๋ฐฉ์ง€)
16
+ (b) ์„ ํ˜• ๊ฒ€์ถœ : PCA eigenvalue ratio + ์ง์„  ํŽธ์ฐจ + gap ratio
17
+ (c) ๊ตฐ์ง‘ ๊ฒ€์ถœ : DBSCAN sub-cluster โ†’ compactness/PCA๋กœ ๊ตฐ์ง‘/์„ ํ˜• ์žฌํŒ์ •
18
+ 4. dominant_zone ๊ณ„์‚ฐ (์‹œ๊ฐํ™”์šฉ)
19
+ 5. centroid ์ขŒํ‘œ ์‚ฐ์ถœ
20
+ - ํ™˜ํ˜•: inlier ์ „์ฒด ํ‰๊ท 
21
+ - ์„ ํ˜•/๊ตฐ์ง‘: dominant_zone ์ ๋“ค์˜ ํ‰๊ท  (์—†์œผ๋ฉด inlier ํ‰๊ท )
22
+
23
+ API
24
+ ---
25
+ - `PatternDetector(config).classify(df)` (๊ถŒ์žฅ)
26
+ - `classify_wafer_patterns(df, config)` (๊ตฌ๋ฒ„์ „ ํ˜ธํ™˜)
27
+
28
+ ๋‘ API ๋ชจ๋‘ `(result_df, dominant_zone, pattern_list, centroid)` ํŠœํ”Œ ๋ฐ˜ํ™˜.
29
+ """
30
+ from __future__ import annotations
31
+
32
+ from collections import Counter
33
+ from typing import Tuple, List, Optional
34
+
35
+ import numpy as np
36
+ import pandas as pd
37
+ import hdbscan
38
+ from sklearn.decomposition import PCA
39
+ from sklearn.cluster import DBSCAN
40
+ from sklearn.neighbors import LocalOutlierFactor
41
+
42
+ from utils import WaferUtils
43
+
44
+
45
+ # ======================================================================
46
+ # PatternDetector
47
+ # ======================================================================
48
+ class PatternDetector:
49
+ """
50
+ config๋ฅผ ์ฃผ์ž…๋ฐ›์•„ LLS ๊ฒฐํ•จ ํŒจํ„ด์„ ๋ถ„๋ฅ˜ํ•˜๋Š” ๊ฒ€์ถœ๊ธฐ.
51
+
52
+ ์ƒํƒœ๋กœ `self.cfg` ํ•œ ๊ฐ€์ง€๋งŒ ๋ณด์œ ํ•˜๋ฏ€๋กœ thread-safeํ•˜๋ฉฐ,
53
+ ๋™์ผ ์ธ์Šคํ„ด์Šค๋ฅผ ์—ฌ๋Ÿฌ wafer ๊ทธ๋ฃน์— ๋ฐ˜๋ณต ์‚ฌ์šฉํ•ด๋„ ๋ฌด๋ฐฉํ•˜๋‹ค.
54
+
55
+ Parameters
56
+ ----------
57
+ config : dict
58
+ `lls_config.json` ๊ตฌ์กฐ์˜ dict.
59
+ ํ•„์š”ํ•œ ํ‚ค (์„œ๋ธŒํŠธ๋ฆฌ):
60
+ - preprocessing.inner_radius_mm
61
+ - clustering.{min_cluster_size, min_samples, cluster_selection_method,
62
+ dbscan_eps, cluster_dbscan_eps}
63
+ - lof.{lof_min_points, lof_n_neighbors, lof_contamination}
64
+ - ring.{ring_min_points, ring_band_width, ring_r_absolute_tolerance,
65
+ ring_min_angular_coverage, ring_min_sectors, ring_fit_rmse_max,
66
+ (์„ ํƒ) ring_pca_ratio_max}
67
+ - linear.{linear_pca_ratio_min, linear_max_deviation, linear_min_length,
68
+ linear_max_gap_ratio,
69
+ centroid_linear_min_length, centroid_linear_pca_min,
70
+ centroid_linear_dev_max}
71
+ - cluster.cluster_compactness_radius
72
+ - misc.min_points_for_clustering
73
+ """
74
+
75
def __init__(self, config: dict) -> None:
    """Store the configuration dict; the detector methods read their thresholds from ``self.cfg``."""
    # The dict is kept by reference, not copied.
    self.cfg = config
77
+
78
+ # ==================================================================
79
+ # ๊ณต๊ฐœ API
80
+ # ==================================================================
81
def classify(
    self, df: pd.DataFrame
) -> Tuple[pd.DataFrame, str, List[str], Optional[tuple]]:
    """
    Classify the defect pattern of one coordinate set.

    Pipeline: zone labelling, primary clustering (HDBSCAN, with DBSCAN as
    fallback when every point is noise), LOF refinement, then the pattern
    checks in priority order: ring, linear, cluster/sub-linear.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``coor_x`` and ``coor_y``. Zone labels are added
        internally through ``WaferUtils.add_zone_labels``.

    Returns
    -------
    result_df : pd.DataFrame
        Copy of the input (index reset) with a boolean ``inlier`` column.
        The column is present on every return path, including empty input.
    dominant_zone : str
        Result of ``_dominant_zone`` on the inliers, or a placeholder label
        on the early-exit paths.
    pattern_list : list[str]
        A single-element list holding the assigned pattern label.
    centroid : tuple | None
        Pattern centre, or None on every early-exit path.
    """
    cfg = self.cfg

    if df.empty:
        # Keep the schema consistent with all other exits: callers can
        # rely on 'inlier' existing even when there is nothing to classify.
        empty_result = df.assign(inlier=np.zeros(len(df), dtype=bool))
        return empty_result, "๋ฐ์ดํ„ฐ ์—†์Œ", ["None"], None

    # Work on a positionally indexed copy so boolean masks line up with rows.
    df = df.copy().reset_index(drop=True)
    df = WaferUtils.add_zone_labels(df, inner_radius=cfg["preprocessing"]["inner_radius_mm"])
    coords = df[["coor_x", "coor_y"]].values

    n_total = len(df)
    if n_total < cfg["misc"]["min_points_for_clustering"]:
        return (df.assign(inlier=np.zeros(n_total, dtype=bool)),
                "๋ฐ์ดํ„ฐ ์—†์Œ", ["์ •์ƒ/๋ฏธ๋‹ฌ"], None)

    # --- Primary clustering (HDBSCAN, DBSCAN fallback when all noise) ---
    labels = self._cluster_hdbscan(coords)
    if np.all(labels == -1):
        labels = self._cluster_dbscan_fallback(coords)
    inlier_mask = labels != -1
    if not inlier_mask.any():
        return df.assign(inlier=inlier_mask), "๋ฐ์ดํ„ฐ ์—†์Œ", ["Others"], None

    # --- Secondary outlier removal (LOF) ---
    inlier_mask = self._apply_lof(coords, inlier_mask)
    inlier_df = df[inlier_mask].copy()
    inlier_coords = coords[inlier_mask]
    n_inlier = len(inlier_df)

    if n_inlier < cfg["clustering"]["min_cluster_size"]:
        return df.assign(inlier=inlier_mask), "๋ฐ์ดํ„ฐ ์—†์Œ", ["Others"], None

    # --- Pattern decision: ring -> linear -> cluster (sub-classification) ---
    if self._is_ring(inlier_df):
        zone = self._dominant_zone(inlier_df)
        # Ring centre = mean of all inlier coordinates.
        centroid = tuple(np.mean(inlier_df[["coor_x", "coor_y"]].values, axis=0))
        return df.assign(inlier=inlier_mask), zone, ["ํ™˜ํ˜•"], centroid

    if self._is_linear_set(inlier_coords):
        zone = self._dominant_zone(inlier_df)
        centroid = self._zone_centroid(inlier_df, inlier_coords, zone)
        return df.assign(inlier=inlier_mask), zone, ["์„ ํ˜•"], centroid

    # Remaining case: let the sub-cluster check pick the final label.
    zone = self._dominant_zone(inlier_df)
    centroid = self._zone_centroid(inlier_df, inlier_coords, zone)
    pattern = self._classify_cluster_or_sub_linear(inlier_coords)
    return df.assign(inlier=inlier_mask), zone, [pattern], centroid
152
+
153
+ # ==================================================================
154
+ # 1์ฐจ ํด๋Ÿฌ์Šคํ„ฐ๋ง
155
+ # ==================================================================
156
def _cluster_hdbscan(self, coords: np.ndarray) -> np.ndarray:
    """Return HDBSCAN cluster labels for ``coords``; callers treat -1 as outlier."""
    params = self.cfg["clustering"]
    model = hdbscan.HDBSCAN(
        min_cluster_size=params["min_cluster_size"],
        min_samples=params["min_samples"],
        cluster_selection_method=params["cluster_selection_method"],
        metric="euclidean",
        gen_min_span_tree=True,
    )
    labels = model.fit_predict(coords)
    return labels
167
+
168
def _cluster_dbscan_fallback(self, coords: np.ndarray) -> np.ndarray:
    """Return DBSCAN labels; used when HDBSCAN marks every point as noise."""
    params = self.cfg["clustering"]
    model = DBSCAN(eps=params["dbscan_eps"], min_samples=params["min_cluster_size"])
    model.fit(coords)
    return model.labels_
172
+
173
+ # ==================================================================
174
+ # 2์ฐจ outlier ์ œ๊ฑฐ (LOF)
175
+ # ==================================================================
176
+ def _apply_lof(self, coords: np.ndarray, inlier_mask: np.ndarray) -> np.ndarray:
177
+ """LOF๋กœ 1์ฐจ inlier์—์„œ ์ถ”๊ฐ€ outlier ์ œ๊ฑฐ."""
178
+ lof_cfg = self.cfg["lof"]
179
+ inlier_coords = coords[inlier_mask]
180
+ n_inlier = len(inlier_coords)
181
+ if n_inlier < lof_cfg["lof_min_points"]:
182
+ return inlier_mask
183
+
184
+ n_neighbors = min(lof_cfg["lof_n_neighbors"], n_inlier - 1)
185
+ if n_neighbors < 2:
186
+ return inlier_mask
187
+
188
+ lof = LocalOutlierFactor(
189
+ n_neighbors=n_neighbors,
190
+ contamination=lof_cfg["lof_contamination"],
191
+ metric="euclidean",
192
+ )
193
+ lof_labels = lof.fit_predict(inlier_coords)
194
+ # inlier_mask์™€ ๋™์ผ ๊ธธ์ด์˜ mask๋กœ ํ™•์žฅ
195
+ full_mask = np.zeros(len(coords), dtype=bool)
196
+ full_mask[inlier_mask] = lof_labels == 1
197
+ return inlier_mask & full_mask
198
+
199
+ # ==================================================================
200
+ # ํ™˜ํ˜• ๊ฒ€์ถœ
201
+ # ==================================================================
202
def _is_ring(self, inlier_df: pd.DataFrame) -> bool:
    """
    Decide whether the inlier points form a ring around the wafer origin.

    The checks run in order; the first failing one returns ``False``.

    1. At least ``ring_min_points`` points.
    2. Linearity rejection: PCA over all inliers. If
       ``sqrt(var_1 / var_2)`` reaches ``ring_pca_ratio_max`` (defaulting to
       ``linear.linear_pca_ratio_min``) the set is treated as a line, not a
       ring. This prevents lines through the origin being mistaken for rings.
    3. Only the most populated radial histogram bin (width
       ``ring_band_width``) is kept; it must still hold ``ring_min_points``.
    4. Within that band: radial spread <= ``ring_r_absolute_tolerance``,
       angular coverage >= ``ring_min_angular_coverage`` and at least
       ``ring_min_sectors`` occupied sectors.
    5. Least-squares circle fit: the RMSE must be finite and
       <= ``ring_fit_rmse_max``, and the fitted centre must lie within
       ``ring_center_max_offset`` of the origin (optional config key;
       defaults to 10.0, the previously hard-coded 10 mm limit).

    Parameters
    ----------
    inlier_df : pd.DataFrame
        Inlier points with columns ``coor_x``, ``coor_y``, ``r`` and
        ``theta_deg``.

    Returns
    -------
    bool
        ``True`` only when every check passes.
    """
    ring_cfg = self.cfg["ring"]
    min_points = ring_cfg["ring_min_points"]
    if len(inlier_df) < min_points:
        return False

    # Linearity rejection (ring pre-check).
    coords = inlier_df[["coor_x", "coor_y"]].values
    if len(coords) >= 3:
        pca_all = PCA(n_components=2).fit(coords)
        variances = pca_all.explained_variance_
        if len(variances) >= 2:
            eig_ratio = variances[0] / (variances[1] + 1e-9)
            ring_pca_max = ring_cfg.get(
                "ring_pca_ratio_max", self.cfg["linear"]["linear_pca_ratio_min"]
            )
            if np.sqrt(eig_ratio) >= ring_pca_max:
                return False

    # Main ring band: the single most populated radial bin.
    band_df = self._filter_main_ring_band(
        inlier_df, r_bin_width=ring_cfg["ring_band_width"], top_n_bins=1
    )
    if len(band_df) < min_points:
        return False

    r = band_df["r"].values
    theta_deg = band_df["theta_deg"].values

    if r.max() - r.min() > ring_cfg["ring_r_absolute_tolerance"]:
        return False
    if self._circular_range_deg(theta_deg) < ring_cfg["ring_min_angular_coverage"]:
        return False
    if not self._check_sector_coverage(theta_deg, min_sectors=ring_cfg["ring_min_sectors"]):
        return False

    cx, cy, _, rmse = self._fit_circle_least_squares(
        band_df["coor_x"].values, band_df["coor_y"].values
    )
    # `not isfinite` covers both the inf failure marker and a NaN RMSE;
    # NaN compares False against any threshold and used to slip through.
    if not np.isfinite(rmse) or rmse > ring_cfg["ring_fit_rmse_max"]:
        return False

    # A centre far from the origin is not considered a wafer ring.
    max_offset = ring_cfg.get("ring_center_max_offset", 10.0)
    if np.hypot(cx, cy) > max_offset:
        return False
    return True
253
+
254
+ @staticmethod
255
+ def _filter_main_ring_band(
256
+ df: pd.DataFrame, r_bin_width: float = 5.0, top_n_bins: int = 1
257
+ ) -> pd.DataFrame:
258
+ """r-์ถ• ํžˆ์Šคํ† ๊ทธ๋žจ์—์„œ ์ ์ด ๊ฐ€์žฅ ๋งŽ์€ bin(๋“ค)์— ์†ํ•˜๋Š” ์ ๋งŒ ์ถ”์ถœ."""
259
+ if len(df) == 0 or "r" not in df.columns:
260
+ return df.copy()
261
+ r = df["r"].values
262
+ r = r[(r >= 0) & (r <= 150)]
263
+ if len(r) == 0:
264
+ return pd.DataFrame(columns=df.columns)
265
+
266
+ r_bins = np.arange(0, 150 + r_bin_width, r_bin_width)
267
+ r_hist, r_edges = np.histogram(df["r"].values, bins=r_bins)
268
+ top_idx = np.argsort(r_hist)[::-1][:top_n_bins]
269
+
270
+ mask = np.zeros(len(df), dtype=bool)
271
+ for bi in top_idx:
272
+ r_min, r_max = r_edges[bi], r_edges[bi + 1]
273
+ mask |= ((df["r"] >= r_min) & (df["r"] < r_max)).values
274
+ return df[mask].copy()
275
+
276
+ @staticmethod
277
+ def _circular_range_deg(angles_deg: np.ndarray) -> float:
278
+ """์›ํ˜• ๊ฐ๋„ ๋ถ„ํฌ์˜ ์ปค๋ฒ„๋ฆฌ์ง€ (๋„, 360ยฐ ์ค‘)."""
279
+ if len(angles_deg) < 2:
280
+ return 0.0
281
+ a = np.sort(np.array(angles_deg) % 360.0)
282
+ gaps = np.diff(a)
283
+ circ_gap = 360.0 - a[-1] + a[0]
284
+ return 360.0 - max(np.max(gaps), circ_gap)
285
+
286
+ @staticmethod
287
+ def _check_sector_coverage(theta_deg: np.ndarray, min_sectors: int = 8) -> bool:
288
+ """30ยฐ ๊ฐ„๊ฒฉ 12 sector ์ค‘ min_sectors ์ด์ƒ ์ปค๋ฒ„ํ•˜๋Š”์ง€."""
289
+ if len(theta_deg) == 0:
290
+ return False
291
+ sectors = ((theta_deg % 360) // 30).astype(int) % 12
292
+ return len(np.unique(sectors)) >= min_sectors
293
+
294
+ @staticmethod
295
+ def _fit_circle_least_squares(
296
+ x: np.ndarray, y: np.ndarray
297
+ ) -> Tuple[Optional[float], Optional[float], Optional[float], float]:
298
+ """
299
+ ๋Œ€์ˆ˜์  ์ตœ์†Œ์ œ๊ณฑ ์› ํ”ผํŒ….
300
+
301
+ Returns
302
+ -------
303
+ (cx, cy, radius, rmse) โ€” ์‹คํŒจ ์‹œ (None, None, None, inf)
304
+ """
305
+ if len(x) < 3:
306
+ return None, None, None, np.inf
307
+ x = x[:, np.newaxis]
308
+ y = y[:, np.newaxis]
309
+ A = np.hstack([x, y, np.ones_like(x)])
310
+ b = x ** 2 + y ** 2
311
+ try:
312
+ sol, *_ = np.linalg.lstsq(A, b, rcond=None)
313
+ a, bb, c = sol.flatten()
314
+ cx, cy = a / 2, bb / 2
315
+ radius = np.sqrt((a ** 2 + bb ** 2) / 4 + c)
316
+ fitted = np.sqrt((x - cx) ** 2 + (y - cy) ** 2)
317
+ rmse = np.sqrt(np.mean((fitted - radius) ** 2))
318
+ return cx, cy, radius, rmse
319
+ except Exception:
320
+ return None, None, None, np.inf
321
+
322
+ # ==================================================================
323
+ # ์„ ํ˜• ๊ฒ€์ถœ
324
+ # ==================================================================
325
def _is_linear_set(self, coords: np.ndarray) -> bool:
    """
    Decide whether the whole inlier set lies close enough to a straight line.

    Using thresholds from ``self.cfg["linear"]``, all of these must hold:

    * at least 3 points;
    * twice the largest distance from the centroid >= ``linear_min_length``;
    * PCA elongation ``sqrt(var_1 / var_2)`` >= ``linear_pca_ratio_min``;
    * mean absolute offset perpendicular to the principal axis
      <= ``linear_max_deviation``;
    * largest gap between consecutive projections onto the principal axis,
      relative to the total projected length, <= ``linear_max_gap_ratio``
      (i.e. the line is not broken).
    """
    limits = self.cfg["linear"]
    n_points = len(coords)
    if n_points < 3:
        return False

    center = np.mean(coords, axis=0)
    reach = np.max(np.linalg.norm(coords - center, axis=1))
    # Length condition: twice the radius is the maximum possible length.
    if 2 * reach < limits["linear_min_length"]:
        return False

    model = PCA(n_components=min(2, n_points)).fit(coords)
    variances = model.explained_variance_
    if len(variances) < 2:
        return False
    elongation = np.sqrt(variances[0] / (variances[1] + 1e-9))
    if elongation < limits["linear_pca_ratio_min"]:
        return False

    main_axis = model.components_[0]
    centered = coords - model.mean_

    # Mean deviation perpendicular to the principal axis.
    perpendicular = np.array([-main_axis[1], main_axis[0]])
    mean_offset = np.mean(np.abs(np.dot(centered, perpendicular)))
    if mean_offset > limits["linear_max_deviation"]:
        return False

    # Gap ratio along the principal axis.
    along = np.sort(np.dot(centered, main_axis))
    span = along[-1] - along[0]
    if span > 0 and np.max(np.diff(along)) / span > limits["linear_max_gap_ratio"]:
        return False
    return True
356
+
357
def _is_centroids_linear(self, sub_coords_list: list) -> bool:
    """
    Decide whether the centroids of several sub-clusters lie on one line.

    Requires at least 3 sub-clusters. Using ``self.cfg["linear"]``:
    the centroid spread (twice the largest distance from their mean) must be
    >= ``centroid_linear_min_length``, the PCA elongation
    ``sqrt(var_1 / var_2)`` >= ``centroid_linear_pca_min`` and the mean
    absolute perpendicular offset <= ``centroid_linear_dev_max``.
    """
    limits = self.cfg["linear"]
    if len(sub_coords_list) < 3:
        return False

    centers = np.array([np.mean(group, axis=0) for group in sub_coords_list])
    radial = np.linalg.norm(centers - np.mean(centers, axis=0), axis=1)
    spread = 2 * np.max(radial)
    if spread < limits["centroid_linear_min_length"]:
        return False

    model = PCA(n_components=2).fit(centers)
    variances = model.explained_variance_
    if len(variances) < 2:
        return False
    elongation = np.sqrt(variances[0] / (variances[1] + 1e-9))
    if elongation < limits["centroid_linear_pca_min"]:
        return False

    main_axis = model.components_[0]
    perpendicular = np.array([-main_axis[1], main_axis[0]])
    mean_offset = np.mean(np.abs(np.dot(centers - model.mean_, perpendicular)))
    if mean_offset > limits["centroid_linear_dev_max"]:
        return False
    return True
376
+
377
+ # ==================================================================
378
+ # ๊ตฐ์ง‘ / ์„œ๋ธŒ ๋ถ„๋ฅ˜
379
+ # ==================================================================
380
def _classify_cluster_or_sub_linear(self, inlier_coords: np.ndarray) -> str:
    """
    Classify a point set that is neither a ring nor a single line.

    The points are split with a DBSCAN pass (``cluster_dbscan_eps`` and
    ``min_cluster_size`` from ``self.cfg["clustering"]``):

    * two or more sub-clusters whose centroids pass
      ``_is_centroids_linear`` -> the linear label;
    * two or more sub-clusters otherwise -> each one is labelled by
      ``_classify_subcluster`` and the label with the most points wins;
    * fewer than two sub-clusters -> ``_classify_subcluster`` on the whole
      set.

    Fewer than 2 input points always yields the cluster label.

    Returns
    -------
    str
        One of the two pattern label strings used by this class.
    """
    if len(inlier_coords) < 2:
        return "๊ตฐ์ง‘"

    clustering_cfg = self.cfg["clustering"]
    splitter = DBSCAN(
        eps=clustering_cfg["cluster_dbscan_eps"],
        min_samples=clustering_cfg["min_cluster_size"],
    ).fit(inlier_coords)
    sub_labels = splitter.labels_
    noise_present = 1 if -1 in sub_labels else 0
    n_sub = len(set(sub_labels)) - noise_present

    if n_sub < 2:
        return self._classify_subcluster(inlier_coords)

    groups = [
        inlier_coords[sub_labels == label]
        for label in set(sub_labels)
        if label != -1
    ]
    if self._is_centroids_linear(groups):
        return "์„ ํ˜•"

    # Point-weighted majority vote over the per-group labels.
    votes = [(self._classify_subcluster(group), len(group)) for group in groups]
    weight_by_pattern = {}
    for pattern, n_points in votes:
        weight_by_pattern[pattern] = weight_by_pattern.get(pattern, 0) + n_points
    return max(weight_by_pattern, key=weight_by_pattern.get)
407
+
408
def _classify_subcluster(self, sub_coords: np.ndarray) -> str:
    """
    Label a single sub-cluster as either the cluster or the linear pattern.

    The linear label is returned only when the group is not compact
    (largest centroid distance > ``cluster.cluster_compactness_radius``),
    its PCA elongation ``sqrt(var_1 / var_2)`` reaches
    ``linear.linear_pca_ratio_min``, its mean perpendicular offset is
    <= ``linear.linear_max_deviation`` and twice its largest centroid
    distance is >= ``linear.linear_min_length``. Everything else, including
    groups of fewer than 3 points, gets the cluster label.
    """
    settings = self.cfg
    n_points = len(sub_coords)
    if n_points < 3:
        return "๊ตฐ์ง‘"

    center = np.mean(sub_coords, axis=0)
    reach = np.max(np.linalg.norm(sub_coords - center, axis=1))

    # Compact group: a cluster regardless of its shape.
    if reach <= settings["cluster"]["cluster_compactness_radius"]:
        return "๊ตฐ์ง‘"

    model = PCA(n_components=min(2, n_points)).fit(sub_coords)
    variances = model.explained_variance_
    if len(variances) < 2:
        return "๊ตฐ์ง‘"

    elongation = np.sqrt(variances[0] / (variances[1] + 1e-9))
    # Written as `not (>=)` so a NaN elongation falls through to the cluster
    # label, as in the nested-if form.
    if not (elongation >= settings["linear"]["linear_pca_ratio_min"]):
        return "๊ตฐ์ง‘"

    main_axis = model.components_[0]
    perpendicular = np.array([-main_axis[1], main_axis[0]])
    mean_offset = np.mean(np.abs(np.dot(sub_coords - model.mean_, perpendicular)))
    if (mean_offset <= settings["linear"]["linear_max_deviation"]
            and 2 * reach >= settings["linear"]["linear_min_length"]):
        return "์„ ํ˜•"
    return "๊ตฐ์ง‘"
432
+
433
+ # ==================================================================
434
+ # Zone / Centroid ์œ ํ‹ธ
435
+ # ==================================================================
436
+ @staticmethod
437
+ def _dominant_zone(df: pd.DataFrame) -> str:
438
+ """๊ฐ€์žฅ ๋นˆ๋ฒˆํ•œ zone_label."""
439
+ if len(df) == 0 or "zone_label" not in df.columns:
440
+ return "N/A"
441
+ counter = Counter(df["zone_label"])
442
+ return counter.most_common(1)[0][0]
443
+
444
+ @staticmethod
445
+ def _zone_centroid(
446
+ inlier_df: pd.DataFrame, inlier_coords: np.ndarray, zone: str
447
+ ) -> tuple:
448
+ """dominant zone์— ์†ํ•œ ์ ๋“ค์˜ ํ‰๊ท . ์—†์œผ๋ฉด inlier ์ „์ฒด ํ‰๊ท ."""
449
+ dom = inlier_df[inlier_df["zone_label"] == zone] if "zone_label" in inlier_df.columns else inlier_df
450
+ if not dom.empty:
451
+ return tuple(np.mean(dom[["coor_x", "coor_y"]].values, axis=0))
452
+ return tuple(np.mean(inlier_coords, axis=0))
453
+
454
+
455
+ # ======================================================================
456
+ # Backward-compat: ๊ธฐ์กด ํ•จ์ˆ˜ API ์œ ์ง€
457
+ # ======================================================================
458
def classify_wafer_patterns(df: pd.DataFrame, cfg: dict):
    """Function-style alias for ``PatternDetector(cfg).classify(df)``."""
    detector = PatternDetector(cfg)
    return detector.classify(df)
nu/test_synthetic_7days.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
Validation of 7 days of synthetic LLS data plus contact mapping.

Each day is deliberately laid out so that it matches specific rows of contact_angle.csv:
  Day 1 : ring                        (angle-independent; every candidate except Line)
  Day 2 : linear, vertical at x=38    -> Line 75°/105° etc. (cos*150 ≈ ±38.8)
  Day 3 : cluster near (130, 0)       -> non-Line, angle around 0° (EPI FI FOUP)
  Day 4 : cluster near (0, 140)       -> non-Line, angle 90° (EBIS Edge/Back Stage)
  Day 5 : cluster near (-130, 0)      -> non-Line, 180° (EPI FI FOUP, EBIS Edge Stage)
  Day 6 : linear, vertical at x=-141  -> Line 158°/197° (AVIS Flipper)
  Day 7 : cluster near (50, 50)       -> no match (negative test)
12
+ """
13
+ import os
14
+ import sys
15
+ import shutil
16
+ import numpy as np
17
+ import pandas as pd
18
+
19
# Directory of this script: added to the import path and made the working
# directory so the relative paths used further down resolve next to it.
SCR_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(SCR_DIR)
os.chdir(SCR_DIR)

# Scratch tree for the synthetic run; wiped on every start.
TEST_ROOT = os.path.join(SCR_DIR, "_synthetic_test")
DAILY_DIR, OUTPUT_DIR = (
    os.path.join(TEST_ROOT, leaf) for leaf in ("daily", "result")
)
if os.path.exists(TEST_ROOT):
    shutil.rmtree(TEST_ROOT)
os.makedirs(DAILY_DIR, exist_ok=True)
30
+
31
+
32
def base_record(car_id, waf_id, dttm, eqp_8030="EQP_A"):
    """
    Build the non-coordinate fields of one synthetic defect row.

    ``dttm`` is used for the scan time and both registration timestamps;
    ``SUBLOT_ID`` is ``"S" + car_id``. All other fields are fixed
    placeholder values. Key order is kept stable because it determines the
    column order of the frame built from these records.
    """
    record = dict(
        SUBLOT_ID=f"S{car_id}",
        CAR_ID=car_id,
        SCAN_TIME=dttm,
        EQP_ID_8210="L01",
        EQP_ID_8030=eqp_8030,
        ROUTE_ID="WF7EP01",
        PROD_ID="P001",
        HIS_REGIST_DTTM_8030=dttm,
        HIS_REGIST_DTTM_Pcounter=dttm,
        WAF_ID=waf_id,
        DEFECT_CNT=100,
        OPE_ID="8210",
        test_no=1,
        roughbin_no=0,
        d_size=0.05,
    )
    # These two keys are not valid identifiers, so they are added separately.
    record["LLS ๊ตฌ๋ถ„"] = "LPD"
    record["Particle ๋ถ„ํฌ"] = "๋ฉด๋‚ด"
    return record
52
+
53
+
54
def make_ring(n=400, radius=85, noise=3.0):
    """
    Draw ``n`` points on a noisy ring centred at the origin.

    Angles are uniform on [0, 2*pi); radii are normal around ``radius`` with
    standard deviation ``noise``. Angles are drawn before radii, which
    matters for reproducibility under a fixed seed.

    Returns
    -------
    (x, y) : tuple of np.ndarray
    """
    angles = np.random.uniform(0, 2 * np.pi, n)
    radii = np.random.normal(radius, noise, n)
    xs = radii * np.cos(angles)
    ys = radii * np.sin(angles)
    return xs, ys
58
+
59
+
60
def make_vertical_line(n=300, x_center=38, y_range=(-60, 60), noise=3.0):
    """
    Draw ``n`` points along a line parallel to the Y axis.

    ``y`` is uniform over ``y_range``; ``x`` is ``x_center`` plus normal
    noise with standard deviation ``noise``. ``y`` is drawn before ``x``.

    Returns
    -------
    (x, y) : tuple of np.ndarray
    """
    low, high = y_range
    ys = np.random.uniform(low, high, n)
    jitter = np.random.normal(0, noise, n)
    xs = np.full(n, x_center) + jitter
    return xs, ys
65
+
66
+
67
def make_cluster(n=300, center=(130, 0), spread=10):
    """
    Draw ``n`` points from an isotropic normal blob around ``center``.

    ``spread`` is the standard deviation on both axes; all x values are
    drawn before the y values.

    Returns
    -------
    (x, y) : tuple of np.ndarray
    """
    xs = np.random.normal(center[0], spread, n)
    ys = np.random.normal(center[1], spread, n)
    return xs, ys
71
+
72
+
73
def build_day(day_idx, date_str, pattern_specs, wafers_per_cst=5, n_csts=2):
    """
    Build one day of synthetic defect rows.

    For each of ``n_csts`` cassettes a base pattern is taken from
    ``pattern_specs`` (cycled) and replicated on ``wafers_per_cst`` wafers,
    each with independent normal jitter (sigma 1.5) on x and y.

    Parameters
    ----------
    day_idx : int
        Day number, used in the cassette id ``C{day_idx:02d}{cassette}``.
    date_str : str
        Day as ``YYYYMMDD``; every row is stamped ``{date_str}120000``.
        (Previously this argument was ignored and the stamp was rebuilt from
        ``day_idx``, which produced a malformed value for ``day_idx >= 10``.)
    pattern_specs : sequence of (x_array, y_array)
        Base point patterns.
    wafers_per_cst : int
        Wafers generated per cassette.
    n_csts : int
        Number of cassettes.

    Returns
    -------
    pd.DataFrame
        One row per defect: the ``base_record`` fields plus ``coor_x``,
        ``coor_y``, ``ANGLE`` (degrees in [0, 360)) and ``DISTANCE``.
    """
    dttm = f"{date_str}120000"
    rows = []
    for cst_i in range(n_csts):
        car_id = f"C{day_idx:02d}{cst_i}"
        pattern_x, pattern_y = pattern_specs[cst_i % len(pattern_specs)]
        for w in range(wafers_per_cst):
            waf_id = f"{car_id}W{w:02d}"
            # x jitter is drawn before y jitter; keep this order so a fixed
            # seed reproduces the same data.
            jx = pattern_x + np.random.normal(0, 1.5, len(pattern_x))
            jy = pattern_y + np.random.normal(0, 1.5, len(pattern_y))
            angles = (np.arctan2(jy, jx) / np.pi * 180 + 360) % 360
            distances = np.sqrt(jx ** 2 + jy ** 2)
            for x, y, angle, dist in zip(jx, jy, angles, distances):
                rec = base_record(car_id, waf_id, dttm)
                rec["coor_x"] = float(x)
                rec["coor_y"] = float(y)
                rec["ANGLE"] = float(angle)
                rec["DISTANCE"] = float(dist)
                rows.append(rec)
    return pd.DataFrame(rows)
91
+
92
+
93
+ # ----------------------------------------------------------------------
94
+ # ์ผ์ž๋ณ„ ํŒจํ„ด ์ •์˜ (contact ๋งค์นญ ์˜๋„)
95
+ # ----------------------------------------------------------------------
96
np.random.seed(42)

# One recipe per day: (date, generator, generator keyword arguments).
# Each day uses two independently drawn patterns of the same kind, generated
# in date order so the random stream is consumed in a fixed sequence.
_DAY_RECIPES = [
    ("20260101", make_ring, {"n": 400, "radius": 85}),
    ("20260102", make_vertical_line, {"n": 300, "x_center": 38}),
    ("20260103", make_cluster, {"n": 300, "center": (135, 0), "spread": 8}),
    ("20260104", make_cluster, {"n": 300, "center": (0, 140), "spread": 8}),
    ("20260105", make_cluster, {"n": 300, "center": (-135, 0), "spread": 8}),
    ("20260106", make_vertical_line, {"n": 300, "x_center": -141}),
    ("20260107", make_cluster, {"n": 300, "center": (50, 50), "spread": 10}),
]

day_configs = {}
for _date, _factory, _kwargs in _DAY_RECIPES:
    day_configs[_date] = [_factory(**_kwargs), _factory(**_kwargs)]

# Write one parquet file per day into the daily input directory.
day_no = 0
for date, specs in day_configs.items():
    day_no += 1
    df = build_day(day_no, date, specs, wafers_per_cst=5, n_csts=2)
    out_path = os.path.join(DAILY_DIR, f"{date}.parquet")
    df.to_parquet(out_path, index=False)
    print(f" ์ƒ์„ฑ {date}: {len(df):>5}๊ฑด, CST {df['CAR_ID'].nunique()}, WAF {df['WAF_ID'].nunique()}")
113
+
114
_RULE = "=" * 70
_WIDE_DISPLAY = ("display.max_colwidth", 200, "display.width", 200)


def _banner(title):
    """Print a blank line, then ``title`` framed by two rule lines."""
    print("\n" + _RULE)
    print(title)
    print(_RULE)


def _report(title, frame, columns):
    """Print a banner and, unless ``frame`` is empty, its selected columns."""
    _banner(title)
    if frame.empty:
        return
    with pd.option_context(*_WIDE_DISPLAY):
        print(frame[columns].to_string(index=False))


_banner("LLSPatternAnalyzer ์‹คํ–‰")

# Imported here, after the script directory was put on sys.path.
from pattern_analyzer import LLSPatternAnalyzer

analyzer = LLSPatternAnalyzer(
    config_path="./lls_config.json",
    daily_input_dir=DAILY_DIR,
    output_dir=OUTPUT_DIR,
    contact_csv="./contact_angle.csv",
    setup_font=True,
)

print("\n--- Mode 1: daily aggregated ---")
df_daily = analyzer.run(mode="daily")

print("\n--- Mode 2: by_cst ---")
df_by_cst = analyzer.run(mode="by_cst")

# ----------------------------------------------------------------------
# Result summary
# ----------------------------------------------------------------------
_report(
    "Mode 1 (daily) ๊ฒฐ๊ณผ + Contact ๋งคํ•‘",
    df_daily,
    ["analysis_date", "overall_pattern", "main_centroid_x", "main_centroid_y",
     "contact_candidate_count", "contact_candidates"],
)
_report(
    "Mode 2 (by_cst) ๊ฒฐ๊ณผ + Contact ๋งคํ•‘",
    df_by_cst,
    ["analysis_date", "CST_ID", "overall_pattern", "main_centroid_x", "main_centroid_y",
     "contact_candidate_count", "contact_candidates"],
)
nu/utils.py ADDED
@@ -0,0 +1,545 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils.py
2
+ """
3
+ ์›จ์ดํผ ๊ฒฐํ•จ ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ ๊ณต์šฉ ์œ ํ‹ธ๋ฆฌํ‹ฐ.
4
+
5
+ ์ด ๋ชจ๋“ˆ์€ LLS(Laser Light Scattering) ๊ฒฐํ•จ ๋ถ„์„ ํŒŒ์ดํ”„๋ผ์ธ์—์„œ
6
+ ํŒจํ„ด ๋ถ„๋ฅ˜ ์ด์ „ยท์ดํ›„ ๋‹จ๊ณ„์— ๊ณตํ†ต์œผ๋กœ ์‚ฌ์šฉ๋˜๋Š” ํ•จ์ˆ˜๋“ค์„ ์ œ๊ณตํ•œ๋‹ค.
7
+
8
+ ํฌ๊ฒŒ 6๊ฐ€์ง€ ๋ฒ”์ฃผ๋ฅผ ํฌํ•จ:
9
+ 1. ํ™˜๊ฒฝ ์„ค์ • : ํ•œ๊ธ€ ํฐํŠธ, JSON config ๋กœ๋“œ
10
+ 2. ๊ฒฐํ•จ ๋ผ๋ฒจ ๋งคํ•‘ : roughbin_no โ†’ ํ•œ๊ธ€ ๊ฒฐํ•จ ๋ถ„๋ฅ˜๋ช…
11
+ 3. Zone ๋ผ๋ฒจ๋ง : ์‹œ๊ณ„๋ฐฉํ–ฅ 12๊ตฌ์—ญ ร— Inner/Outer ๋ถ„๋ฅ˜
12
+ 4. Fine-grid ์ฒ˜๋ฆฌ : ๊ฒฐํ•จ ์ขŒํ‘œ๋ฅผ ๊ฒฉ์ž cell์— ํ• ๋‹น
13
+ 5. ํ•„ํ„ฐ๋ง : cell ๋‹จ์œ„ wafer ์ˆ˜ ๊ธฐ์ค€ ๋…ธ์ด์ฆˆ ์ œ๊ฑฐ
14
+ 6. ์‹œ๊ฐํ™” : ์›จ์ดํผ ๋งต (์‚ฐ์ ๋„ + zone + centroid ๋งˆํ‚น)
15
+
16
+ ํด๋ž˜์Šค `WaferUtils`๋กœ ๋ชจ๋“  ์œ ํ‹ธ๋ฆฌํ‹ฐ๋ฅผ ๋ฌถ์–ด IDE ์ž๋™์™„์„ฑ/ํƒ€์ž…ํžŒํŠธ ์ผ๊ด€์„ฑ์„ ๋†’์ด๊ณ ,
17
+ ํ•˜์œ„ ํ˜ธํ™˜์„ ์œ„ํ•ด ๋™์ผ ์ด๋ฆ„์˜ ๋ชจ๋“ˆ ๋ ˆ๋ฒจ ํ•จ์ˆ˜๋„ ํ•จ๊ป˜ ๋…ธ์ถœํ•œ๋‹ค.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ import json
23
+ from typing import Optional, Tuple
24
+
25
+ import numpy as np
26
+ import pandas as pd
27
+ import matplotlib.pyplot as plt
28
+ import matplotlib.font_manager as fm
29
+ from matplotlib.patches import Circle, Wedge
30
+
31
+
32
+ # ----------------------------------------------------------------------
33
+ # ๋ชจ๋“ˆ ์ƒ์ˆ˜
34
+ # ----------------------------------------------------------------------
35
+
36
# roughbin_no (integer) -> defect class name.
# Used to turn the inspection tool's raw code into the class name used in
# operations.
ROUGHBIN_MAPPING = {
    0: 'LPD',
    100: 'LPD-N',
    110: 'Micro-Scratch',
    111: 'Void',
    115: 'PID',
    120: 'LPD-E',
    130: 'LPD-S',
    140: 'LLPD',
    141: 'Air Pocket',
    150: 'DIC-Unique',
    160: 'Stain',
    170: 'COP',
    200: 'Cluster Area',
    205: 'Extended Defects',
    210: 'Scratch',
    220: 'Slipline',
    230: 'Line',
    231: 'Area',
    233: 'Radial',
    234: 'Ring',
    512: 'Residue',
    520: 'Boat Mark',
    902: 'Streak',
    999: 'Nuisance',
    990: 'LPD Nuisance',
    991: 'PPD Nuisance',
    501: 'Haze Slipline',
    502: 'Hazeline',
    600: 'Grid',
    700: 'ROI',
    800: 'X Section',
}

# Labels of the 12 clock sectors, starting at 12 o'clock and proceeding
# clockwise (1, 2, ... 11 o'clock).
CLOCK_LABELS = [f"{hour:02d}" for hour in (12, *range(1, 12))]

# Plot colour per pattern label.
PATTERN_COLORS = dict([
    ("ํ™˜ํ˜•", "darkorange"),
    ("์„ ํ˜•", "forestgreen"),
    ("๊ตฐ์ง‘", "mediumpurple"),
    ("์ •์ƒ/๋ฏธ๋‹ฌ", "gray"),
    ("Others", "gray"),
])
59
+
60
+
61
+ # ======================================================================
62
+ # WaferUtils
63
+ # ======================================================================
64
+ class WaferUtils:
65
+ """
66
+ ์›จ์ดํผ ๊ฒฐํ•จ ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ ์œ ํ‹ธ๋ฆฌํ‹ฐ ํด๋ž˜์Šค (facade ํ˜•ํƒœ).
67
+
68
+ ์ƒํƒœ๊ฐ€ ํ•„์š” ์—†๋Š” ์ˆœ์ˆ˜ ํ•จ์ˆ˜๋“ค์ด๋ฏ€๋กœ ๋Œ€๋ถ€๋ถ„ `@staticmethod`๋กœ ๊ตฌ์„ฑ๋˜๋ฉฐ,
69
+ ์ธ์Šคํ„ด์Šคํ™” ์—†์ด `WaferUtils.method(...)` ํ˜•ํƒœ๋กœ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๋‹ค.
70
+
71
+ Examples
72
+ --------
73
+ >>> df = WaferUtils.assign_fine_grid(df, cell_size_mm=3.0)
74
+ >>> df = WaferUtils.add_zone_labels(df, inner_radius=105.0)
75
+ >>> WaferUtils.plot_wafer_map(result_df, key="...", pattern_list=["ํ™˜ํ˜•"], ...)
76
+ """
77
+
78
+ # ------------------------------------------------------------------
79
+ # 1. ํ™˜๊ฒฝ ์„ค์ •
80
+ # ------------------------------------------------------------------
81
@staticmethod
def setup_korean_font() -> Optional[str]:
    """
    Register a Korean-capable system font as matplotlib's default font.

    Preference order: Malgun Gothic > Nanum Gothic > NanumBarunGothic >
    Batang > Gulim > AppleGothic. Preferred names are matched against every
    installed font, so a name such as "Batang" -- which contains none of the
    fallback keywords -- can actually be selected.

    If no preferred font is installed, the first installed font whose
    lower-cased name contains one of 'gothic/mincho/dotum/gulim/malgun/
    sans/korean' is used. NOTE(review): the 'sans' keyword also matches
    fonts that are not Korean -- confirm this fallback is wanted.

    On success ``font.family`` is set, ``font.size`` becomes 10 and
    ``axes.unicode_minus`` is disabled (so the minus sign renders correctly
    with the chosen font). A status line is printed either way.

    Returns
    -------
    Optional[str]
        Name of the applied font, or None when no candidate was found.
    """
    preferred = ("Malgun Gothic", "Nanum Gothic", "NanumBarunGothic",
                 "Batang", "Gulim", "AppleGothic")
    keywords = ("gothic", "mincho", "dotum", "gulim", "malgun", "sans", "korean")

    installed = [font.name for font in fm.fontManager.ttflist]
    installed_names = set(installed)

    selected = next((name for name in preferred if name in installed_names), None)
    if selected is None:
        selected = next(
            (name for name in installed
             if any(key in name.lower() for key in keywords)),
            None,
        )

    if selected:
        plt.rcParams["font.family"] = selected
        plt.rcParams["font.size"] = 10
        # Avoid a broken minus sign with the selected font.
        plt.rcParams["axes.unicode_minus"] = False
        print(f"โœ… ํ•œ๊ธ€ ํฐํŠธ ์„ค์ • ์™„๋ฃŒ: {selected}")
    else:
        print("โš ๏ธ ๊ฒฝ๊ณ : ์‹œ์Šคํ…œ์— ํ•œ๊ธ€ ํฐํŠธ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค. ๊ธฐ๋ณธ ํฐํŠธ ์‚ฌ์šฉ (๊ธ€์ž ๊นจ์ง ๋ฐœ์ƒ)")
    return selected
115
+
116
+ @staticmethod
117
+ def load_config(config_path: str = "./lls_config.json") -> dict:
118
+ """
119
+ JSON ํ˜•์‹์˜ ๋ถ„์„ ์„ค์ • ํŒŒ์ผ ๋กœ๏ฟฝ๏ฟฝ.
120
+
121
+ Parameters
122
+ ----------
123
+ config_path : str
124
+ ์„ค์ • ํŒŒ์ผ ๊ฒฝ๋กœ (UTF-8 ์ธ์ฝ”๋”ฉ ๊ฐ€์ •).
125
+
126
+ Returns
127
+ -------
128
+ dict
129
+ ์„ค์ • ํŠธ๋ฆฌ (preprocessing / clustering / ring / linear / lof / cluster /
130
+ misc / contact_mapping ๋“ฑ์˜ ํ‚ค ํฌํ•จ).
131
+
132
+ Raises
133
+ ------
134
+ FileNotFoundError
135
+ ์ง€์ •ํ•œ ๊ฒฝ๋กœ์— ํŒŒ์ผ์ด ์—†๋Š” ๊ฒฝ์šฐ.
136
+ """
137
+ if not os.path.exists(config_path):
138
+ raise FileNotFoundError(f"์„ค์ • ํŒŒ์ผ ์—†์Œ: {config_path}")
139
+ with open(config_path, "r", encoding="utf-8") as f:
140
+ return json.load(f)
141
+
142
+ # ------------------------------------------------------------------
143
+ # 2. ๊ฒฐํ•จ ๋ผ๋ฒจ ๋งคํ•‘
144
+ # ------------------------------------------------------------------
145
+ @staticmethod
146
+ def map_roughbin_no(roughbin) -> Optional[str]:
147
+ """
148
+ roughbin_no(๊ฒ€์‚ฌ๊ธฐ raw ์ฝ”๋“œ)๋ฅผ ์šด์˜ ๊ฒฐํ•จ ๋ถ„๋ฅ˜๋ช…์œผ๋กœ ๋งคํ•‘.
149
+
150
+ ๊ณ ์ • ๋งคํ•‘ ํ…Œ์ด๋ธ”(`ROUGHBIN_MAPPING`)์„ ์šฐ์„  ์กฐํšŒํ•˜๊ณ ,
151
+ ๋ฒ”์œ„ํ˜• ์ฝ”๋“œ(541~548 ๋“ฑ)๋Š” ๋ณ„๋„ if-์กฐ๊ฑด์œผ๋กœ ์ฒ˜๋ฆฌํ•œ๋‹ค.
152
+
153
+ Parameters
154
+ ----------
155
+ roughbin : Any
156
+ ์ •์ˆ˜ ๋˜๋Š” ์ •์ˆ˜ ๋ณ€ํ™˜ ๊ฐ€๋Šฅํ•œ ๊ฐ’. NaN/None/๋ฌธ์ž์—ด ๋“ฑ์€ None ๋ฐ˜ํ™˜.
157
+
158
+ Returns
159
+ -------
160
+ Optional[str]
161
+ ๋ถ„๋ฅ˜๋ช…("LPD", "Haze Slipline" ๋“ฑ). ๋งคํ•‘ ์‹คํŒจ ์‹œ "Unknown",
162
+ ์ž…๋ ฅ์ด NaN/๋ณ€ํ™˜ ๋ถˆ๊ฐ€ ์‹œ None.
163
+ """
164
+ if pd.isna(roughbin):
165
+ return None
166
+ try:
167
+ roughbin = int(roughbin)
168
+ except (TypeError, ValueError):
169
+ return None
170
+
171
+ if roughbin in ROUGHBIN_MAPPING:
172
+ return ROUGHBIN_MAPPING[roughbin]
173
+ # ๋ฒ”์œ„ํ˜• ์ฝ”๋“œ ์ฒ˜๋ฆฌ
174
+ if 541 <= roughbin <= 548: return "Haze Slipline"
175
+ if 531 <= roughbin <= 538: return "Hazeline"
176
+ if 601 <= roughbin <= 609: return "Grid"
177
+ if 701 <= roughbin <= 709: return "ROI"
178
+ if 801 <= roughbin <= 809: return "X Section"
179
+ return "Unknown"
180
+
181
+ # ------------------------------------------------------------------
182
+ # 3. Zone ๋ผ๋ฒจ๋ง
183
+ # ------------------------------------------------------------------
184
@staticmethod
def add_zone_labels(df: pd.DataFrame, inner_radius: float = 105.0) -> pd.DataFrame:
    """
    Attach a zone label to every defect coordinate.

    Label format: ``{Inner|Outer}_{two-digit clock position}``, e.g.
    ``"Inner_03"`` = radius <= ``inner_radius`` in the 3 o'clock sector,
    ``"Outer_12"`` = radius > ``inner_radius`` in the 12 o'clock sector.

    Angle handling:
    - the mathematical angle (atan2) is converted to a clockwise angle
      measured from 12 o'clock: ``(90 - theta) mod 360``;
    - sector index = ``floor(angle / 30) % 12``.

    Parameters
    ----------
    df : pd.DataFrame
        Defect coordinates with ``coor_x`` and ``coor_y`` columns.
    inner_radius : float
        Radius separating Inner from Outer (mm).

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with ``zone_label``, ``r`` (distance from the origin)
        and ``theta_deg`` (mathematical angle in degrees) added.
    """
    labelled = df.copy()
    radius = np.hypot(labelled["coor_x"], labelled["coor_y"])
    math_angle = np.degrees(np.arctan2(labelled["coor_y"], labelled["coor_x"]))

    # Clock convention: 12 o'clock is 0 degrees, increasing clockwise.
    clockwise = (90.0 - math_angle) % 360.0
    # The trailing % 12 folds an index of 12 (angle rounded up to 360) to 0.
    sector = (clockwise // 30).astype(int) % 12

    ring_names = np.where(radius <= inner_radius, "Inner", "Outer")
    labelled["zone_label"] = [
        f"{ring}_{CLOCK_LABELS[idx]}" for ring, idx in zip(ring_names, sector)
    ]
    labelled["r"] = radius
    labelled["theta_deg"] = math_angle
    return labelled
223
+
224
+ # ------------------------------------------------------------------
225
+ # 4. Fine-grid ์ฒ˜๋ฆฌ
226
+ # ------------------------------------------------------------------
227
+ @staticmethod
228
+ def assign_fine_grid(df: pd.DataFrame, cell_size_mm: float = 3.0) -> pd.DataFrame:
229
+ """
230
+ ๊ฒฐํ•จ ์ขŒํ‘œ๋ฅผ fine-grid cell ์— ํ• ๋‹น.
231
+
232
+ ์›จ์ดํผ ์ขŒํ‘œ ๋ฒ”์œ„ [-150, 150] ร— [-150, 150] (mm)๋ฅผ `cell_size_mm` ํฌ๊ธฐ์˜
233
+ ์ •์‚ฌ๊ฐ ๊ฒฉ์ž๋กœ ๋ถ„ํ• ํ•˜๊ณ , ๊ฐ ๊ฒฐํ•จ์ด ์†ํ•˜๋Š” cell์˜ ์ค‘์‹ฌ ์ขŒํ‘œ์™€ ID๋ฅผ ๋ถ€์—ฌ.
234
+
235
+ Parameters
236
+ ----------
237
+ df : pd.DataFrame
238
+ 'coor_x', 'coor_y' ์ปฌ๋Ÿผ ํฌํ•จ DF.
239
+ cell_size_mm : float
240
+ ์…€ ํ•œ ๋ณ€์˜ ํฌ๊ธฐ (mm). ๊ธฐ๋ณธ 3.0.
241
+
242
+ Returns
243
+ -------
244
+ pd.DataFrame
245
+ 'cell_x', 'cell_y' (์…€ ์ค‘์‹ฌ ์ขŒํ‘œ mm),
246
+ 'cell_id' ("{int_x}_{int_y}" ํ˜•์‹์˜ unique ID) ์ถ”๊ฐ€๋œ ์‚ฌ๋ณธ.
247
+
248
+ Notes
249
+ -----
250
+ cell_id๋Š” cell_x/cell_y๋ฅผ ๋ฐ˜์˜ฌ๋ฆผํ•˜์—ฌ ์ •์ˆ˜ํ™”ํ•œ ๋ฌธ์ž์—ด์ด๋ผ
251
+ cell_size_mm๊ฐ€ ์ •์ˆ˜ ๊ฒฝ๊ณ„์™€ ์–ด๊ธ‹๋‚˜๋ฉด ์ถฉ๋Œ ๊ฐ€๋Šฅ. ํ†ต์ƒ 3.0/5.0 ๋“ฑ ์ •์ˆ˜ ๊ถŒ์žฅ.
252
+ """
253
+ df = df.copy()
254
+ # ์ขŒํ‘œ ํ‰ํ–‰์ด๋™ ํ›„ floor โ†’ bin index
255
+ bin_x = np.floor((df["coor_x"] + 150) / cell_size_mm).astype(int)
256
+ bin_y = np.floor((df["coor_y"] + 150) / cell_size_mm).astype(int)
257
+
258
+ # ์…€ ์ค‘์‹ฌ ์ขŒํ‘œ (mm)
259
+ df["cell_x"] = bin_x * cell_size_mm - 150 + cell_size_mm / 2
260
+ df["cell_y"] = bin_y * cell_size_mm - 150 + cell_size_mm / 2
261
+
262
+ cell_x_int = np.round(df["cell_x"]).astype(int)
263
+ cell_y_int = np.round(df["cell_y"]).astype(int)
264
+ df["cell_id"] = cell_x_int.astype(str) + "_" + cell_y_int.astype(str)
265
+ return df
266
+
267
+ @staticmethod
268
+ def get_cell_wafer_counts(df: pd.DataFrame) -> pd.DataFrame:
269
+ """
270
+ ๊ฐ cell์—์„œ ๊ฒฐํ•จ์ด ๋ฐœ์ƒํ•œ unique wafer ์ˆ˜์™€ ๊ฒฐํ•จ ์ˆ˜๋ฅผ ์ง‘๊ณ„.
271
+
272
+ Parameters
273
+ ----------
274
+ df : pd.DataFrame
275
+ 'cell_id', 'WAF_ID' ์ปฌ๋Ÿผ ํฌํ•จ DF.
276
+
277
+ Returns
278
+ -------
279
+ pd.DataFrame
280
+ index = cell_id
281
+ columns = ['wafer_count', 'defect_count', 'wafer_ratio']
282
+ - wafer_count : ํ•ด๋‹น cell์—์„œ ๊ฒฐํ•จ์„ ๋ณด์ธ unique wafer ์ˆ˜
283
+ - defect_count : ํ•ด๋‹น cell์˜ ์ „์ฒด ๊ฒฐํ•จ ์ˆ˜
284
+ - wafer_ratio : wafer_count / ์ „์ฒด unique wafer ์ˆ˜
285
+ """
286
+ total_wafers = df["WAF_ID"].nunique()
287
+ cell_stats = df.groupby("cell_id").agg(
288
+ wafer_count=("WAF_ID", "nunique"),
289
+ defect_count=("WAF_ID", "size"),
290
+ )
291
+ cell_stats["wafer_ratio"] = cell_stats["wafer_count"] / total_wafers if total_wafers else 0.0
292
+ return cell_stats
293
+
294
@staticmethod
def filter_by_cell_wafer_count(
    df: pd.DataFrame,
    n1_min_wafer: int,
    cell_size_mm: float = 3.0,
) -> pd.DataFrame:
    """
    Fine-grid "n1" filter: keep defects only in cells shared by enough wafers.

    Implements the assumption that only defects recurring at a common
    position are meaningful: cells whose unique wafer count is below
    ``n1_min_wafer`` are treated as noise and removed.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with ``coor_x``, ``coor_y`` and ``WAF_ID`` (cells are assigned
        inside this function).
    n1_min_wafer : int
        Minimum number of unique wafers for a cell to be kept.
    cell_size_mm : float
        Fine-grid cell size (mm).

    Returns
    -------
    pd.DataFrame
        Defects of the surviving cells, with the grid columns plus
        ``cell_wafer_count`` and ``wafer_ratio`` added.
    """
    gridded = WaferUtils.assign_fine_grid(df, cell_size_mm=cell_size_mm)
    stats = WaferUtils.get_cell_wafer_counts(gridded)

    enough_wafers = stats["wafer_count"] >= n1_min_wafer
    surviving_cells = stats[enough_wafers].index

    kept = gridded[gridded["cell_id"].isin(surviving_cells)].copy()
    # Bring the per-cell statistics onto each surviving defect row.
    enriched = kept.merge(
        stats[["wafer_count", "wafer_ratio"]],
        left_on="cell_id", right_index=True, how="left",
    )
    return enriched.rename(columns={"wafer_count": "cell_wafer_count"})
331
+
332
+ @staticmethod
333
+ def summarize_filtering_result(
334
+ df_original: pd.DataFrame,
335
+ df_filtered: pd.DataFrame,
336
+ ) -> dict:
337
+ """
338
+ ํ•„ํ„ฐ๋ง ์ „ํ›„ ๊ฒฐํ•จ/Cell ์ˆ˜ ์š”์•ฝ ํ†ต๊ณ„.
339
+
340
+ Returns
341
+ -------
342
+ dict
343
+ original_defects, filtered_defects, removed_defects, removal_rate(%),
344
+ original_cells, valid_cells.
345
+ """
346
+ orig = len(df_original)
347
+ filt = len(df_filtered)
348
+ removed = orig - filt
349
+ rate = (removed / orig * 100) if orig else 0.0
350
+ return {
351
+ "original_defects": orig,
352
+ "filtered_defects": filt,
353
+ "removed_defects": removed,
354
+ "removal_rate": round(rate, 2),
355
+ "original_cells": df_original["cell_id"].nunique() if "cell_id" in df_original.columns else 0,
356
+ "valid_cells": df_filtered["cell_id"].nunique() if "cell_id" in df_filtered.columns else 0,
357
+ }
358
+
359
+ # ------------------------------------------------------------------
360
+ # 5. Visualization
361
+ # ------------------------------------------------------------------
362
@staticmethod
def plot_wafer_map(
    result_df: pd.DataFrame,
    key: str,
    pattern_list,
    dominant_zone: str,
    meta: Optional[dict] = None,
    figsize: Tuple[int, int] = (8, 8),
    save_path: Optional[str] = None,
    show_mode: bool = False,
) -> None:
    """
    Draw a wafer defect map and save it as an image.

    Layers
    ------
    1. Background
       - ring pattern: the whole 150 mm disc is shaded beige
       - otherwise: only the wedge(s) named by `dominant_zone` are shaded
    2. Defect scatter
       - when an 'inlier' column exists, inliers and outliers get separate
         colors; the inlier color is looked up in PATTERN_COLORS by the
         first pattern name
    3. Centroid marker (skipped for the ring pattern)
       - red circle of radius 10 plus an X marker
    4. Concentric circles at 30/45/60/90/120/150
    5. Clock-direction grid lines with twelve hour labels
    6. Caption: key / pattern / zone / defect counts / equipment / wafers

    Parameters
    ----------
    result_df : pd.DataFrame
        Needs 'coor_x' and 'coor_y'; 'inlier' and 'zone_label' are optional.
    key : str
        Used in the caption and as the default file name.
    pattern_list : list[str] | tuple[str, ...] | str
        Pattern name(s). A sequence is joined with ", "; any other value is
        converted with `str()` and its first comma-separated item selects
        the color.
    dominant_zone : str
        Comma-separated zone labels of the form '<type>_<clock>' (for
        example 'Inner_03'). 'N/A', an empty string or None draws no wedge.
        A type other than 'Inner' is drawn as the 105-150 band.
    meta : dict, optional
        Reads 'main_centroid_x', 'main_centroid_y', 'wafer_count' and
        'EQP_NM_8030' when present.
    figsize : (int, int)
        Figure size passed to `plt.subplots`.
    save_path : str, optional
        Output path. When None, './result/result_figures/{key}.jpg' is used.
        The parent directory is created if it does not exist.
    show_mode : bool
        When True, `plt.show()` is called after saving.
    """
    solid_radii = [45, 90, 150]
    dashed_radii = [30, 60, 120]

    # Normalize the pattern argument into a display string and a color key.
    if isinstance(pattern_list, (list, tuple)):
        pattern_names = [str(p) for p in pattern_list]
        pattern_str = ", ".join(pattern_names)
        # An empty sequence falls back to the default color instead of
        # raising IndexError.
        first_pattern = pattern_names[0] if pattern_names else ""
    else:
        pattern_str = str(pattern_list)
        first_pattern = pattern_str.split(",")[0].strip()

    # NOTE(review): the non-ASCII literals in this function are reproduced
    # exactly as they appear in this copy of the file; they look like
    # mis-decoded UTF-8 Korean text -- confirm the file encoding.
    is_ring = "ํ™˜ํ˜•" in pattern_str

    # Parse the zone list once; both the background and the caption use it.
    # A falsy value (None / "") or 'N/A' means "no dominant zone".
    if dominant_zone and dominant_zone != "N/A":
        dom_zones = [z.strip() for z in str(dominant_zone).split(",")]
    else:
        dom_zones = []

    color = PATTERN_COLORS.get(first_pattern, "steelblue")
    fig, ax = plt.subplots(figsize=figsize)

    # try/finally guarantees the figure is released even when drawing or
    # saving fails, so repeated calls do not accumulate open figures.
    try:
        # --- Background: ring -> whole disc, otherwise -> zone wedges ---
        if is_ring:
            ax.add_patch(Circle((0, 0), 150, facecolor="#F5F5DC",
                                edgecolor="none", alpha=0.8, zorder=1))
        else:
            for zone in dom_zones:
                parts = zone.split("_")
                # A label that cannot be parsed only loses its own wedge;
                # the remaining zones and the rest of the plot still render.
                if len(parts) != 2 or parts[1] not in CLOCK_LABELS:
                    continue
                ztype, zclock = parts
                r_min = 0 if ztype == "Inner" else 105
                r_max = 105 if ztype == "Inner" else 150
                idx = CLOCK_LABELS.index(zclock)
                # Clock angle (clockwise from 12 o'clock) -> math angle
                # (counter-clockwise from +x), which is what Wedge expects.
                math_start = 90 - (idx + 1) * 30
                math_end = 90 - idx * 30
                ax.add_patch(Wedge((0, 0), r_max, math_start, math_end,
                                   width=(r_max - r_min),
                                   facecolor="#F5F5DC",
                                   edgecolor="none", alpha=0.8, zorder=1))

        # --- Defect scatter ---
        if "inlier" in result_df.columns:
            # Explicit comparisons keep rows whose flag is neither True nor
            # False (e.g. NaN) out of both groups.
            inliers = result_df[result_df["inlier"].eq(True)]
            outliers = result_df[result_df["inlier"].eq(False)]
            ax.scatter(outliers["coor_x"], outliers["coor_y"],
                       c="lightgray", s=15, alpha=0.3, zorder=4)
            ax.scatter(inliers["coor_x"], inliers["coor_y"],
                       c=color, s=35, alpha=0.5,
                       label=f"Inlier ({pattern_str})", zorder=5)
        else:
            ax.scatter(result_df["coor_x"], result_df["coor_y"],
                       c=color, s=30, alpha=0.5, zorder=5)

        # --- Centroid marker: skipped for the ring pattern (per the original
        # author, a ring's center sits near the origin and adds nothing) ---
        if meta and not is_ring:
            cx = meta.get("main_centroid_x")
            cy = meta.get("main_centroid_y")
            if cx is not None and cy is not None:
                ax.add_patch(Circle((cx, cy), radius=10, facecolor="none",
                                    edgecolor="red", linewidth=2.5,
                                    linestyle="-", alpha=0.9, zorder=7))
                ax.scatter(cx, cy, c="red", s=80, marker="x",
                           linewidths=2.5, zorder=8, label="Centroid")

        # --- Concentric wafer circles ---
        for r in solid_radii:
            ax.add_patch(Circle((0, 0), r, color="black", fill=False,
                                linestyle="-", linewidth=1.2, alpha=0.7,
                                zorder=2))
        for r in dashed_radii:
            ax.add_patch(Circle((0, 0), r, color="gray", fill=False,
                                linestyle="--", linewidth=0.8, alpha=0.5,
                                zorder=2))

        # --- Clock-direction grid lines and labels ---
        clock_angles = {0: "12์‹œ", 30: "1์‹œ", 60: "2์‹œ", 90: "3์‹œ",
                        120: "4์‹œ", 150: "5์‹œ", 180: "6์‹œ", 210: "7์‹œ",
                        240: "8์‹œ", 270: "9์‹œ", 300: "10์‹œ", 330: "11์‹œ"}
        grid_end = max(solid_radii) + 12
        label_r = grid_end * 0.93
        for angle_deg, label_text in clock_angles.items():
            # Clock angle -> math angle in radians.
            math_rad = np.deg2rad(90 - angle_deg)
            ax.plot([0, grid_end * np.cos(math_rad)],
                    [0, grid_end * np.sin(math_rad)],
                    color="gray", linestyle=":", linewidth=0.8, zorder=2)
            ax.text(label_r * np.cos(math_rad), label_r * np.sin(math_rad),
                    label_text, color="darkblue", fontsize=8,
                    ha="center", va="center", weight="bold", alpha=0.75,
                    zorder=3)

        ax.axhline(0, color="k", linewidth=0.4, zorder=3)
        ax.axvline(0, color="k", linewidth=0.4, zorder=3)
        max_range = max(solid_radii) + 20
        ax.set_xlim(-max_range, max_range)
        ax.set_ylim(-max_range, max_range)
        ax.set_aspect("equal", "box")
        ax.set_xlabel("X (mm)")
        ax.set_ylabel("Y (mm)")
        # Only build a legend when something is labelled; otherwise
        # matplotlib warns about an empty legend.
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend(loc="upper right", fontsize=8)
        ax.grid(True, alpha=0.15)

        # --- Caption ---
        total = len(result_df)
        dom_cnt = 0
        if dom_zones and "zone_label" in result_df.columns:
            dom_cnt = int(result_df["zone_label"].isin(dom_zones).sum())
        ratio = (dom_cnt / total * 100) if total else 0.0

        lines = [
            f"Key: {key}",
            f"ํŒจํ„ด: {pattern_str} | ๋ฐœ์ƒ๊ตฌ์—ญ: {dominant_zone}",
            f"์ „์ฒด ๊ฒฐํ•จ: {total}๊ฑด | ์ฃผ์š”์˜์—ญ ๊ฒฐํ•จ: {dom_cnt}๊ฑด | ๋น„์œจ: {ratio:.1f}%",
        ]
        if meta:
            lines.append(f"์žฅ๋น„: {meta.get('EQP_NM_8030', '-')} | ์›จ์ดํผ: {meta.get('wafer_count', '-')}๋งค")
        ax.set_title("\n".join(lines), fontsize=9, loc="left", pad=8)
        fig.tight_layout()

        # --- Save ---
        if save_path is None:
            save_path = os.path.join("./result/result_figures", f"{key}.jpg")
        # Create the target directory for caller-supplied paths too, not
        # only for the default location.
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
        if show_mode:
            plt.show()
    finally:
        # Close this specific figure rather than whichever one is current.
        plt.close(fig)
531
+
532
+
533
# ======================================================================
# Backward-compat: module-level aliases for the WaferUtils static methods
# (keeps legacy imports such as `from utils import setup_korean_font, ...`
#  working unchanged)
# ======================================================================
setup_korean_font = WaferUtils.setup_korean_font
load_config = WaferUtils.load_config
map_roughbin_no = WaferUtils.map_roughbin_no
add_zone_labels = WaferUtils.add_zone_labels
assign_fine_grid = WaferUtils.assign_fine_grid
get_cell_wafer_counts = WaferUtils.get_cell_wafer_counts
filter_by_cell_wafer_count = WaferUtils.filter_by_cell_wafer_count
summarize_filtering_result = WaferUtils.summarize_filtering_result
plot_wafer_map = WaferUtils.plot_wafer_map