File size: 30,692 Bytes
7803723
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
"""

언론 보도 객관성 측정을 위한 무주체 피동형 술어 정규표현식 (v2)

================================================================================



I.  객관성 의심 술어 (DOUBT): 기자 의견으로 여겨질 수 있는 무주체 주관적 술어

II. 객관성 지지 술어 (SUPPORT): 사실 확인/명시적 출처 기반 술어



* 무주체 피동형 술어: 발언/판단의 주체가 문장에 없어 기자 의견으로 읽힐 수 있는 표현

"""

import re
from typing import Dict, Pattern, List, Any
from korean_sentence_splitter import KoreanSentenceSplitter

# =============================================================================
# I. 객관성 의심 술어 (DOUBT):
# =============================================================================

DOUBT_PREDICATES: Dict[str, Pattern] = {
    
    # =========================================================================
    # 1. 분석/해석형
    # =========================================================================
    "분석형": re.compile(
        r"(?:"
        # --- 피동형 술어 ---
        r"분석(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"풀이(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"해석(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"진단(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"읽(?:힌다|힙니다|혔다|혔습니다|히고\s*있다|히고\s*있습니다)|"
        # --- 명사형 술어: 분석/풀이/해석/진단 ---
        r"(?:분석|풀이|해석|진단)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)\s*(?:분석|풀이|해석|진단)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:분석|풀이|해석|진단)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:분석|풀이|해석|진단)(?:이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다|이\s*나왔다|이\s*나왔습니다|이\s*지배적이다|이\s*지배적입니다)"
        r")"
    ),
    
    # =========================================================================
    # 2. 전망/예측형
    # =========================================================================
    "전망형": re.compile(
        r"(?:"
        # --- 피동형 술어 ---
        r"전망(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"예상(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"예측(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"점쳐(?:진다|집니다|졌다|졌습니다|지고\s*있다|지고\s*있습니다)|"
        r"예견(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        # --- 명사형 술어 ---
        r"(?:전망|예상|예측)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)\s*(?:전망|예상|예측|관측)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:전망|예상|예측|관측)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다|이\s*우세하다|이\s*우세합니다|이\s*지배적이다|이\s*지배적입니다)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:전망|예상|예측|관측)(?:이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다|이\s*나왔다|이\s*나왔습니다)"
        r")"
    ),
    
    # =========================================================================
    # 3. 관측/추정형
    # =========================================================================
    "관측형": re.compile(
        r"(?:"
        # --- 피동형 술어 ---
        r"관측(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"추정(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"추측(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"짐작(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        # --- 명사형 술어 ---
        r"(?:관측|추정|추측)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)\s*(?:관측|추정|추측)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:관측|추정|추측)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다)"
        r")"
    ),
    
    # =========================================================================
    # 4. 전언/보도형
    # =========================================================================
    "전언형": re.compile(
        r"(?:"
        # --- 피동형 술어 ---
        r"알려(?:진다|집니다|졌다|졌습니다|지고\s*있다|지고\s*있습니다)|"
        r"전해(?:진다|집니다|졌다|졌습니다|지고\s*있다|지고\s*있습니다)|"
        r"보도(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"전달(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        # 것으로 + 전언
        r"것으로\s*(?:알려졌다|알려졌습니다|알려집니다|알려지고\s*있다|알려지고\s*있습니다|전해졌다|전해졌습니다|전해집니다|전해지고\s*있다|전해지고\s*있습니다)|"
        # --- 명사형 술어 ---
        r"(?:소식|보도|소문)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)\s*(?:소식|보도|소문)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는)?\s*(?:소식|보도)(?:이\s*전해졌다|이\s*전해졌습니다|이\s*전해지고\s*있다|이\s*전해지고\s*있습니다|이\s*들려온다|이\s*들려옵니다|이\s*들려왔다|이\s*들려왔습니다)|"
        # ~라는 겁니다/것입니다
        r"(?:라는|다는)\s*(?:겁니다|것입니다|것이다|얘기다|얘기입니다|이야기다|이야기입니다)"
        r")"
    ),
    
    # =========================================================================
    # 5. 평가/판단형
    # =========================================================================
    "평가형": re.compile(
        r"(?:"
        # --- 피동형 술어 ---
        r"평가(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다|받고\s*있다|받고\s*있습니다|받았다|받았습니다|받는다|받습니다)|"
        r"판단(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"인식(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"간주(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"여겨(?:진다|집니다|졌다|졌습니다|지고\s*있다|지고\s*있습니다)|"
        # --- 명사형 술어 ---
        r"(?:평가|판단|인식)(?:다|이다|입니다|였다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)\s*(?:평가|판단|인식)(?:다|이다|입니다|였다|이었다|이었습니다)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:평가|판단|인식)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:평가|판단)(?:가\s*나온다|가\s*나옵니다|가\s*나오고\s*있다|가\s*나오고\s*있습니다|가\s*나왔다|가\s*나왔습니다|이\s*나온다|이\s*나옵니다)|"
        r"(?:라는|다는)?\s*(?:평가|판단)(?:를\s*받고\s*있다|를\s*받고\s*있습니다|를\s*받았다|를\s*받았습니다|를\s*받는다|를\s*받습니다)"
        r")"
    ),
    
    # =========================================================================
    # 6. 비판/지적형
    # =========================================================================
    "비판형": re.compile(
        r"(?:"
        # --- 피동형 술어 ---
        r"비판(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다|받고\s*있다|받고\s*있습니다|받았다|받았습니다|받는다|받습니다)|"
        r"비난(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다|받고\s*있다|받고\s*있습니다|받았다|받았습니다|받는다|받습니다)|"
        r"지적(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다|받고\s*있다|받고\s*있습니다|받았다|받았습니다|받는다|받습니다)|"
        # --- 명사형 술어 ---
        r"(?:비판|비난|지적)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)\s*(?:비판|비난|지적)(?:이다|입니다|이었다|이었습니다)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:비판|비난|지적)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다|도\s*제기됐다|도\s*제기됐습니다)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:비판|비난|지적)(?:이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다|이\s*나왔다|이\s*나왔습니다|이\s*제기됐다|이\s*제기됐습니다|이\s*제기되고\s*있다|이\s*제기되고\s*있습니다|이\s*쏟아지고\s*있다|이\s*쏟아지고\s*있습니다|이\s*쏟아졌다|이\s*쏟아졌습니다|이\s*잇따르고\s*있다|이\s*잇따르고\s*있습니다|이\s*잇따랐다|이\s*잇따랐습니다)|"
        r"(?:비판|비난|지적)(?:을\s*받고\s*있다|을\s*받고\s*있습니다|을\s*받았다|을\s*받았습니다|을\s*받는다|을\s*받습니다|을\s*면치\s*못하고\s*있다|을\s*면치\s*못하고\s*있습니다)"
        r")"
    ),
    
    # =========================================================================
    # 7. 제기/거론형
    # =========================================================================
    "제기형": re.compile(
        r"(?:"
        r"제기(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"거론(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"언급(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"지목(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"논의(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"검토(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        r"거명(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)"
        r")"
    ),
    
    # =========================================================================
    # 8. 우려/의혹형
    # =========================================================================
    "우려형": re.compile(
        r"(?:"
        # 우려
        r"우려(?:가|도|를)?\s*(?:있다|있습니다|나온다|나옵니다|나오고\s*있다|나오고\s*있습니다|나왔다|나왔습니다|제기됐다|제기됐습니다|제기되고\s*있다|제기되고\s*있습니다|커지고\s*있다|커지고\s*있습니다|커졌다|커졌습니다|낳고\s*있다|낳고\s*있습니다|낳았다|낳았습니다|높아지고\s*있다|높아지고\s*있습니다)|"
        # 의혹
        r"의혹(?:이|을|도)?\s*(?:있다|있습니다|제기됐다|제기됐습니다|제기되고\s*있다|제기되고\s*있습니다|불거졌다|불거졌습니다|불거지고\s*있다|불거지고\s*있습니다|일고\s*있다|일고\s*있습니다|일었다|일었습니다|사고\s*있다|사고\s*있습니다|샀다|샀습니다|증폭되고\s*있다|증폭되고\s*있습니다|확산되고\s*있다|확산되고\s*있습니다)|"
        # 논란
        r"논란(?:이|도)?\s*(?:있다|있습니다|일고\s*있다|일고\s*있습니다|일었다|일었습니다|되고\s*있다|되고\s*있습니다|됐다|됐습니다|불거졌다|불거졌습니다|불거지고\s*있다|불거지고\s*있습니다|예상된다|예상됩니다|이어지고\s*있다|이어지고\s*있습니다)|"
        # 의문
        r"의문(?:이|도)?\s*(?:있다|있습니다|제기됐다|제기됐습니다|제기되고\s*있다|제기되고\s*있습니다|일고\s*있다|일고\s*있습니다|남는다|남습니다|남아\s*있다|남아\s*있습니다|든다|듭니다)|"
        # 명사형
        r"(?:우려|의혹|논란|의문)(?:다|이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)\s*(?:우려|의혹|논란|의문)(?:다|이다|입니다|이었다|이었습니다)|"
        r"(?:라는|다는)?\s*(?:우려|의혹|논란|의문)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*제기됐다|도\s*제기됐습니다)"
        r")"
    ),
    
    # =========================================================================
    # 9. 가능성/여지형
    # =========================================================================
    "가능성형": re.compile(
        r"(?:"
        r"가능성(?:이|도|을)?\s*(?:있다|있습니다|크다|큽니다|높다|높습니다|낮다|낮습니다|제기됐다|제기됐습니다|거론되고\s*있다|거론되고\s*있습니다|점쳐지고\s*있다|점쳐지고\s*있습니다|배제할\s*수\s*없다|배제할\s*수\s*없습니다|열려\s*있다|열려\s*있습니다|열렸다|열렸습니다|제기된다|제기됩니다|나온다|나옵니다)|"
        r"개연성(?:이|도)?\s*(?:있다|있습니다|크다|큽니다|높다|높습니다|낮다|낮습니다)|"
        r"여지(?:가|도|를)?\s*(?:있다|있습니다|남아\s*있다|남아\s*있습니다|남겨져\s*있다|남겨져\s*있습니다|남는다|남습니다|남겼다|남겼습니다)|"
        r"(?:라는|다는)?\s*(?:가능성|개연성|여지)(?:도\s*있다|도\s*있습니다|이\s*제기됐다|이\s*제기됐습니다|이\s*나온다|이\s*나옵니다)"
        r")"
    ),
    
    # =========================================================================
    # 10. 분위기/목소리형
    # =========================================================================
    "분위기형": re.compile(
        r"(?:"
        # 분위기
        r"분위기(?:다|이다|입니다|이었다|이었습니다|가\s*감지되고\s*있다|가\s*감지되고\s*있습니다|가\s*형성되고\s*있다|가\s*형성되고\s*있습니다|가\s*확산되고\s*있다|가\s*확산되고\s*있습니다|가\s*팽배하다|가\s*팽배합니다|가\s*역력하다|가\s*역력합니다)|"
        # 목소리
        r"목소리(?:가|도)?\s*(?:나온다|나옵니다|나오고\s*있다|나오고\s*있습니다|나왔다|나왔습니다|높아지고\s*있다|높아지고\s*있습니다|높아졌다|높아졌습니다|커지고\s*있다|커지고\s*있습니다|커졌다|커졌습니다|있다|있습니다)|"
        # 기대
        r"기대(?:가|를|도)?\s*(?:모아지고\s*있다|모아지고\s*있습니다|모이고\s*있다|모이고\s*있습니다|높아지고\s*있다|높아지고\s*있습니다|커지고\s*있다|커지고\s*있습니다|크다|큽니다|높다|높습니다)|"
        # 관심/이목
        r"(?:관심|이목)(?:이|을)?\s*(?:쏠리고\s*있다|쏠리고\s*있습니다|쏠렸다|쏠렸습니다|집중되고\s*있다|집중되고\s*있습니다|집중됐다|집중됐습니다|모아지고\s*있다|모아지고\s*있습니다)|"
        # 기류/조짐/흐름
        r"(?:기류|조짐|흐름|양상)(?:이|가)?\s*(?:감지되고\s*있다|감지되고\s*있습니다|감지됐다|감지됐습니다|포착되고\s*있다|포착되고\s*있습니다|나타나고\s*있다|나타나고\s*있습니다)"
        r")"
    ),
    
    # =========================================================================
    # 11. 주장/입장형
    # =========================================================================
    "주장형": re.compile(
        r"(?:"
        # --- 피동형 술어 ---
        r"주장(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
        # --- 명사형 술어 ---
        r"(?:주장|입장|방침|계획|생각|확신|설명|해명)(?:이다|입니다|이었다|이었습니다|이에요)|"
        r"(?:라는|다는|이라는|란|는|은|인)\s*(?:주장|입장|방침|계획|생각|확신|설명|해명)(?:이다|입니다|이었다|이었습니다)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:주장|입장)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다)|"
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:주장|입장|설명|해명)(?:이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다|이\s*나왔다|이\s*나왔습니다)"
        r")"
    ),
    
    # =========================================================================
    # 12. 시각/견해형
    # =========================================================================
    "시각형": re.compile(
        r"(?:"
        # 단독 사용
        r"(?:시각|견해|관점|자평)(?:이다|다|입니다|이었다|였다|이었습니다|이에요)|"
        # ~라는 + 명사
        r"(?:라는|다는|이라는|란|는|은|인)\s*(?:시각|견해|관점|인식|자평)(?:이다|다|입니다|이었다|였다|이었습니다)|"
        # 명사 + 도 있다/지배적이다
        r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:시각|견해|관점|인식)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|이\s*있다|이\s*있습니다|이\s*나온다|이\s*나옵니다|이\s*지배적이다|이\s*지배적입니다|가\s*지배적이다|가\s*지배적입니다|이\s*우세하다|이\s*우세합니다|가\s*우세하다|가\s*우세합니다)"
        r")"
    ),
    
    # =========================================================================
    # 13. 격찬/혹평형 (극단적 평가)
    # =========================================================================
    "격찬형": re.compile(
        r"(?:"
        r"(?:격찬|찬사|호평)(?:이|을)?\s*(?:쏟아졌다|쏟아졌습니다|쏟아지고\s*있다|쏟아지고\s*있습니다|이어졌다|이어졌습니다|이어지고\s*있다|이어지고\s*있습니다|나왔다|나왔습니다|나오고\s*있다|나오고\s*있습니다|받았다|받았습니다|받고\s*있다|받고\s*있습니다)|"
        r"(?:혹평|악평)(?:이|을)?\s*(?:쏟아졌다|쏟아졌습니다|쏟아지고\s*있다|쏟아지고\s*있습니다|이어졌다|이어졌습니다|이어지고\s*있다|이어지고\s*있습니다|나왔다|나왔습니다|나오고\s*있다|나오고\s*있습니다|받았다|받았습니다|받고\s*있다|받고\s*있습니다)|"
        r"(?:라는|다는)?\s*(?:격찬|찬사|호평|혹평|악평)(?:이다|입니다|이었다|이었습니다)"
        r")"
    ),
    
    # =========================================================================
    # 15. 관용표현형
    # =========================================================================
    "관용표현형": re.compile(
        r"(?:"
        # ~한 셈이다
        r"[은는인된한했던]\s*셈(?:이다|입니다|이에요|이었다|이었습니다)|"
        r"셈(?:이다|입니다|이에요|이었다|이었습니다)|"
        # ~해야 할 판이다
        r"(?:해야\s*할|하게\s*된|하게\s*됐)\s*판(?:이다|입니다|이에요)|"
        # ~로 보인다/여겨진다/비춰진다
        r"(?:으로|로)\s*(?:보인다|보입니다|보여진다|보여집니다|보이고\s*있다|보이고\s*있습니다)|"
        r"(?:으로|로)\s*(?:여겨진다|여겨집집니다|여겨지고\s*있다|여겨지고\s*있습니다)|"
        r"(?:으로|로)\s*(?:비춰진다|비춰집니다|비쳐진다|비쳐집니다|비쳐지고\s*있다|비쳐지고\s*있습니다)|"
        r"(?:으로|로)\s*(?:받아들여지고\s*있다|받아들여지고\s*있습니다|받아들여진다|받아들여집니다|받아들여졌다|받아들여졌습니다)|"
        # ~것 아니냐는/~지 않겠느냐는
        r"(?:는|은)\s*것\s*아니(?:냐는|냐고|겠냐는|냐며)|"
        r"(?:지|치)\s*않(?:겠느냐는|을까\s*하는|을까\s*싶은|느냐는)|"
        # ~가 아닌가 싶다
        r"(?:가|이)\s*아닌가\s*(?:싶다|싶습니다|하는|하다|합니다)|"
        # ~를 짐작하게/케 한다
        r"(?:을|를)?\s*짐작(?:하게|케)\s*(?:한다|합니다|했다|했습니다)"
        r")"
    ),
    
    # =========================================================================
    # 16. 완화표현형 (Hedges)
    # =========================================================================
    "완화표현형": re.compile(
        r"(?:"
        # 것으로 + 술어
        r"것으로\s*(?:보인다|보입니다|보여진다|보여집니다)|"
        r"것으로\s*(?:추정된다|추정됩니다|추정되고\s*있다|추정되고\s*있습니다)|"
        r"것으로\s*(?:판단된다|판단됩니다|판단되고\s*있다|판단되고\s*있습니다)|"
        r"것으로\s*(?:분석된다|분석됩니다|분석되고\s*있다|분석되고\s*있습니다)|"
        r"것으로\s*(?:예상된다|예상됩니다|예상되고\s*있다|예상되고\s*있습니다)|"
        r"것으로\s*(?:전망된다|전망됩니다|전망되고\s*있다|전망되고\s*있습니다)|"
        r"것으로\s*(?:관측된다|관측됩니다|관측되고\s*있다|관측되고\s*있습니다)|"
        r"것으로\s*(?:평가된다|평가됩니다|평가되고\s*있다|평가되고\s*있습니다)|"
        r"것으로\s*(?:풀이된다|풀이됩니다|풀이되고\s*있다|풀이되고\s*있습니다)|"
        r"것으로\s*(?:해석된다|해석됩니다|해석되고\s*있다|해석되고\s*있습니다)|"
        r"것으로\s*(?:파악된다|파악됩니다|파악되고\s*있다|파악되고\s*있습니다)|"
        r"것으로\s*(?:나타났다|나타났습니다|나타나고\s*있다|나타나고\s*있습니다)|"
        # 듯 + 술어
        r"듯\s*(?:보인다|보입니다|하다|합니다|싶다|싶습니다)|"
        # ~지도 모른다
        r"[을를]지도?\s*모른(?:다|릅니다)|"
        # ~ㄹ 것 같다
        r"[을를]\s*것\s*같(?:다|습니다)"
        r")"
    ),
}


# =============================================================================
# II. 객관성 지지 술어 (SUPPORT): 
# =============================================================================

SUPPORT_PREDICATES: Dict[str, Pattern] = {
    
    # =========================================================================
    # 1. 확인/검증형 
    # =========================================================================
    "확인형": re.compile(
        r"(?:"
        # 확인/밝혀지다/드러나다
        r"확인(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
        r"밝혀(?:졌다|졌습니다|진다|집니다|지고\s*있다|지고\s*있습니다)|"
        r"드러(?:났다|났습니다|난다|납니다|나고\s*있다|나고\s*있습니다)|"
        r"판명(?:됐다|됐습니다|된다|됩니다|났다|났습니다)|"
        r"입증(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
        r"규명(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
        r")"
    ),
    
    # =========================================================================
    # 2. 발견/탐지형
    # =========================================================================
    "발견형": re.compile(
        r"(?:"
        r"발견(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
        r"발각(?:됐다|됐습니다|된다|됩니다)|"
        r"적발(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
        r"포착(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
        r"감지(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
        r"파악(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)"
        r")"
    ),
    
    # =========================================================================
    # 3. 기록/집계형
    # =========================================================================
    "기록형": re.compile(
        r"(?:"
        r"기록(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
        r"집계(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
        r"조사(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
        r"측정(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
        r"나타(?:났다|났습니다|난다|납니다)"
        r")"
    ),
}


# =============================================================================
# 유틸리티 함수
# =============================================================================

def analyze_objectivity(text: str) -> Dict[str, Any]:
    """텍스트를 문장 단위로 나누어 객관성 의심/지지 요소를 분석"""
    splitter = KoreanSentenceSplitter()
    sentences = splitter.split(text)
    
    doubt_predicates = []
    support_predicates = []
    doubt_sentences = []
    support_sentences = []
    
    doubt_sentence_count = 0
    support_sentence_count = 0
    
    for sent in sentences:
        sent_doubt_matches = []
        sent_support_matches = []
        
        # 중복 방지를 위한 스팬(Span) 기반 체크
        doubt_spans = {}
        support_spans = {}
        
        for _, pattern in DOUBT_PREDICATES.items():
            for match in pattern.finditer(sent):
                m_text = match.group(0).strip()
                if m_text:
                    doubt_spans[match.span()] = m_text
        
        for _, pattern in SUPPORT_PREDICATES.items():
            for match in pattern.finditer(sent):
                m_text = match.group(0).strip()
                if m_text:
                    support_spans[match.span()] = m_text
        
        # 문장 내 추출 결과 정리
        if doubt_spans:
            sorted_doubt = [doubt_spans[s] for s in sorted(doubt_spans.keys())]
            doubt_predicates.extend(sorted_doubt)
            doubt_sentences.append(sent)
            doubt_sentence_count += 1
            
        if support_spans:
            sorted_support = [support_spans[s] for s in sorted(support_spans.keys())]
            support_predicates.extend(sorted_support)
            support_sentences.append(sent)
            support_sentence_count += 1
            
    total_sentences = doubt_sentence_count + support_sentence_count
    
    return {
        "doubt_predicates": doubt_predicates,
        "support_predicates": support_predicates,
        "doubt_sentences": doubt_sentences,
        "support_sentences": support_sentences,
        "doubt_count": doubt_sentence_count,
        "support_count": support_sentence_count,
        "objectivity_ratio": round(support_sentence_count / total_sentences, 4) if total_sentences > 0 else None
    }


def find_doubt_predicates(text: str) -> Dict[str, List[str]]:
    """객관성 의심 술어만 반환"""
    results = {}
    for category, pattern in DOUBT_PREDICATES.items():
        matches = pattern.findall(text)
        if matches:
            results[category] = matches
    return results


def find_support_predicates(text: str) -> Dict[str, List[str]]:
    """객관성 지지 술어만 반환"""
    results = {}
    for category, pattern in SUPPORT_PREDICATES.items():
        matches = pattern.findall(text)
        if matches:
            results[category] = matches
    return results


def print_pattern_summary():
    """패턴 요약 출력"""
    print("=" * 70)
    print("언론 보도 객관성 측정용 무주체 술어 정규표현식 v2")
    print("=" * 70)
    print()
    print("I. 객관성 의심 술어 (DOUBT) - 무주체 주관적 술어")
    print("-" * 70)
    for i, (name, _) in enumerate(DOUBT_PREDICATES.items(), 1):
        print(f"   {i:2d}. {name}")
    print(f"\n   총 {len(DOUBT_PREDICATES)}개 카테고리")
    print()
    print("II. 객관성 지지 술어 (SUPPORT) - 사실 확인/명시적 출처")
    print("-" * 70)
    for i, (name, _) in enumerate(SUPPORT_PREDICATES.items(), 1):
        print(f"   {i:2d}. {name}")
    print(f"\n   총 {len(SUPPORT_PREDICATES)}개 카테고리")
    print("=" * 70)


if __name__ == "__main__":
    print_pattern_summary()