Spaces:
Sleeping
Sleeping
File size: 30,692 Bytes
7803723 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 |
"""
언론 보도 객관성 측정을 위한 무주체 피동형 술어 정규표현식 (v2)
================================================================================
I. 객관성 의심 술어 (DOUBT): 기자 의견으로 여겨질 수 있는 무주체 주관적 술어
II. 객관성 지지 술어 (SUPPORT): 사실 확인/명시적 출처 기반 술어
* 무주체 피동형 술어: 발언/판단의 주체가 문장에 없어 기자 의견으로 읽힐 수 있는 표현
"""
import re
from typing import Dict, Pattern, List, Any
from korean_sentence_splitter import KoreanSentenceSplitter
# =============================================================================
# I. 객관성 의심 술어 (DOUBT):
# =============================================================================
DOUBT_PREDICATES: Dict[str, Pattern] = {
# =========================================================================
# 1. 분석/해석형
# =========================================================================
"분석형": re.compile(
r"(?:"
# --- 피동형 술어 ---
r"분석(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"풀이(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"해석(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"진단(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"읽(?:힌다|힙니다|혔다|혔습니다|히고\s*있다|히고\s*있습니다)|"
# --- 명사형 술어: 분석/풀이/해석/진단 ---
r"(?:분석|풀이|해석|진단)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)\s*(?:분석|풀이|해석|진단)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:분석|풀이|해석|진단)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:분석|풀이|해석|진단)(?:이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다|이\s*나왔다|이\s*나왔습니다|이\s*지배적이다|이\s*지배적입니다)"
r")"
),
# =========================================================================
# 2. 전망/예측형
# =========================================================================
"전망형": re.compile(
r"(?:"
# --- 피동형 술어 ---
r"전망(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"예상(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"예측(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"점쳐(?:진다|집니다|졌다|졌습니다|지고\s*있다|지고\s*있습니다)|"
r"예견(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
# --- 명사형 술어 ---
r"(?:전망|예상|예측)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)\s*(?:전망|예상|예측|관측)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:전망|예상|예측|관측)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다|이\s*우세하다|이\s*우세합니다|이\s*지배적이다|이\s*지배적입니다)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:전망|예상|예측|관측)(?:이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다|이\s*나왔다|이\s*나왔습니다)"
r")"
),
# =========================================================================
# 3. 관측/추정형
# =========================================================================
"관측형": re.compile(
r"(?:"
# --- 피동형 술어 ---
r"관측(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"추정(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"추측(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"짐작(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
# --- 명사형 술어 ---
r"(?:관측|추정|추측)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)\s*(?:관측|추정|추측)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:관측|추정|추측)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다)"
r")"
),
# =========================================================================
# 4. 전언/보도형
# =========================================================================
"전언형": re.compile(
r"(?:"
# --- 피동형 술어 ---
r"알려(?:진다|집니다|졌다|졌습니다|지고\s*있다|지고\s*있습니다)|"
r"전해(?:진다|집니다|졌다|졌습니다|지고\s*있다|지고\s*있습니다)|"
r"보도(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"전달(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
# 것으로 + 전언
r"것으로\s*(?:알려졌다|알려졌습니다|알려집니다|알려지고\s*있다|알려지고\s*있습니다|전해졌다|전해졌습니다|전해집니다|전해지고\s*있다|전해지고\s*있습니다)|"
# --- 명사형 술어 ---
r"(?:소식|보도|소문)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)\s*(?:소식|보도|소문)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는)?\s*(?:소식|보도)(?:이\s*전해졌다|이\s*전해졌습니다|이\s*전해지고\s*있다|이\s*전해지고\s*있습니다|이\s*들려온다|이\s*들려옵니다|이\s*들려왔다|이\s*들려왔습니다)|"
# ~라는 겁니다/것입니다
r"(?:라는|다는)\s*(?:겁니다|것입니다|것이다|얘기다|얘기입니다|이야기다|이야기입니다)"
r")"
),
# =========================================================================
# 5. 평가/판단형
# =========================================================================
"평가형": re.compile(
r"(?:"
# --- 피동형 술어 ---
r"평가(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다|받고\s*있다|받고\s*있습니다|받았다|받았습니다|받는다|받습니다)|"
r"판단(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"인식(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"간주(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"여겨(?:진다|집니다|졌다|졌습니다|지고\s*있다|지고\s*있습니다)|"
# --- 명사형 술어 ---
r"(?:평가|판단|인식)(?:다|이다|입니다|였다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)\s*(?:평가|판단|인식)(?:다|이다|입니다|였다|이었다|이었습니다)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:평가|판단|인식)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:평가|판단)(?:가\s*나온다|가\s*나옵니다|가\s*나오고\s*있다|가\s*나오고\s*있습니다|가\s*나왔다|가\s*나왔습니다|이\s*나온다|이\s*나옵니다)|"
r"(?:라는|다는)?\s*(?:평가|판단)(?:를\s*받고\s*있다|를\s*받고\s*있습니다|를\s*받았다|를\s*받았습니다|를\s*받는다|를\s*받습니다)"
r")"
),
# =========================================================================
# 6. 비판/지적형
# =========================================================================
"비판형": re.compile(
r"(?:"
# --- 피동형 술어 ---
r"비판(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다|받고\s*있다|받고\s*있습니다|받았다|받았습니다|받는다|받습니다)|"
r"비난(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다|받고\s*있다|받고\s*있습니다|받았다|받았습니다|받는다|받습니다)|"
r"지적(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다|받고\s*있다|받고\s*있습니다|받았다|받았습니다|받는다|받습니다)|"
# --- 명사형 술어 ---
r"(?:비판|비난|지적)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)\s*(?:비판|비난|지적)(?:이다|입니다|이었다|이었습니다)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:비판|비난|지적)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다|도\s*제기됐다|도\s*제기됐습니다)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:비판|비난|지적)(?:이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다|이\s*나왔다|이\s*나왔습니다|이\s*제기됐다|이\s*제기됐습니다|이\s*제기되고\s*있다|이\s*제기되고\s*있습니다|이\s*쏟아지고\s*있다|이\s*쏟아지고\s*있습니다|이\s*쏟아졌다|이\s*쏟아졌습니다|이\s*잇따르고\s*있다|이\s*잇따르고\s*있습니다|이\s*잇따랐다|이\s*잇따랐습니다)|"
r"(?:비판|비난|지적)(?:을\s*받고\s*있다|을\s*받고\s*있습니다|을\s*받았다|을\s*받았습니다|을\s*받는다|을\s*받습니다|을\s*면치\s*못하고\s*있다|을\s*면치\s*못하고\s*있습니다)"
r")"
),
# =========================================================================
# 7. 제기/거론형
# =========================================================================
"제기형": re.compile(
r"(?:"
r"제기(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"거론(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"언급(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"지목(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"논의(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"검토(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
r"거명(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)"
r")"
),
# =========================================================================
# 8. 우려/의혹형
# =========================================================================
"우려형": re.compile(
r"(?:"
# 우려
r"우려(?:가|도|를)?\s*(?:있다|있습니다|나온다|나옵니다|나오고\s*있다|나오고\s*있습니다|나왔다|나왔습니다|제기됐다|제기됐습니다|제기되고\s*있다|제기되고\s*있습니다|커지고\s*있다|커지고\s*있습니다|커졌다|커졌습니다|낳고\s*있다|낳고\s*있습니다|낳았다|낳았습니다|높아지고\s*있다|높아지고\s*있습니다)|"
# 의혹
r"의혹(?:이|을|도)?\s*(?:있다|있습니다|제기됐다|제기됐습니다|제기되고\s*있다|제기되고\s*있습니다|불거졌다|불거졌습니다|불거지고\s*있다|불거지고\s*있습니다|일고\s*있다|일고\s*있습니다|일었다|일었습니다|사고\s*있다|사고\s*있습니다|샀다|샀습니다|증폭되고\s*있다|증폭되고\s*있습니다|확산되고\s*있다|확산되고\s*있습니다)|"
# 논란
r"논란(?:이|도)?\s*(?:있다|있습니다|일고\s*있다|일고\s*있습니다|일었다|일었습니다|되고\s*있다|되고\s*있습니다|됐다|됐습니다|불거졌다|불거졌습니다|불거지고\s*있다|불거지고\s*있습니다|예상된다|예상됩니다|이어지고\s*있다|이어지고\s*있습니다)|"
# 의문
r"의문(?:이|도)?\s*(?:있다|있습니다|제기됐다|제기됐습니다|제기되고\s*있다|제기되고\s*있습니다|일고\s*있다|일고\s*있습니다|남는다|남습니다|남아\s*있다|남아\s*있습니다|든다|듭니다)|"
# 명사형
r"(?:우려|의혹|논란|의문)(?:다|이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)\s*(?:우려|의혹|논란|의문)(?:다|이다|입니다|이었다|이었습니다)|"
r"(?:라는|다는)?\s*(?:우려|의혹|논란|의문)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*제기됐다|도\s*제기됐습니다)"
r")"
),
# =========================================================================
# 9. 가능성/여지형
# =========================================================================
"가능성형": re.compile(
r"(?:"
r"가능성(?:이|도|을)?\s*(?:있다|있습니다|크다|큽니다|높다|높습니다|낮다|낮습니다|제기됐다|제기됐습니다|거론되고\s*있다|거론되고\s*있습니다|점쳐지고\s*있다|점쳐지고\s*있습니다|배제할\s*수\s*없다|배제할\s*수\s*없습니다|열려\s*있다|열려\s*있습니다|열렸다|열렸습니다|제기된다|제기됩니다|나온다|나옵니다)|"
r"개연성(?:이|도)?\s*(?:있다|있습니다|크다|큽니다|높다|높습니다|낮다|낮습니다)|"
r"여지(?:가|도|를)?\s*(?:있다|있습니다|남아\s*있다|남아\s*있습니다|남겨져\s*있다|남겨져\s*있습니다|남는다|남습니다|남겼다|남겼습니다)|"
r"(?:라는|다는)?\s*(?:가능성|개연성|여지)(?:도\s*있다|도\s*있습니다|이\s*제기됐다|이\s*제기됐습니다|이\s*나온다|이\s*나옵니다)"
r")"
),
# =========================================================================
# 10. 분위기/목소리형
# =========================================================================
"분위기형": re.compile(
r"(?:"
# 분위기
r"분위기(?:다|이다|입니다|이었다|이었습니다|가\s*감지되고\s*있다|가\s*감지되고\s*있습니다|가\s*형성되고\s*있다|가\s*형성되고\s*있습니다|가\s*확산되고\s*있다|가\s*확산되고\s*있습니다|가\s*팽배하다|가\s*팽배합니다|가\s*역력하다|가\s*역력합니다)|"
# 목소리
r"목소리(?:가|도)?\s*(?:나온다|나옵니다|나오고\s*있다|나오고\s*있습니다|나왔다|나왔습니다|높아지고\s*있다|높아지고\s*있습니다|높아졌다|높아졌습니다|커지고\s*있다|커지고\s*있습니다|커졌다|커졌습니다|있다|있습니다)|"
# 기대
r"기대(?:가|를|도)?\s*(?:모아지고\s*있다|모아지고\s*있습니다|모이고\s*있다|모이고\s*있습니다|높아지고\s*있다|높아지고\s*있습니다|커지고\s*있다|커지고\s*있습니다|크다|큽니다|높다|높습니다)|"
# 관심/이목
r"(?:관심|이목)(?:이|을)?\s*(?:쏠리고\s*있다|쏠리고\s*있습니다|쏠렸다|쏠렸습니다|집중되고\s*있다|집중되고\s*있습니다|집중됐다|집중됐습니다|모아지고\s*있다|모아지고\s*있습니다)|"
# 기류/조짐/흐름
r"(?:기류|조짐|흐름|양상)(?:이|가)?\s*(?:감지되고\s*있다|감지되고\s*있습니다|감지됐다|감지됐습니다|포착되고\s*있다|포착되고\s*있습니다|나타나고\s*있다|나타나고\s*있습니다)"
r")"
),
# =========================================================================
# 11. 주장/입장형
# =========================================================================
"주장형": re.compile(
r"(?:"
# --- 피동형 술어 ---
r"주장(?:된다|됩니다|됐다|됐습니다|되고\s*있다|되고\s*있습니다)|"
# --- 명사형 술어 ---
r"(?:주장|입장|방침|계획|생각|확신|설명|해명)(?:이다|입니다|이었다|이었습니다|이에요)|"
r"(?:라는|다는|이라는|란|는|은|인)\s*(?:주장|입장|방침|계획|생각|확신|설명|해명)(?:이다|입니다|이었다|이었습니다)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:주장|입장)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|도\s*나오고\s*있다|도\s*나오고\s*있습니다)|"
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:주장|입장|설명|해명)(?:이\s*나온다|이\s*나옵니다|이\s*나오고\s*있다|이\s*나오고\s*있습니다|이\s*나왔다|이\s*나왔습니다)"
r")"
),
# =========================================================================
# 12. 시각/견해형
# =========================================================================
"시각형": re.compile(
r"(?:"
# 단독 사용
r"(?:시각|견해|관점|자평)(?:이다|다|입니다|이었다|였다|이었습니다|이에요)|"
# ~라는 + 명사
r"(?:라는|다는|이라는|란|는|은|인)\s*(?:시각|견해|관점|인식|자평)(?:이다|다|입니다|이었다|였다|이었습니다)|"
# 명사 + 도 있다/지배적이다
r"(?:라는|다는|이라는|란|는|은|인)?\s*(?:시각|견해|관점|인식)(?:도\s*있다|도\s*있습니다|도\s*나온다|도\s*나옵니다|이\s*있다|이\s*있습니다|이\s*나온다|이\s*나옵니다|이\s*지배적이다|이\s*지배적입니다|가\s*지배적이다|가\s*지배적입니다|이\s*우세하다|이\s*우세합니다|가\s*우세하다|가\s*우세합니다)"
r")"
),
# =========================================================================
# 13. 격찬/혹평형 (극단적 평가)
# =========================================================================
"격찬형": re.compile(
r"(?:"
r"(?:격찬|찬사|호평)(?:이|을)?\s*(?:쏟아졌다|쏟아졌습니다|쏟아지고\s*있다|쏟아지고\s*있습니다|이어졌다|이어졌습니다|이어지고\s*있다|이어지고\s*있습니다|나왔다|나왔습니다|나오고\s*있다|나오고\s*있습니다|받았다|받았습니다|받고\s*있다|받고\s*있습니다)|"
r"(?:혹평|악평)(?:이|을)?\s*(?:쏟아졌다|쏟아졌습니다|쏟아지고\s*있다|쏟아지고\s*있습니다|이어졌다|이어졌습니다|이어지고\s*있다|이어지고\s*있습니다|나왔다|나왔습니다|나오고\s*있다|나오고\s*있습니다|받았다|받았습니다|받고\s*있다|받고\s*있습니다)|"
r"(?:라는|다는)?\s*(?:격찬|찬사|호평|혹평|악평)(?:이다|입니다|이었다|이었습니다)"
r")"
),
# =========================================================================
# 15. 관용표현형
# =========================================================================
"관용표현형": re.compile(
r"(?:"
# ~한 셈이다
r"[은는인된한했던]\s*셈(?:이다|입니다|이에요|이었다|이었습니다)|"
r"셈(?:이다|입니다|이에요|이었다|이었습니다)|"
# ~해야 할 판이다
r"(?:해야\s*할|하게\s*된|하게\s*됐)\s*판(?:이다|입니다|이에요)|"
# ~로 보인다/여겨진다/비춰진다
r"(?:으로|로)\s*(?:보인다|보입니다|보여진다|보여집니다|보이고\s*있다|보이고\s*있습니다)|"
r"(?:으로|로)\s*(?:여겨진다|여겨집집니다|여겨지고\s*있다|여겨지고\s*있습니다)|"
r"(?:으로|로)\s*(?:비춰진다|비춰집니다|비쳐진다|비쳐집니다|비쳐지고\s*있다|비쳐지고\s*있습니다)|"
r"(?:으로|로)\s*(?:받아들여지고\s*있다|받아들여지고\s*있습니다|받아들여진다|받아들여집니다|받아들여졌다|받아들여졌습니다)|"
# ~것 아니냐는/~지 않겠느냐는
r"(?:는|은)\s*것\s*아니(?:냐는|냐고|겠냐는|냐며)|"
r"(?:지|치)\s*않(?:겠느냐는|을까\s*하는|을까\s*싶은|느냐는)|"
# ~가 아닌가 싶다
r"(?:가|이)\s*아닌가\s*(?:싶다|싶습니다|하는|하다|합니다)|"
# ~를 짐작하게/케 한다
r"(?:을|를)?\s*짐작(?:하게|케)\s*(?:한다|합니다|했다|했습니다)"
r")"
),
# =========================================================================
# 16. 완화표현형 (Hedges)
# =========================================================================
"완화표현형": re.compile(
r"(?:"
# 것으로 + 술어
r"것으로\s*(?:보인다|보입니다|보여진다|보여집니다)|"
r"것으로\s*(?:추정된다|추정됩니다|추정되고\s*있다|추정되고\s*있습니다)|"
r"것으로\s*(?:판단된다|판단됩니다|판단되고\s*있다|판단되고\s*있습니다)|"
r"것으로\s*(?:분석된다|분석됩니다|분석되고\s*있다|분석되고\s*있습니다)|"
r"것으로\s*(?:예상된다|예상됩니다|예상되고\s*있다|예상되고\s*있습니다)|"
r"것으로\s*(?:전망된다|전망됩니다|전망되고\s*있다|전망되고\s*있습니다)|"
r"것으로\s*(?:관측된다|관측됩니다|관측되고\s*있다|관측되고\s*있습니다)|"
r"것으로\s*(?:평가된다|평가됩니다|평가되고\s*있다|평가되고\s*있습니다)|"
r"것으로\s*(?:풀이된다|풀이됩니다|풀이되고\s*있다|풀이되고\s*있습니다)|"
r"것으로\s*(?:해석된다|해석됩니다|해석되고\s*있다|해석되고\s*있습니다)|"
r"것으로\s*(?:파악된다|파악됩니다|파악되고\s*있다|파악되고\s*있습니다)|"
r"것으로\s*(?:나타났다|나타났습니다|나타나고\s*있다|나타나고\s*있습니다)|"
# 듯 + 술어
r"듯\s*(?:보인다|보입니다|하다|합니다|싶다|싶습니다)|"
# ~지도 모른다
r"[을를]지도?\s*모른(?:다|릅니다)|"
# ~ㄹ 것 같다
r"[을를]\s*것\s*같(?:다|습니다)"
r")"
),
}
# =============================================================================
# II. 객관성 지지 술어 (SUPPORT):
# =============================================================================
SUPPORT_PREDICATES: Dict[str, Pattern] = {
# =========================================================================
# 1. 확인/검증형
# =========================================================================
"확인형": re.compile(
r"(?:"
# 확인/밝혀지다/드러나다
r"확인(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
r"밝혀(?:졌다|졌습니다|진다|집니다|지고\s*있다|지고\s*있습니다)|"
r"드러(?:났다|났습니다|난다|납니다|나고\s*있다|나고\s*있습니다)|"
r"판명(?:됐다|됐습니다|된다|됩니다|났다|났습니다)|"
r"입증(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
r"규명(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
r")"
),
# =========================================================================
# 2. 발견/탐지형
# =========================================================================
"발견형": re.compile(
r"(?:"
r"발견(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
r"발각(?:됐다|됐습니다|된다|됩니다)|"
r"적발(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
r"포착(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
r"감지(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다)|"
r"파악(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)"
r")"
),
# =========================================================================
# 3. 기록/집계형
# =========================================================================
"기록형": re.compile(
r"(?:"
r"기록(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
r"집계(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
r"조사(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
r"측정(?:됐다|됐습니다|된다|됩니다|되고\s*있다|되고\s*있습니다|했다|했습니다|한다|합니다)|"
r"나타(?:났다|났습니다|난다|납니다)"
r")"
),
}
# =============================================================================
# 유틸리티 함수
# =============================================================================
def analyze_objectivity(text: str) -> Dict[str, Any]:
"""텍스트를 문장 단위로 나누어 객관성 의심/지지 요소를 분석"""
splitter = KoreanSentenceSplitter()
sentences = splitter.split(text)
doubt_predicates = []
support_predicates = []
doubt_sentences = []
support_sentences = []
doubt_sentence_count = 0
support_sentence_count = 0
for sent in sentences:
sent_doubt_matches = []
sent_support_matches = []
# 중복 방지를 위한 스팬(Span) 기반 체크
doubt_spans = {}
support_spans = {}
for _, pattern in DOUBT_PREDICATES.items():
for match in pattern.finditer(sent):
m_text = match.group(0).strip()
if m_text:
doubt_spans[match.span()] = m_text
for _, pattern in SUPPORT_PREDICATES.items():
for match in pattern.finditer(sent):
m_text = match.group(0).strip()
if m_text:
support_spans[match.span()] = m_text
# 문장 내 추출 결과 정리
if doubt_spans:
sorted_doubt = [doubt_spans[s] for s in sorted(doubt_spans.keys())]
doubt_predicates.extend(sorted_doubt)
doubt_sentences.append(sent)
doubt_sentence_count += 1
if support_spans:
sorted_support = [support_spans[s] for s in sorted(support_spans.keys())]
support_predicates.extend(sorted_support)
support_sentences.append(sent)
support_sentence_count += 1
total_sentences = doubt_sentence_count + support_sentence_count
return {
"doubt_predicates": doubt_predicates,
"support_predicates": support_predicates,
"doubt_sentences": doubt_sentences,
"support_sentences": support_sentences,
"doubt_count": doubt_sentence_count,
"support_count": support_sentence_count,
"objectivity_ratio": round(support_sentence_count / total_sentences, 4) if total_sentences > 0 else None
}
def find_doubt_predicates(text: str) -> Dict[str, List[str]]:
"""객관성 의심 술어만 반환"""
results = {}
for category, pattern in DOUBT_PREDICATES.items():
matches = pattern.findall(text)
if matches:
results[category] = matches
return results
def find_support_predicates(text: str) -> Dict[str, List[str]]:
"""객관성 지지 술어만 반환"""
results = {}
for category, pattern in SUPPORT_PREDICATES.items():
matches = pattern.findall(text)
if matches:
results[category] = matches
return results
def print_pattern_summary():
"""패턴 요약 출력"""
print("=" * 70)
print("언론 보도 객관성 측정용 무주체 술어 정규표현식 v2")
print("=" * 70)
print()
print("I. 객관성 의심 술어 (DOUBT) - 무주체 주관적 술어")
print("-" * 70)
for i, (name, _) in enumerate(DOUBT_PREDICATES.items(), 1):
print(f" {i:2d}. {name}")
print(f"\n 총 {len(DOUBT_PREDICATES)}개 카테고리")
print()
print("II. 객관성 지지 술어 (SUPPORT) - 사실 확인/명시적 출처")
print("-" * 70)
for i, (name, _) in enumerate(SUPPORT_PREDICATES.items(), 1):
print(f" {i:2d}. {name}")
print(f"\n 총 {len(SUPPORT_PREDICATES)}개 카테고리")
print("=" * 70)
if __name__ == "__main__":
print_pattern_summary()
|