File size: 6,349 Bytes
8485d6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""ํŒ๋‹จ ๊ทผ๊ฑฐ ์„ค๋ช… ์ƒ์„ฑ๊ธฐ (Rule-explainer-v1)
=================================================

๋ถ„๋ฅ˜๊ธฐ๊ฐ€ ๋Œ๋ ค์ค€ ๊ตฌ์กฐํ™”๋œ reasons ๋ฅผ **์ž์—ฐ์–ด ์„ค๋ช…**์œผ๋กœ ๋ณ€ํ™˜ํ•œ๋‹ค.
SPEC ยง1 ๊ธฐ๋Šฅ 4 โ€” *"๋ถ„๋ฅ˜ ๊ฒฐ๊ณผ ์„ค๋ช…"* ์˜ ๊ฐ•ํ™” ๋ฒ„์ „.

LLM ์„ ํ˜ธ์ถœํ•˜์ง€ ์•Š๊ณ  ํ…œํ”Œ๋ฆฟ/๋ฌธ์žฅ ์กฐ๋ฆฝ๋งŒ ์‚ฌ์šฉ โ€” ๊ฒฐ์ •์ ์ด๊ณ  ๋น ๋ฅด๋ฉฐ ๋น„์šฉ 0.
๋‹ค๋งŒ ์ž…๋ ฅ์ด ๊ทธ๋Œ€๋กœ ์ž์—ฐ์–ด ๋ฌธ์žฅ์œผ๋กœ ๋งคํ•‘๋˜๋„๋ก ์ถฉ๋ถ„ํžˆ ํ’๋ถ€ํ•œ ๋ถ„๊ธฐ๋ฅผ ๊ฐ–๋Š”๋‹ค.
"""
from __future__ import annotations

# Version tag of this explainer; explain() returns it under the "version" key.
EXPLAINER_VERSION = "rule-explainer-v1"

# entity_type -> Korean display description.
# Looked up by entity label when a reason is rendered; labels missing from this
# table are shown as-is by the callers in this module.
# NOTE(review): the values below look mis-encoded (mojibake) in this copy of the
# file — confirm the file is stored and read as UTF-8.
ENTITY_DESCRIPTIONS = {
    "KR_RRN":            "์ฃผ๋ฏผ๋“ฑ๋ก๋ฒˆํ˜ธ",
    "KR_PASSPORT":       "์—ฌ๊ถŒ๋ฒˆํ˜ธ",
    "KR_BIZ_NO":         "์‚ฌ์—…์ž๋“ฑ๋ก๋ฒˆํ˜ธ",
    "KR_PHONE":          "ํ•œ๊ตญ ์ „ํ™”๋ฒˆํ˜ธ",
    "KR_ADDRESS":        "ํ•œ๊ตญ ์ฃผ์†Œ",
    "PHONE_NUMBER":      "์ „ํ™”๋ฒˆํ˜ธ",
    "CREDIT_CARD":       "์‹ ์šฉ์นด๋“œ๋ฒˆํ˜ธ",
    "US_SSN":            "๋ฏธ๊ตญ SSN",
    "IBAN_CODE":         "IBAN ๊ณ„์ขŒ๋ฒˆํ˜ธ",
    "AWS_ACCESS_KEY":    "AWS ์•ก์„ธ์Šค ํ‚ค",
    "GENERIC_API_KEY":   "API ํ‚ค ์ถ”์ • ํ† ํฐ",
    "VIP_NAMES":         "VIP ๋ช…๋‹จ ์ด๋ฆ„",
    "INTERNAL_PROJECTS": "๋‚ด๋ถ€ ํ”„๋กœ์ ํŠธ๋ช…",
    "EMAIL_ADDRESS":     "์ด๋ฉ”์ผ ์ฃผ์†Œ",
    "IP_ADDRESS":        "IP ์ฃผ์†Œ",
    "URL":               "URL",
    "PERSON":            "์ธ๋ช…",
    "LOCATION":          "์ง€๋ช…/์žฅ์†Œ",
    "ORGANIZATION":      "์กฐ์ง๋ช…",
    "DATE_TIME":         "๋‚ ์งœ/์‹œ๊ฐ„",
}


def _grade_label(g: str) -> str:
    """Return the markdown-bold display label for a grade code.

    The known codes are ``"C"``, ``"S"`` and ``"O"``; any other value is
    returned unchanged.
    """
    labels = {
        "C": "**์œ„ํ—˜ (Critical)**",
        "S": "**๋ฏผ๊ฐ (Sensitive)**",
        "O": "**๊ณต๊ฐœ (Open)**",
    }
    return labels[g] if g in labels else g


def _signal_phrase(reason: dict) -> str:
    """Format one classifier reason as a short phrase ending in its signed score.

    Reads ``label`` (default ``"?"``), ``count`` (default ``1``),
    ``contribution`` (default ``0``) and ``kind`` from ``reason``.

    A ``"keyword"`` reason always shows the quoted raw label and its count.
    Any other kind is rendered through ``ENTITY_DESCRIPTIONS`` (falling back to
    the raw label) and shows the count only when it is greater than one.
    The contribution is always printed with an explicit sign and two decimals.
    """
    name = reason.get("label", "?")
    hits = reason.get("count", 1)
    points = reason.get("contribution", 0)

    if reason.get("kind") == "keyword":
        return f"๋“ฑ๊ธ‰ ๋ผ๋ฒจ '{name}' {hits}ํšŒ ({points:+.2f}์ )"

    shown = ENTITY_DESCRIPTIONS.get(name, name)
    # A single occurrence omits the count suffix entirely.
    tally = f" {hits}๊ฑด" if hits > 1 else ""
    return f"{shown}{tally} ({points:+.2f}์ )"


def explain(classification: dict, findings: list[dict] | None = None) -> dict:
    """Build a templated, human-readable explanation of a classification result.

    Args:
        classification: Classifier output. Keys read here (all optional):
            ``grade`` (default ``"O"``), ``score`` (default ``0.0``),
            ``confidence`` (default ``0.5``), ``thresholds`` (mapping with
            ``"C"`` and ``"S"``; missing keys fall back to ``5.0`` / ``2.0``)
            and ``reasons`` (list of dicts carrying ``kind``, ``label``,
            ``count`` and ``contribution``). The first three reasons are
            quoted as the decisive signals — presumably the classifier orders
            them by importance; confirm against the caller.
        findings: Accepted for interface compatibility; not used here.

    Returns:
        A dict with:
            summary:   one-line summary (grade + score + confidence)
            narrative: natural-language explanation sentences joined by
                       spaces (contains markdown bold)
            bullets:   key points the user can skim quickly
            version:   the explainer version tag (``EXPLAINER_VERSION``)
    """
    g = classification.get("grade", "O")
    score = classification.get("score", 0.0)
    conf = classification.get("confidence", 0.5)
    # Merge caller-supplied thresholds over the defaults so that a None value
    # or a mapping holding only one of "C"/"S" cannot raise further down.
    th = {"C": 5.0, "S": 2.0, **(classification.get("thresholds") or {})}
    reasons = classification.get("reasons") or []

    entity_reasons = [r for r in reasons if r.get("kind") == "entity"]
    kw_reasons = [r for r in reasons if r.get("kind") == "keyword"]
    top = reasons[:3]
    # Rendered once and shared by the narrative and the bullet list.
    top_phrases = [_signal_phrase(r) for r in top]

    # ---- summary (one line) ----
    summary = f"{_grade_label(g)} โ€” score {score} (์‹ ๋ขฐ๋„ {conf*100:.0f}%)"

    # ---- narrative (paragraph) ----
    parts: list[str] = []

    # 1) Why this grade was chosen (+ margin over the C threshold)
    if g == "C":
        margin = score - th["C"]
        parts.append(
            f"์ด ๋ฌธ์„œ๋Š” {_grade_label(g)} ๋“ฑ๊ธ‰์œผ๋กœ ๋ถ„๋ฅ˜๋ฉ๋‹ˆ๋‹ค โ€” "
            f"๋ˆ„์  ์ ์ˆ˜ {score} ๊ฐ€ C ์ž„๊ณ„๊ฐ’ {th['C']} ๋ฅผ {margin:.2f}์  ์ดˆ๊ณผํ–ˆ์Šต๋‹ˆ๋‹ค."
        )
    elif g == "S":
        parts.append(
            f"์ด ๋ฌธ์„œ๋Š” {_grade_label(g)} ๋“ฑ๊ธ‰์œผ๋กœ ๋ถ„๋ฅ˜๋ฉ๋‹ˆ๋‹ค โ€” "
            f"์ ์ˆ˜ {score} ๊ฐ€ S ์ž„๊ณ„๊ฐ’ {th['S']} ์™€ C ์ž„๊ณ„๊ฐ’ {th['C']} ์‚ฌ์ด์— ์œ„์น˜ํ•ฉ๋‹ˆ๋‹ค."
        )
    else:
        parts.append(
            f"์ด ๋ฌธ์„œ๋Š” {_grade_label(g)} ๋“ฑ๊ธ‰์œผ๋กœ ๋ถ„๋ฅ˜๋ฉ๋‹ˆ๋‹ค โ€” "
            f"์ ์ˆ˜ {score} ๊ฐ€ S ์ž„๊ณ„๊ฐ’ {th['S']} ๋ฏธ๋งŒ์œผ๋กœ, ๋“ฑ๊ธ‰์„ ์˜ฌ๋ฆด ๋งŒํ•œ ์‹ ํ˜ธ๊ฐ€ ๋ถ€์กฑํ•ฉ๋‹ˆ๋‹ค."
        )

    # 2) Decisive signals
    if top_phrases:
        if len(top_phrases) == 1:
            parts.append(f"๊ฒฐ์ •์  ์‹ ํ˜ธ๋Š” {top_phrases[0]} ๋‹จ ํ•œ ๊ฐœ์˜€์Šต๋‹ˆ๋‹ค.")
        else:
            parts.append("๊ฒฐ์ •์ ์ด์—ˆ๋˜ ์‹ ํ˜ธ: " + ", ".join(top_phrases) + ".")
    else:
        parts.append("๋งค์นญ๋œ ์‹ ํ˜ธ๊ฐ€ ์—†์–ด ์ ์ˆ˜๊ฐ€ 0์— ๊ฐ€๊น์Šต๋‹ˆ๋‹ค.")

    # 3) Signal composition: keywords and entities, keywords only, entities only
    if kw_reasons and entity_reasons:
        parts.append(
            f"๋“ฑ๊ธ‰ ๋ผ๋ฒจ ํ‚ค์›Œ๋“œ {len(kw_reasons)}์ข…๊ณผ ์‹๋ณ„์ž {len(entity_reasons)}์ข…์ด ํ•จ๊ป˜ ๋งค์นญ๋˜์–ด "
            f"๋“ฑ๊ธ‰์ด ๋” ์•ˆ์ •์ ์œผ๋กœ ๊ฒฐ์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
        )
    elif kw_reasons:
        parts.append(
            "๋ณธ๋ฌธ์— ๋ช…์‹œ๋œ ๋“ฑ๊ธ‰ ๋ผ๋ฒจ(์˜ˆ: ๋Œ€์™ธ๋น„/๊ธฐ๋ฐ€) ๋งŒ์œผ๋กœ ๊ฒฐ์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค โ€” "
            "์‹ค์ œ ์‹๋ณ„์ž๊ฐ€ ์—†์„ ์ˆ˜๋„ ์žˆ์œผ๋‹ˆ ์‚ฌ์šฉ์ž ๊ฒ€ํ† ๋ฅผ ๊ถŒ์žฅํ•ฉ๋‹ˆ๋‹ค."
        )
    elif entity_reasons:
        # Entity-only results get a sentence for grades C and S only.
        if g == "C":
            parts.append("๋“ฑ๊ธ‰ ๋ผ๋ฒจ ํ‚ค์›Œ๋“œ ์—†์ด ์‹๋ณ„์ž ๊ฒ€์ถœ๋งŒ์œผ๋กœ ์œ„ํ—˜ ๋“ฑ๊ธ‰์ด ํ™•์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
        elif g == "S":
            parts.append("๊ฐœ์ธ์ •๋ณด/๊ณ„์ • ์‹๋ณ„์ž ๊ฒ€์ถœ๋กœ ๋ฏผ๊ฐ ๋“ฑ๊ธ‰์ด ๋ถ€์—ฌ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")

    # 4) Key PII summary (only when present): entity reasons contributing >= 2.0
    pii_high = [r for r in entity_reasons if r.get("contribution", 0) >= 2.0]
    if pii_high:
        # Same "?" fallback as _signal_phrase, so a reason without a label
        # cannot raise KeyError here.
        names = ", ".join(
            ENTITY_DESCRIPTIONS.get(label, label)
            for label in (r.get("label", "?") for r in pii_high)
        )
        parts.append(f"๊ณ ์œ„ํ—˜ ์‹๋ณ„์ž: {names}.")

    # 5) Confidence comment: warn below 0.62, reassure above 0.85, else silent
    if conf < 0.62:
        parts.append(
            f"โš  ์‹ ๋ขฐ๋„ {conf*100:.0f}% โ€” ์ž„๊ณ„๊ฐ’ ๊ฒฝ๊ณ„์— ๊ฐ€๊นŒ์›Œ ์‚ฌ์šฉ์ž ์ตœ์ข… ํ™•์ธ์„ ๊ถŒ์žฅํ•ฉ๋‹ˆ๋‹ค."
        )
    elif conf > 0.85:
        parts.append(f"์‹ ๋ขฐ๋„ {conf*100:.0f}% โ€” ๋“ฑ๊ธ‰ ๊ฒฝ๊ณ„์—์„œ ์ถฉ๋ถ„ํžˆ ๋–จ์–ด์ง„ ๋ช…ํ™•ํ•œ ๋งค์นญ.")

    narrative = " ".join(parts)

    # ---- bullets (for the dashboard) ----
    bullets: list[str] = list(top_phrases)
    bullets.append(f"์ ์ˆ˜ {score} (Sโ‰ฅ{th['S']} ยท Cโ‰ฅ{th['C']})")
    bullets.append(f"์‹ ๋ขฐ๋„ {conf*100:.0f}%")
    if not entity_reasons and not kw_reasons:
        bullets.append("๋งค์นญ๋œ ์‹ ํ˜ธ ์—†์Œ โ€” ๊ธฐ๋ณธ๊ฐ’(O)")

    return {
        "summary":   summary,
        "narrative": narrative,
        "bullets":   bullets,
        "version":   EXPLAINER_VERSION,
    }