Spaces:
Sleeping
Sleeping
"""Rationale explanation generator (rule-explainer-v1).
=======================================================
Converts the structured ``reasons`` returned by the classifier into a **natural-language explanation**.
An enhanced version of SPEC §1, feature 4 — *"classification result explanation"*.
No LLM is called; only templates and sentence assembly are used — deterministic, fast, and zero cost.
The branching is nevertheless rich enough that the input maps directly onto natural-language sentences.
"""
| from __future__ import annotations | |
# Version tag of this explainer implementation.
EXPLAINER_VERSION: str = "rule-explainer-v1"

# entity_type -> human-readable description used when rendering a reason.
# NOTE(review): the description strings appear mis-encoded in this copy of the
# file; they are kept verbatim here — confirm against the original UTF-8 source.
ENTITY_DESCRIPTIONS: dict[str, str] = {
    # Korea-specific identifiers
    "KR_RRN": "์ฃผ๋ฏผ๋ฑ๋ก๋ฒํธ",
    "KR_PASSPORT": "์ฌ๊ถ๋ฒํธ",
    "KR_BIZ_NO": "์ฌ์ ์๋ฑ๋ก๋ฒํธ",
    "KR_PHONE": "ํ๊ตญ ์ ํ๋ฒํธ",
    "KR_ADDRESS": "ํ๊ตญ ์ฃผ์",
    # Generic / international identifiers
    "PHONE_NUMBER": "์ ํ๋ฒํธ",
    "CREDIT_CARD": "์ ์ฉ์นด๋๋ฒํธ",
    "US_SSN": "๋ฏธ๊ตญ SSN",
    "IBAN_CODE": "IBAN ๊ณ์ข๋ฒํธ",
    # Credentials
    "AWS_ACCESS_KEY": "AWS ์ก์ธ์ค ํค",
    "GENERIC_API_KEY": "API ํค ์ถ์ ํ ํฐ",
    # Name lists
    "VIP_NAMES": "VIP ๋ช ๋จ ์ด๋ฆ",
    "INTERNAL_PROJECTS": "๋ด๋ถ ํ๋ก์ ํธ๋ช ",
    # Contact / network
    "EMAIL_ADDRESS": "์ด๋ฉ์ผ ์ฃผ์",
    "IP_ADDRESS": "IP ์ฃผ์",
    "URL": "URL",
    # General named entities
    "PERSON": "์ธ๋ช ",
    "LOCATION": "์ง๋ช /์ฅ์",
    "ORGANIZATION": "์กฐ์ง๋ช ",
    "DATE_TIME": "๋ ์ง/์๊ฐ",
}
| def _grade_label(g: str) -> str: | |
| return {"C": "**์ํ (Critical)**", "S": "**๋ฏผ๊ฐ (Sensitive)**", "O": "**๊ณต๊ฐ (Open)**"}.get(g, g) | |
| def _signal_phrase(reason: dict) -> str: | |
| label = reason.get("label", "?") | |
| cnt = reason.get("count", 1) | |
| contrib = reason.get("contribution", 0) | |
| if reason.get("kind") == "keyword": | |
| return f"๋ฑ๊ธ ๋ผ๋ฒจ '{label}' {cnt}ํ ({contrib:+.2f}์ )" | |
| desc = ENTITY_DESCRIPTIONS.get(label, label) | |
| if cnt > 1: | |
| return f"{desc} {cnt}๊ฑด ({contrib:+.2f}์ )" | |
| return f"{desc} ({contrib:+.2f}์ )" | |
def explain(classification: dict, findings: list[dict] | None = None) -> dict:
    """Turn a classification result into a templated natural-language explanation.

    Args:
        classification: Classifier output. Keys read here: ``grade``
            (default "O"), ``score`` (default 0.0), ``confidence``
            (default 0.5), ``thresholds`` (mapping with "C" and "S"; missing
            or None entries fall back to 5.0 / 2.0) and ``reasons`` (list of
            dicts with ``kind``, ``label``, ``count``, ``contribution``).
        findings: Accepted for interface compatibility; not read by this
            function.

    Returns:
        A dict with:
            summary: one-line summary (grade, score, confidence).
            narrative: sentences joined by spaces (contains markdown bold).
            bullets: key points for quick scanning.
            version: the explainer version tag.
    """
    # NOTE(review): the user-facing string literals below appear mis-encoded in
    # this copy of the file; they are kept verbatim — confirm against the
    # original UTF-8 source.
    g = classification.get("grade", "O")
    score = classification.get("score", 0.0)
    conf = classification.get("confidence", 0.5)
    # Merge over the defaults so a partial or None ``thresholds`` cannot raise.
    th = {"C": 5.0, "S": 2.0, **(classification.get("thresholds") or {})}
    reasons = classification.get("reasons") or []
    entity_reasons = [r for r in reasons if r.get("kind") == "entity"]
    kw_reasons = [r for r in reasons if r.get("kind") == "keyword"]
    # Only the first three reasons are surfaced as decisive signals.
    top = reasons[:3]
    phrases = [_signal_phrase(r) for r in top]

    # ---- summary (one line) ----
    summary = f"{_grade_label(g)} โ score {score} (์ ๋ขฐ๋ {conf*100:.0f}%)"

    # ---- narrative (paragraph) ----
    parts: list[str] = []

    # 1) Why this grade, plus the margin. The lead clause is shared by all grades.
    lead = f"์ด ๋ฌธ์๋ {_grade_label(g)} ๋ฑ๊ธ์ผ๋ก ๋ถ๋ฅ๋ฉ๋๋ค โ "
    if g == "C":
        margin = score - th["C"]
        detail = f"๋์ ์ ์ {score} ๊ฐ C ์๊ณ๊ฐ {th['C']} ๋ฅผ {margin:.2f}์ ์ด๊ณผํ์ต๋๋ค."
    elif g == "S":
        detail = f"์ ์ {score} ๊ฐ S ์๊ณ๊ฐ {th['S']} ์ C ์๊ณ๊ฐ {th['C']} ์ฌ์ด์ ์์นํฉ๋๋ค."
    else:
        detail = f"์ ์ {score} ๊ฐ S ์๊ณ๊ฐ {th['S']} ๋ฏธ๋ง์ผ๋ก, ๋ฑ๊ธ์ ์ฌ๋ฆด ๋งํ ์ ํธ๊ฐ ๋ถ์กฑํฉ๋๋ค."
    parts.append(lead + detail)

    # 2) Decisive signals.
    if not phrases:
        parts.append("๋งค์นญ๋ ์ ํธ๊ฐ ์์ด ์ ์๊ฐ 0์ ๊ฐ๊น์ต๋๋ค.")
    elif len(phrases) == 1:
        parts.append(f"๊ฒฐ์ ์ ์ ํธ๋ {phrases[0]} ๋จ ํ ๊ฐ์์ต๋๋ค.")
    else:
        parts.append("๊ฒฐ์ ์ ์ด์๋ ์ ํธ: " + ", ".join(phrases) + ".")

    # 3) Signal composition (keyword reasons vs. entity reasons).
    if kw_reasons and entity_reasons:
        parts.append(
            f"๋ฑ๊ธ ๋ผ๋ฒจ ํค์๋ {len(kw_reasons)}์ข ๊ณผ ์๋ณ์ {len(entity_reasons)}์ข ์ด ํจ๊ป ๋งค์นญ๋์ด "
            f"๋ฑ๊ธ์ด ๋ ์์ ์ ์ผ๋ก ๊ฒฐ์ ๋์์ต๋๋ค."
        )
    elif kw_reasons:
        parts.append(
            "๋ณธ๋ฌธ์ ๋ช ์๋ ๋ฑ๊ธ ๋ผ๋ฒจ(์: ๋์ธ๋น/๊ธฐ๋ฐ) ๋ง์ผ๋ก ๊ฒฐ์ ๋์์ต๋๋ค โ "
            "์ค์ ์๋ณ์๊ฐ ์์ ์๋ ์์ผ๋ ์ฌ์ฉ์ ๊ฒํ ๋ฅผ ๊ถ์ฅํฉ๋๋ค."
        )
    elif entity_reasons:
        # Entity-only matches get a sentence for grades C and S only.
        if g == "C":
            parts.append("๋ฑ๊ธ ๋ผ๋ฒจ ํค์๋ ์์ด ์๋ณ์ ๊ฒ์ถ๋ง์ผ๋ก ์ํ ๋ฑ๊ธ์ด ํ์ ๋์์ต๋๋ค.")
        elif g == "S":
            parts.append("๊ฐ์ธ์ ๋ณด/๊ณ์ ์๋ณ์ ๊ฒ์ถ๋ก ๋ฏผ๊ฐ ๋ฑ๊ธ์ด ๋ถ์ฌ๋์์ต๋๋ค.")

    # 4) Entity reasons contributing at least 2.0 (only when present).
    pii_high = [r for r in entity_reasons if r.get("contribution", 0) >= 2.0]
    if pii_high:
        # Use the same "?" fallback as _signal_phrase so a reason without a
        # label cannot raise KeyError here.
        labels = [r.get("label", "?") for r in pii_high]
        names = ", ".join(ENTITY_DESCRIPTIONS.get(lb, lb) for lb in labels)
        parts.append(f"๊ณ ์ํ ์๋ณ์: {names}.")

    # 5) Confidence remark: only below 0.62 or above 0.85.
    if conf < 0.62:
        parts.append(
            f"โ ์ ๋ขฐ๋ {conf*100:.0f}% โ ์๊ณ๊ฐ ๊ฒฝ๊ณ์ ๊ฐ๊น์ ์ฌ์ฉ์ ์ต์ข ํ์ธ์ ๊ถ์ฅํฉ๋๋ค."
        )
    elif conf > 0.85:
        parts.append(f"์ ๋ขฐ๋ {conf*100:.0f}% โ ๋ฑ๊ธ ๊ฒฝ๊ณ์์ ์ถฉ๋ถํ ๋จ์ด์ง ๋ช ํํ ๋งค์นญ.")

    narrative = " ".join(parts)

    # ---- bullets (for the dashboard) ----
    bullets: list[str] = list(phrases)
    bullets.append(f"์ ์ {score} (Sโฅ{th['S']} ยท Cโฅ{th['C']})")
    bullets.append(f"์ ๋ขฐ๋ {conf*100:.0f}%")
    if not entity_reasons and not kw_reasons:
        bullets.append("๋งค์นญ๋ ์ ํธ ์์ โ ๊ธฐ๋ณธ๊ฐ(O)")

    return {
        "summary": summary,
        "narrative": narrative,
        "bullets": bullets,
        "version": EXPLAINER_VERSION,
    }