Spaces:
Running
Running
| """ | |
File/document pseudonymization and anonymization PoC framework.

Standards basis:
- ISO/IEC 20889:2018 Privacy enhancing data de-identification terminology
  and classification of techniques
- ISO/IEC 27559:2022 Privacy enhancing data de-identification framework
Terminology basis:
- W3C DPV 2.0 (Data Privacy Vocabulary) - dpv-pd:* compact IRI
Regulatory matrix:
- KR Personal Information Protection Act (PIPA) + PIPC Guidelines for Processing Pseudonymized Information
- JP Act on the Protection of Personal Information (APPI) + creation standards for pseudonymously / anonymously processed information
- US HIPAA Safe Harbor (45 CFR §164.514(b)(2)) + CCPA/CPRA + NIST SP 800-188
- EU GDPR Art. 4(5), Recital 26 + EDPB Guidelines 01/2025 + WP29 Op. 05/2014

This module takes the detection results (findings) of the PII Scanner as input,
applies transformations according to the ISO 20889 technique catalogue, and
determines compliance per jurisdiction (PoC - legal review is required before
production use).
| """ | |
| from __future__ import annotations | |
| import hashlib | |
| import hmac | |
| import re | |
| import secrets | |
| from dataclasses import asdict, dataclass, field | |
| from typing import Dict, List, Optional, Tuple | |
# =========================================================================
# DPV mapping - PII Scanner entity_type -> DPV 2.0 personal data category
# =========================================================================
# Format: entity_type -> (dpv_concept, explanatory note).
# run() looks entity types up here and falls back to "dpv:NonPersonalData"
# for anything that is not listed.
DPV_CATEGORY: Dict[str, Tuple[str, str]] = {
    "KR_RRN": ("dpv-pd:NationalIdentificationNumber", "์ง์ ์๋ณ์: ํ๊ตญ ์ฃผ๋ฏผ๋ฑ๋ก๋ฒํธ"),
    "KR_PASSPORT": ("dpv-pd:PassportNumber", "์ง์ ์๋ณ์: ํ๊ตญ ์ฌ๊ถ๋ฒํธ"),
    "KR_PHONE": ("dpv-pd:TelephoneNumber", "์ง์ ์๋ณ์: ํ๊ตญ ํด๋ํฐ"),
    "KR_BIZ_NO": ("dpv-pd:Identifier", "๋ฒ์ธ ์๋ณ์ (์ฌ์ ์๋ฑ๋ก๋ฒํธ)"),
    "KR_ADDRESS": ("dpv-pd:HomeAddress", "์ค์๋ณ์: ํ๊ตญ ์ฃผ์"),
    "EMAIL_ADDRESS": ("dpv-pd:EmailAddress", "์ง์ ์๋ณ์: ์ด๋ฉ์ผ"),
    "PHONE_NUMBER": ("dpv-pd:TelephoneNumber", "์ง์ ์๋ณ์: ์ผ๋ฐ ์ ํ๋ฒํธ"),
    "CREDIT_CARD": ("dpv-pd:CreditCardNumber", "๋ฏผ๊ฐ/๊ธ์ต: ์ ์ฉ์นด๋"),
    "US_SSN": ("dpv-pd:NationalIdentificationNumber", "์ง์ ์๋ณ์: ๋ฏธ๊ตญ SSN"),
    "URL": ("dpv-pd:URL", "์ค์๋ณ์(๊ฐ๋ฅ): URL"),
    "IP_ADDRESS": ("dpv-pd:IPAddress", "์ค์๋ณ์/Tracking"),
    "IBAN_CODE": ("dpv-pd:BankAccount", "๋ฏผ๊ฐ/๊ธ์ต: IBAN"),
    "VIP_PERSON": ("dpv-pd:Name", "์ง์ ์๋ณ์: ์์ฐ์ธ ์ด๋ฆ"),
    "INTERNAL_PROJECT": ("dpv:NonPersonalData", "๋ด๋ถ ์ฝ๋๋ช (PII ์๋)"),
    "AWS_ACCESS_KEY": ("dpv:NonPersonalData", "๋น๋ฐ: ์๊ฒฉ์ฆ๋ช "),
    "GENERIC_API_KEY": ("dpv:NonPersonalData", "๋น๋ฐ: ์๊ฒฉ์ฆ๋ช ํ๋ณด"),
    # Japanese PII - APPI / My Number Act
    "JP_MY_NUMBER": ("dpv-pd:NationalIdentificationNumber", "ๅไบบ่ญๅฅ็ฌฆๅท: ใใคใใณใใผ (12์๋ฆฌ). ๋ณ๋๋ฒ(ใใคใใณใใผๆณ) ์ ์ฉ"),
    "JP_PASSPORT": ("dpv-pd:PassportNumber", "ๅไบบ่ญๅฅ็ฌฆๅท: ์ผ๋ณธ ์ฌ๊ถ๋ฒํธ"),
    "JP_DRIVERS_LICENSE": ("dpv-pd:Identifier", "ๅไบบ่ญๅฅ็ฌฆๅท: ์ด์ ๋ฉดํ๋ฒํธ (12์๋ฆฌ)"),
    "JP_PHONE": ("dpv-pd:TelephoneNumber", "์ง์ ์๋ณ์: ์ผ๋ณธ ์ ํ๋ฒํธ"),
    "JP_POSTAL_CODE": ("dpv-pd:HomeAddress", "์ค์๋ณ์: ์ผ๋ณธ ์ฐํธ๋ฒํธ"),
    "JP_ADDRESS": ("dpv-pd:HomeAddress", "์ค์๋ณ์: ์ผ๋ณธ ์ฃผ์"),
    "JP_CORPORATE_NUMBER": ("dpv:NonPersonalData", "๋ฒ์ธ๋ฒํธ (13์๋ฆฌ) โ ๅไบบๆ ๅ ฑ ์๋"),
    "JP_BANK_ACCOUNT": ("dpv-pd:BankAccount", "๋ฏผ๊ฐ/๊ธ์ต: ์ผ๋ณธ ์ํ๊ตฌ์ข"),
}
# =========================================================================
# Per-jurisdiction regulatory matrix
# =========================================================================
# How each jurisdiction classifies an entity_type (direct/quasi/sensitive/secret).
# classify_entity() reads the list-valued keys; evaluate_compliance() reads
# "name", "law", "url" and "notes" for the verdict it emits.
JURISDICTION: Dict[str, Dict] = {
    "KR": {
        "name": "๋ํ๋ฏผ๊ตญ",
        "law": "๊ฐ์ธ์ ๋ณด๋ณดํธ๋ฒ ยง2ยทยง28-2~7 ยท ๊ฐ๋ช ์ ๋ณด ์ฒ๋ฆฌ ๊ฐ์ด๋๋ผ์ธ (PIPC)",
        "url": "https://www.pipc.go.kr/",
        "notes": (
            "์ง์ ์๋ณ์๋ ๊ฐ๋ช ํ ์ ๋น๊ฐ์ญ์ ๋ณํ ํ์. ์ถ๊ฐ์ ๋ณด(๋งคํ ํค ๋ฑ)๋ "
            "๋ณ๋ ๋ถ๋ฆฌ ๋ณด๊ด. ์ค์๋ณ์ ์กฐํฉ์ผ๋ก ์ฌ์๋ณ ๊ฐ๋ฅ์ฑ์ด ๋ฎ์์ผ ํจ "
            "(k-์ต๋ช ์ฑยทl-๋ค์์ฑ ๋ฑ ์ ์ ์ฑ ๊ฒํ ). ์ต๋ช ์ ๋ณด๋ ์ด๋ค ์ถ๊ฐ์ ๋ณด๋ก๋ "
            "๋ณต์ ๋ถ๊ฐ๋ฅํด์ผ ํจ (ยง2 ์ 1ํธ์2)."
        ),
        "direct": ["KR_RRN", "KR_PASSPORT", "EMAIL_ADDRESS", "PHONE_NUMBER", "KR_PHONE", "VIP_PERSON", "US_SSN"],
        "quasi": ["KR_ADDRESS", "KR_BIZ_NO", "IP_ADDRESS", "URL"],
        "sensitive":["CREDIT_CARD", "IBAN_CODE"],
        "secret": ["AWS_ACCESS_KEY", "GENERIC_API_KEY"],
    },
    "JP": {
        "name": "ๆฅๆฌ",
        "law": "ๅไบบๆ ๅ ฑไฟ่ญทๆณ (APPI) ยง2ยทยง16-2ยทยง35-2ยทยง43 / ใใคใใณใใผๆณ ยง3ยทยง19 / PPCใไปฎๅๅ ๅทฅๆ ๅ ฑใปๅฟๅๅ ๅทฅๆ ๅ ฑใฎไฝๆๅบๆบใ",
        "url": "https://www.ppc.go.jp/",
        "notes": (
            "ๅไบบ่ญๅฅ็ฌฆๅท(ใใคใใณใใผยทๆ ๅธยท้่ปขๅ ่จฑยทๆ็ด ๋ฑ)๋ ไปฎๅๅ ๅทฅ ์ "
            "ๅ้ค ๋๋ ๅพฉๅ ไธๅฏ่ฝๅ. ่ฆ้ ๆ ฎๅไบบๆ ๅ ฑ(๋ณ๋ ฅยท๋ฒ์ฃ๊ฒฝ๋ ฅ ๋ฑ)๋ ์ฌ์ "
            "๋์ ํ์. ใใคใใณใใผใฏ็นๅฎๅไบบๆ ๅ ฑใจใใฆๅฅ้ๅณๆ ผ่ฆๅถ("
            "ๅฉ็จ็ฎ็้ๅฎยทๆๅทๅๅฟ ้ ). ไปฎๅๅ ๅทฅๆ ๅ ฑ๋ ์ถ๊ฐ์ ๋ณด ๋ณ๋ ๊ด๋ฆฌ ์ "
            "์๋ณ ๊ฐ๋ฅ. ๅฟๅๅ ๅทฅๆ ๅ ฑ๋ ๅพฉๅ ไธๅฏ่ฝ + ๅ ๅทฅๆนๆณ ์ ๋ณด ๋ณด์กด ํ์."
        ),
        # Individual identification codes (enumerated by Cabinet Order) -
        # personal information on their own, treated as direct identifiers
        "direct": ["JP_MY_NUMBER", "JP_PASSPORT", "JP_DRIVERS_LICENSE",
                   "EMAIL_ADDRESS", "JP_PHONE", "PHONE_NUMBER", "VIP_PERSON"],
        "quasi": ["JP_ADDRESS", "JP_POSTAL_CODE", "IP_ADDRESS", "URL"],
        # APPI special-care-required personal information - needs separate
        # opt-in consent, hence its own label
        "sensitive_appi": [],  # extend here once medical/health-history detectors are added
        "sensitive": ["CREDIT_CARD", "JP_BANK_ACCOUNT", "IBAN_CODE"],
        "secret": ["AWS_ACCESS_KEY", "GENERIC_API_KEY"],
        # My Number Act applies - suppress is mandatory; anything else makes
        # evaluate_compliance() return verdict "insufficient"
        "my_number_act": ["JP_MY_NUMBER"],
    },
    "US": {
        "name": "United States",
        "law": "HIPAA Safe Harbor (45 CFR ยง164.514(b)(2)) ยท CCPA/CPRA ยท NIST SP 800-188",
        "url": "https://www.hhs.gov/hipaa/",
        "notes": (
            "HIPAA Safe Harbor 18๊ฐ์ง ์๋ณ์ ๋ชจ๋ ์ ๊ฑฐ/์ผ๋ฐํ โ ์ด๋ฆยท์ฃผ์ "
            "(์ 3์๋ฆฌ ZIP ๋ง ๊ฐ๋ฅ, ์ธ๊ตฌ โฅ20,000), ๋ชจ๋ ๋ ์ง(์ฐ๋๋ง), ์ ํยทํฉ์คยท"
            "์ด๋ฉ์ผยทSSNยท๊ณ์ ยท์ธ์ฆ์ยท์ฐจ๋ยท๊ธฐ๊ธฐยทURLยทIPยท์์ฒด์ ๋ณดยท์ฌ์ง. CCPA "
            "deidentified data ๋ 'cannot reasonably identify' + ๊ธฐ์ ยท๊ณ์ฝ ํต์ ."
        ),
        "direct": ["KR_RRN", "KR_PASSPORT", "EMAIL_ADDRESS", "PHONE_NUMBER", "KR_PHONE", "VIP_PERSON", "US_SSN", "URL", "IP_ADDRESS", "CREDIT_CARD", "IBAN_CODE"],
        "quasi": ["KR_ADDRESS", "KR_BIZ_NO"],
        "sensitive":[],
        "secret": ["AWS_ACCESS_KEY", "GENERIC_API_KEY"],
    },
    "EU": {
        "name": "European Union",
        "law": "GDPR Art. 4(5), Recital 26 ยท EDPB Guidelines 01/2025 on Pseudonymisation ยท WP29 Op. 05/2014",
        "url": "https://edpb.europa.eu/",
        "notes": (
            "Pseudonymisation = ์ถ๊ฐ์ ๋ณด๋ฅผ ๋ณ๋๋ก ๋ณด๊ดํ๊ณ ๊ธฐ์ ยท์กฐ์ง์ ์กฐ์น๋ก "
            "์ฌ์๋ณ์ ์ฐจ๋จ(Art 4(5)). Anonymisation = ์ด๋ค ํฉ๋ฆฌ์ ์๋จ์ผ๋ก๋ "
            "์ฌ์๋ณ ๋ถ๊ฐ๋ฅ (Recital 26). Special categories(Art 9: ๊ฑด๊ฐยท์์ฒดยท"
            "๋ฏผ์กฑ ๋ฑ) ์ ์ถ๊ฐ ๋ณดํธ. Singling-out, linkability, inference 3๊ฐ "
            "๋ฆฌ์คํฌ๊ฐ ๋ชจ๋ ์ ๊ฑฐ๋์ด์ผ ์ต๋ช ."
        ),
        "direct": ["KR_RRN", "KR_PASSPORT", "EMAIL_ADDRESS", "PHONE_NUMBER", "KR_PHONE", "VIP_PERSON", "US_SSN", "IP_ADDRESS"],
        "quasi": ["KR_ADDRESS", "KR_BIZ_NO", "URL"],
        "sensitive":["CREDIT_CARD", "IBAN_CODE"],
        "secret": ["AWS_ACCESS_KEY", "GENERIC_API_KEY"],
    },
}
# =========================================================================
# Recommended techniques (citing the ISO/IEC 20889 classification)
# =========================================================================
# Pseudonymizer.transform() resolves technique_id to the method named
# "_<technique_id>"; entity types missing from this table default to "suppress".
TECHNIQUE: Dict[str, Tuple[str, str, str]] = {
    # entity -> (technique_id, ISO 20889 citation, DPV expression)
    "KR_RRN": ("tokenize_random", "ISO 20889 ยง8.4 Tokenization (random) โ ์ง์ ์๋ณ์, ๋งคํ ํค๋ ๋ถ๋ฆฌ ๋ณด๊ด", "dpv:Pseudonymisation+dpv:Tokenisation"),
    "KR_PASSPORT": ("tokenize_random", "ISO 20889 ยง8.4 Tokenization", "dpv:Pseudonymisation+dpv:Tokenisation"),
    "KR_PHONE": ("mask_partial", "ISO 20889 ยง7.5 Masking โ ์ 3 / ๋ค 4 ์ ์ง", "dpv:DataMasking"),
    "PHONE_NUMBER": ("mask_partial", "ISO 20889 ยง7.5 Masking", "dpv:DataMasking"),
    "EMAIL_ADDRESS": ("hash_local_keep_domain", "ISO 20889 ยง8.4 Cryptographic โ local ๋ถ๋ถ HMAC-BLAKE2b, ๋๋ฉ์ธ ์ ์ง", "dpv:Pseudonymisation+dpv:Encryption"),
    "KR_ADDRESS": ("generalize_to_city", "ISO 20889 ยง7.2 Generalization โ ์ยท๋ ๋จ์๊น์ง ์ผ๋ฐํ (HIPAA SH ยง164.514(b)(2)(i)(B) ์ ์ ํฉ)", "dpv:Generalisation"),
    "CREDIT_CARD": ("mask_pan", "PCI-DSS Req 3.4 โ ์ 6 / ๋ค 4 ์ ์ง, ์ค๊ฐ ๋ง์คํน", "dpv:DataMasking"),
    "US_SSN": ("tokenize_random", "์ง์ ์๋ณ์ โ ๋น๊ฐ์ญ ํ ํฐ", "dpv:Pseudonymisation+dpv:Tokenisation"),
    "IBAN_CODE": ("mask_partial", "ISO 20889 ยง7.5 Masking โ ๊ตญ๊ฐ์ฝ๋+์ฒดํฌ๋์งํธ ์ ์ง, ๊ณ์ข๋ถ ์ค๊ฐ ๋ง์คํน", "dpv:DataMasking"),
    "VIP_PERSON": ("pseudonym_consistent", "ISO 20889 ยง8.4 Pseudonymisation โ ๋์ผ์ธ ์ผ๊ด ๋งคํ(HMAC)", "dpv:Pseudonymisation"),
    "AWS_ACCESS_KEY": ("suppress", "๋น๋ฐ ์๊ฒฉ์ฆ๋ช โ ์ฆ์ ํ์ (rotate) + ์์ ์ ๊ฑฐ", "dpv:Erasure"),
    "GENERIC_API_KEY": ("suppress", "๋น๋ฐ ํ๋ณด โ ๋ณด์์ ์์ ์ ๊ฑฐ", "dpv:Erasure"),
    "INTERNAL_PROJECT": ("tokenize_random", "๋ด๋ถ ์ฝ๋๋ช โ ์ธ๋ถ ๋ ธ์ถ ์ ๋ฌด์์ ํ ํฐ", "dpv:Pseudonymisation"),
    "KR_BIZ_NO": ("mask_partial", "๋ฒ์ธ ID โ ๋ถ๋ถ ๋ง์คํน (์ ์ฒด ๋น์๋ณ ์์๋ ํ ํฐํ)", "dpv:DataMasking"),
    "URL": ("generalize_url", "ISO 20889 ยง7.2 โ ํธ์คํธ๋ง ์ ์ง, ๊ฒฝ๋ก/์ฟผ๋ฆฌ ์ ๊ฑฐ", "dpv:Generalisation"),
    "IP_ADDRESS": ("ip_truncate", "ISO 20889 ยง7.2 โ IPv4 ๋ง์ง๋ง ์ฅํ ์ ๋จ (/24)", "dpv:Generalisation"),
    # Recommended techniques for Japanese PII
    "JP_MY_NUMBER": ("suppress", "ใใคใใณใใผๆณ ยง19 ๅฉ็จ็ฎ็ๅคไฟ็ฎก็ฆๆญข โ ไปฎๅๅ ๅทฅยทๅฟๅๅ ๅทฅ ๋ชจ๋ ์์ ์ ๊ฑฐ", "dpv:Erasure"),
    "JP_PASSPORT": ("tokenize_random", "ISO 20889 ยง8.4 โ ๅไบบ่ญๅฅ็ฌฆๅท (ๆ ๅธ็ชๅท) ๋น๊ฐ์ญ ํ ํฐํ", "dpv:Pseudonymisation+dpv:Tokenisation"),
    "JP_DRIVERS_LICENSE": ("tokenize_random", "ISO 20889 ยง8.4 โ ๅไบบ่ญๅฅ็ฌฆๅท (ๅ ฌๅฎๅงๅกไผ็บ่ก็ชๅท) ๋น๊ฐ์ญ ํ ํฐํ", "dpv:Pseudonymisation+dpv:Tokenisation"),
    "JP_PHONE": ("mask_partial", "ISO 20889 ยง7.5 โ ์ 3 / ๋ค 4 ์ ์ง (ํ๊ตญ KR_PHONE ๊ณผ ๋์ผ ์ ์ฑ )", "dpv:DataMasking"),
    "JP_POSTAL_CODE": ("generalize_postal", "์ 3์๋ฆฌ (ํ์ ๊ตฌ์ญ ๋จ์) ๋ง ์ ์ง โ HIPAA SH ์ ZIP3 ์ ํฉ", "dpv:Generalisation"),
    "JP_ADDRESS": ("generalize_to_city", "ISO 20889 ยง7.2 โ ้ฝ้ๅบ็ยทๅธๅบ็บๆ ๋จ์๊น์ง ์ผ๋ฐํ", "dpv:Generalisation"),
    "JP_CORPORATE_NUMBER": ("identity", "๋ฒ์ธ๋ฒํธ๋ ๅไบบๆ ๅ ฑ ์๋ (ๆณไบบ็จๆณ ยง10-3 ๅ ฌ้) โ ๊ธฐ๋ณธ ๋ณด์กด", "dpv:NonPersonalData"),
    "JP_BANK_ACCOUNT": ("mask_partial", "ISO 20889 ยง7.5 โ ๆซๅฐพ4ๆก ์ ์ง, ๋ณธ๋ฌธ ๋ง์คํน", "dpv:DataMasking"),
}
# =========================================================================
# Data classes
# =========================================================================
@dataclass
class EntityRecord:
    """One detected entity together with its transformation and classification.

    Declared as a dataclass because ``run`` builds instances with keyword
    arguments and serialises them with ``dataclasses.asdict``; a plain class
    carrying only annotations supports neither.
    """

    index: int            # 1-based position among the selected findings
    entity_type: str      # scanner entity type, e.g. "EMAIL_ADDRESS"
    original: str         # text of the finding before transformation
    start: int            # start offset of the finding in the input text
    end: int              # end offset of the finding in the input text
    score: float          # detector score (0.0 when the finding has none)
    dpv_concept: str      # DPV concept looked up in DPV_CATEGORY
    dpv_note: str         # explanatory note accompanying the DPV concept
    technique: str        # technique id returned by Pseudonymizer.transform
    technique_note: str   # citation/note for the technique
    technique_dpv: str    # DPV expression of the technique
    transformed: str      # replacement text written into the output
    classifications: Dict[str, str]  # {jurisdiction: 'direct'|'quasi'|'sensitive'|'secret'|'unmapped'}
@dataclass
class ComplianceVerdict:
    """Per-jurisdiction outcome produced by ``evaluate_compliance``.

    Declared as a dataclass because it is constructed with keyword arguments
    and serialised with ``dataclasses.asdict``; a plain class carrying only
    annotations supports neither.
    """

    jurisdiction: str                # jurisdiction code, e.g. "KR"
    name: str                        # display name from the JURISDICTION table
    law: str                         # governing law / guideline citation
    url: str                         # regulator URL
    notes: str                       # free-text regulatory notes
    treatment_level: str             # 'pseudonymization' | 'anonymization'
    counts: Dict[str, int]           # direct/quasi/sensitive/secret
    untreated: List[str]             # entity types whose output equals the input
    verdict: str                     # 'compliant' | 'partial' | 'insufficient'
    rationale: str                   # human-readable reason for the verdict
    requirements_met: List[str]      # checklist items considered satisfied
    requirements_pending: List[str]  # checklist items still open
# =========================================================================
# Transformers (one per ISO 20889 technique)
# =========================================================================
class Pseudonymizer:
    """Collection of transformation techniques.

    The salt and the mapping table are kept on the instance. In production this
    mapping is 'additional information' in the sense of GDPR Art 4(5), so it
    should be stored separately (KMS / HSM); the PoC keeps it in memory.

    Every technique method takes ``(entity_type, value)`` and returns the
    replacement string. A technique that cannot parse its input falls back to
    ``_suppress`` so that an unrecognised value is never passed through.
    """

    def __init__(self, salt: Optional[bytes] = None, anonymize: bool = False):
        # A falsy salt (None or b"") is replaced by 16 random bytes.
        self.salt = salt or secrets.token_bytes(16)
        self.anonymize = anonymize  # when True the consistency mapping is disabled too
        self.consistent: Dict[Tuple[str, str], str] = {}
        self._counter: Dict[str, int] = {}

    def transform(self, entity_type: str, value: str) -> Tuple[str, str, str, str]:
        """Return (transformed text, technique_id, ISO 20889 note, DPV expression)."""
        tech_id, note, dpv = TECHNIQUE.get(
            entity_type,
            ("suppress", "๊ธฐ๋ณธ ์ ์ฑ โ ๋ฏธ์ง์ ์ํฐํฐ๋ ๋ณด์์ ์ผ๋ก ์ ๊ฑฐ", "dpv:Erasure"),
        )
        # Technique ids map onto "_<id>" methods; an unknown id suppresses.
        method = getattr(self, f"_{tech_id}", self._suppress)
        return method(entity_type, value), tech_id, note, dpv

    # ---- technique implementations ----
    def _suppress(self, et: str, v: str) -> str:
        """Remove the value entirely."""
        return "[REDACTED]"

    def _tokenize_random(self, et: str, v: str) -> str:
        """Replace the value with a per-entity-type sequential token.

        Outside anonymize mode the same (type, value) pair always yields the
        same token, and the pair is recorded in ``self.consistent``.
        """
        if not self.anonymize:
            key = (et, v)
            if key in self.consistent:
                return self.consistent[key]
        n = self._counter.get(et, 0) + 1
        self._counter[et] = n
        token = f"<{et}_{n:04d}>"
        if not self.anonymize:
            self.consistent[(et, v)] = token
        return token

    def _pseudonym_consistent(self, et: str, v: str) -> str:
        """Replace a person name with a keyed-hash pseudonym."""
        if self.anonymize:
            # Anonymization: identity is not preserved either - fresh value each time
            return f"<PERSON_{secrets.token_hex(3).upper()}>"
        key = (et, v)
        if key in self.consistent:
            return self.consistent[key]
        h = hmac.new(self.salt, v.encode("utf-8"), hashlib.blake2b).hexdigest()[:8]
        token = f"<PERSON_{h.upper()}>"
        self.consistent[key] = token
        return token

    def _mask_partial(self, et: str, v: str) -> str:
        """Mask the middle digits, keeping the first 3 and the last 4.

        Non-digit characters (separators, letters) are left in place.
        """
        digit_pos = [i for i, c in enumerate(v) if c.isdigit()]
        keep_front, keep_back = 3, 4
        # With keep_front + keep_back digits or fewer nothing would be masked
        # and the value would come back unchanged, so suppress it instead.
        if len(digit_pos) <= keep_front + keep_back:
            return self._suppress(et, v)
        masked = set(digit_pos[keep_front : len(digit_pos) - keep_back])
        return "".join("*" if i in masked else c for i, c in enumerate(v))

    def _mask_pan(self, et: str, v: str) -> str:
        """Mask a card number, keeping the first 6 and the last 4 digits."""
        digit_pos = [i for i, c in enumerate(v) if c.isdigit()]
        if len(digit_pos) < 13:
            return self._suppress(et, v)
        masked = set(digit_pos[6 : len(digit_pos) - 4])
        return "".join("*" if i in masked else c for i, c in enumerate(v))

    def _hash_local_keep_domain(self, et: str, v: str) -> str:
        """Replace the local part of an e-mail address with a keyed hash."""
        if "@" not in v:
            return self._suppress(et, v)
        local, domain = v.split("@", 1)
        h = hmac.new(self.salt, local.encode("utf-8"), hashlib.blake2b).hexdigest()[:8]
        return f"user-{h}@{domain}"

    _KR_PROVINCES = (
        "์์ธ", "๋ถ์ฐ", "๋๊ตฌ", "์ธ์ฒ", "๊ด์ฃผ", "๋์ ", "์ธ์ฐ", "์ธ์ข ",
        "๊ฒฝ๊ธฐ", "๊ฐ์", "์ถฉ๋ถ", "์ถฉ๋จ", "์ ๋ถ", "์ ๋จ", "๊ฒฝ๋ถ", "๊ฒฝ๋จ", "์ ์ฃผ",
    )
    _JP_PROVINCES = (
        "ๆฑไบฌ้ฝ", "ไบฌ้ฝๅบ", "ๅคง้ชๅบ", "ๅๆตท้",
        "็ฅๅฅๅท็", "ๅผ็็", "ๅ่็", "่จๅ็", "ๆ ๆจ็", "็พค้ฆฌ็",
        "ๆ็ฅ็", "ๅฒ้็", "ไธ้็", "้ๅฒก็",
        "ๅ ตๅบซ็", "ๅฅ่ฏ็", "ๅๆญๅฑฑ็", "ๆป่ณ็",
        "็ฆๅฒก็", "ไฝ่ณ็", "้ทๅด็", "็ๆฌ็", "ๅคงๅ็", "ๅฎฎๅด็", "้นฟๅ ๅณถ็", "ๆฒ็ธ็",
        "ๅฎฎๅ็", "็ฆๅณถ็", "ๅฑฑๅฝข็", "็ง็ฐ็", "ๅฒฉๆ็", "้ๆฃฎ็",
        "ๆฐๆฝ็", "ๅฏๅฑฑ็", "็ณๅท็", "็ฆไบ็", "ๅฑฑๆขจ็", "้ท้็",
        "้ณฅๅ็", "ๅณถๆ น็", "ๅฒกๅฑฑ็", "ๅบๅณถ็", "ๅฑฑๅฃ็",
        "ๅพณๅณถ็", "้ฆๅท็", "ๆๅช็", "้ซ็ฅ็",
    )

    def _generalize_to_city(self, et: str, v: str) -> str:
        """Keep only a recognised leading province/prefecture name."""
        # JP first (longer match)
        for p in self._JP_PROVINCES:
            if v.startswith(p):
                return f"{p} (ไปฅไธไธ่ฌๅ)"
        for p in self._KR_PROVINCES:
            if v.startswith(p):
                return f"{p} (์ดํ ์ผ๋ฐํ)"
        return self._suppress(et, v)

    def _generalize_postal(self, et: str, v: str) -> str:
        """Japanese postal code - keep only the first 3 digits (regional unit)."""
        m = re.search(r"(\d{3})-?\d{4}", v)
        if m:
            return f"ใ{m.group(1)}-****"
        return self._suppress(et, v)

    def _identity(self, et: str, v: str) -> str:
        """No transformation - items that are not personal information (e.g. corporate numbers)."""
        return v

    def _ip_truncate(self, et: str, v: str) -> str:
        """Truncate a dotted-quad IPv4 address to its /24 network."""
        m = re.match(r"(\d+)\.(\d+)\.(\d+)\.\d+", v)
        if m:
            return f"{m.group(1)}.{m.group(2)}.{m.group(3)}.0/24"
        return self._suppress(et, v)

    def _generalize_url(self, et: str, v: str) -> str:
        """Keep only scheme and host(:port) of an http(s) URL.

        The authority stops at the first '/', '?' or '#', so a query string or
        fragment on a path-less URL is dropped as well, and any "user:pass@"
        userinfo is stripped. Anything that is not an http(s) URL is
        suppressed, matching the fallback of the other techniques.
        """
        m = re.match(r"(https?://)(?:[^/?#@]*@)?([^/?#@]+)", v, flags=re.IGNORECASE)
        if m:
            return f"{m.group(1)}{m.group(2)}/[โฆ]"
        return self._suppress(et, v)
# =========================================================================
# Classification / evaluation functions
# =========================================================================
def classify_entity(entity_type: str, jurisdictions: List[str]) -> Dict[str, str]:
    """Classify ``entity_type`` under each requested jurisdiction.

    Returns ``{jurisdiction: label}``. The label is the first rule list, in
    precedence order, that contains the entity type, or ``"unmapped"`` when no
    list matches or the jurisdiction has no entry in ``JURISDICTION``.
    """
    # The JP-only labels come first: items under the My Number Act get a
    # stricter classification than plain "direct".
    precedence = (
        "my_number_act",
        "sensitive_appi",
        "direct",
        "quasi",
        "sensitive",
        "secret",
    )
    labels: Dict[str, str] = {}
    for code in jurisdictions:
        rulebook = JURISDICTION.get(code)
        label = "unmapped"
        if rulebook:
            label = next(
                (name for name in precedence if entity_type in rulebook.get(name, [])),
                "unmapped",
            )
        labels[code] = label
    return labels
| def _requirements_per_jurisdiction(j: str, level: str) -> List[str]: | |
| """๊ดํ /์ฒ๋ฆฌ์์ค๋ณ ํต์ฌ ์๊ตฌ์ฌํญ ์ฒดํฌ๋ฆฌ์คํธ.""" | |
| common = [ | |
| "์ง์ ์๋ณ์ ๋ชจ๋ ๋ณํ/์ ๊ฑฐ", | |
| "๋ฏผ๊ฐ์ ๋ณด(๊ธ์ตยท๊ฑด๊ฐ ๋ฑ) ๋ง์คํน/์ ๊ฑฐ", | |
| "๋น๋ฐ ์๊ฒฉ์ฆ๋ช ์์ ์ ๊ฑฐ", | |
| ] | |
| by_level = { | |
| "pseudonymization": [ | |
| "์ถ๊ฐ์ ๋ณด(๋งคํยทํค) ๋ณ๋ ๋ถ๋ฆฌ ๋ณด๊ด", | |
| "์ฌ์๋ณ ์๋ ๋ฐฉ์ง ๊ธฐ์ ยท์กฐ์ง์ ์กฐ์น", | |
| ], | |
| "anonymization": [ | |
| "์ถ๊ฐ์ ๋ณด๋ฅผ ํฌํจํ ์ด๋ค ํฉ๋ฆฌ์ ์๋จ์ผ๋ก๋ ์ฌ์๋ณ ๋ถ๊ฐ", | |
| "์ค์๋ณ์ ์กฐํฉ ์ฌ์๋ณ ์ํ ๊ฒ์ฆ (k-์ต๋ช ์ฑ ๋ฑ)", | |
| "๊ฒฐ๊ณผ์ ๋ถํฌยท์ผ๋ฐํ ์์ค ํต๊ณ์ ๊ฒ์ฆ", | |
| ], | |
| } | |
| j_specific = { | |
| "KR": ["๊ฐ๋ช ์ ๋ณด ์ ์ ์ฑ ๊ฒํ (๊ฐ๋ช ์ ๋ณด ์ฒ๋ฆฌ ๊ฐ์ด๋๋ผ์ธ)"] if level == "pseudonymization" | |
| else ["์ ์ ์ฑ ํ๊ฐ + ์ถ๊ฐ์ ๋ณด ํ๊ธฐ"], | |
| "JP": ["ไปฎๅๅ ๅทฅๆ ๅ ฑใฎๅฎๅ จ็ฎก็ๆช็ฝฎ (APPI ยง35-2)"] if level == "pseudonymization" | |
| else ["ๅฟๅๅ ๅทฅๆ ๅ ฑใฎไฝๆๆนๆณ็ญใฎๅ ฌ่กจ (APPI ยง43)"], | |
| "US": ["HIPAA Safe Harbor 18 ์๋ณ์ ํญ๋ชฉ ๋ชจ๋ ์ฒ๋ฆฌ", | |
| "Expert Determination ํธ๋ ์ ํต๊ณ ์ ๋ฌธ๊ฐ ๊ฒ์ฆ"], | |
| "EU": ["EDPB 01/2025 โ singling-out / linkability / inference 3๊ฐ ์ํ ํ๊ฐ", | |
| "Art 32 ์ ์ ํ ๋ณด์์กฐ์น"], | |
| } | |
| return common + by_level[level] + j_specific.get(j, []) | |
def evaluate_compliance(
    entities: List[EntityRecord],
    jurisdictions: List[str],
    treatment_level: str,
) -> List[ComplianceVerdict]:
    """Produce one heuristic compliance verdict per known jurisdiction.

    Jurisdictions without an entry in ``JURISDICTION`` are skipped. For each
    remaining one the entities are counted by their classification, and the
    verdict is decided in this order:

    1. an entity classified ``my_number_act`` whose technique is not
       ``"suppress"`` -> ``"insufficient"``;
    2. a direct/secret/sensitive/sensitive_appi/my_number_act entity whose
       transformed text equals the original -> ``"insufficient"``;
    3. ``treatment_level == "anonymization"`` -> ``"partial"`` when two or more
       quasi-identifiers are present, otherwise ``"compliant"``;
    4. any other level -> ``"compliant"`` (pseudonymization).

    NOTE(review): SOURCE lost its indentation; the nesting below is
    reconstructed from the control flow - confirm against the original file,
    in particular that the KR/JP ``pending.append`` calls sit at loop level.
    """
    out: List[ComplianceVerdict] = []
    for j in jurisdictions:
        rules = JURISDICTION.get(j)
        if not rules:
            continue
        counts = {"direct": 0, "quasi": 0, "sensitive": 0, "secret": 0,
                  "sensitive_appi": 0, "my_number_act": 0, "unmapped": 0}
        untreated: List[str] = []
        # My Number Act - anything other than the 'suppress' technique (=[REDACTED]) is a violation
        my_number_violations: List[str] = []
        # Special-care-required personal information - opt-in consent cannot be
        # determined automatically, so it is left as a pending item
        sensitive_appi_present: List[str] = []
        for e in entities:
            cls = e.classifications.get(j, "unmapped")
            counts[cls] = counts.get(cls, 0) + 1
            # Quasi-identifiers and unmapped entities are never reported as untreated.
            if cls in ("direct", "secret", "sensitive", "sensitive_appi", "my_number_act") \
                    and e.transformed == e.original:
                untreated.append(e.entity_type)
            if cls == "my_number_act" and e.technique != "suppress":
                my_number_violations.append(e.entity_type)
            if cls == "sensitive_appi":
                sensitive_appi_present.append(e.entity_type)
        reqs = _requirements_per_jurisdiction(j, treatment_level)
        # Simple heuristic verdict (PoC)
        # My Number Act violations are checked first (they override every other verdict)
        if my_number_violations:
            verdict = "insufficient"
            rationale = (
                f"ใใคใใณใใผๆณ ยง19 ์๋ฐ: {', '.join(sorted(set(my_number_violations)))} โ "
                "๋ง์ด๋๋ฒ๋ ๅฉ็จ็ฎ็ๅคไฟ็ฎก็ฆๆญข โ ๋ง์คํน/ํ ํฐํ ๋ถ๊ฐ, ์์ ์ ๊ฑฐ(suppress) ํ์."
            )
            met = ["๊ฒ์ถ/๋ถ๋ฅ ์๋ฃ"]
            pending = [
                "JP_MY_NUMBER ์ technique ์ 'suppress' ๋ก ๋ณ๊ฒฝ (์์ ์ ๊ฑฐ)",
                "ใใคใใณใใผๆณ ยง12 ๅฎๅ จ็ฎก็ๆช็ฝฎ โ ๆๅทๅยท์ ๊ทผํต์ ์ฆ๋น",
                "็ฎ็ๅคๅฉ็จยทๆไพ็ฆๆญข (ๆณ ยง20)",
            ] + reqs
        elif untreated:
            verdict = "insufficient"
            rationale = (
                f"๋ฏธ์ฒ๋ฆฌ ์ง์ /๋ฏผ๊ฐ/๋น๋ฐ ํญ๋ชฉ ์กด์ฌ: {', '.join(sorted(set(untreated)))} โ "
                f"{rules['name']} ๊ธฐ์ค {treatment_level} ๋ฏธ์ถฉ์กฑ."
            )
            met = ["๊ฒ์ถ/๋ถ๋ฅ ์๋ฃ"]
            pending = ["๋ฏธ์ฒ๋ฆฌ ํญ๋ชฉ ๋ณํ"] + reqs
        elif treatment_level == "anonymization":
            # Anonymization: "partial" when several quasi-identifiers remain (manual verification needed)
            quasi_n = counts.get("quasi", 0)
            if quasi_n >= 2:
                verdict = "partial"
                rationale = (
                    f"์ค์๋ณ์ {quasi_n}๊ฐ โ ์กฐํฉ ์ฌ์๋ณ ์ํ. "
                    "k-์ต๋ช ์ฑยทl-๋ค์์ฑยทt-๊ทผ์ ์ฑ ๋ฑ ์ ๋ ๊ฒ์ฆ ํ์."
                )
                met = [
                    "์ง์ ยท๋ฏผ๊ฐยท๋น๋ฐ ๋ชจ๋ ๋ณํ",
                    "PII ๊ฒ์ถยท๋ถ๋ฅยทDPV ๋งคํ ์๋ฃ",
                ]
                pending = [
                    "์ค์๋ณ์ ์กฐํฉ์ ๋ํ ์ฌ์๋ณ ์ํ ์ ๋ ํ๊ฐ",
                    "์ถ๊ฐ์ ๋ณด(๋งคํ) ํ๊ธฐ ์ ์ฐจ",
                ]
            else:
                verdict = "compliant"
                rationale = (
                    "์ง์ ยท๋ฏผ๊ฐยท๋น๋ฐ ๋ชจ๋ ๋ณํ๋์๊ณ ์ค์๋ณ์ ์ผ๋ฐํ ์ ์ฉ. "
                    "๋จ PoC ํด๋ฆฌ์คํฑ โ ์ค ์ด์ ์ ํต๊ณ์ ์ ์ ์ฑ ๊ฒํ ํ์."
                )
                met = [
                    "์ง์ ยท๋ฏผ๊ฐยท๋น๋ฐ ๋ชจ๋ ๋ณํ",
                    "์ค์๋ณ์ ์ผ๋ฐํ/์ ๊ฑฐ",
                    "DPV ๋งคํ + ๊ดํ ๋ถ๋ฅ ๋ช ์ธํ",
                ]
                pending = [
                    "์ถ๊ฐ์ ๋ณด(๋งคํ) ํ๊ธฐ ๋๋ ๋ถ๋ฆฌ ํ๊ธฐ ์ฆ๋น",
                    "ํต๊ณ์ ์ฌ์๋ณ ์ํ ํ๊ฐ ๋ณด๊ณ ์",
                ]
        else:
            # Pseudonymization - treated as compliant on the premise that the
            # additional information is stored separately
            verdict = "compliant"
            # Only direct/sensitive/secret counts are summed here.
            n_treated = counts["direct"] + counts["sensitive"] + counts["secret"]
            rationale = (
                f"์ง์ ยท๋ฏผ๊ฐยท๋น๋ฐ {n_treated}๊ฑด ๋ชจ๋ ๋ณํ ์๋ฃ. ๋งคํ ํ ์ด๋ธ์ "
                "๋ณธ PoC ๊ฐ ๋ฉ๋ชจ๋ฆฌ์ ๋ณด๊ด โ ์ค ์ด์ ์ KMS/HSM ๋ถ๋ฆฌ ๋ณด๊ด ํ์."
            )
            met = [
                "์ง์ ์๋ณ์ ๋ชจ๋ ๋ณํ/์ ๊ฑฐ",
                "๋ฏผ๊ฐ์ ๋ณด ๋ง์คํน/์ ๊ฑฐ",
                "๋น๋ฐ ์๊ฒฉ์ฆ๋ช ์์ ์ ๊ฑฐ",
                "DPV ๋งคํ + ์ฒ๋ฆฌ ํ๋ฆ ๋ฌธ์ํ",
            ]
            pending = [
                "๋งคํ ํ ์ด๋ธ์ ๋ณ๋ ๋ณด๊ด (KMSยทHSM)",
                "์ฌ์๋ณ ์๋ ๋ฐฉ์ง ๊ธฐ์ ยท์กฐ์ง์ ์กฐ์น (Art 32 / APPI ๅฎๅ จ็ฎก็)",
                "(KR) ๊ฐ๋ช ์ ๋ณด ์ ์ ์ฑ ๊ฒํ ",
            ]
        # KR only: emphasise the guideline's adequacy review
        if j == "KR":
            pending.append("๊ฐ๋ช ์ ๋ณด ์ฒ๋ฆฌ ๊ฐ์ด๋๋ผ์ธ ยงIII-3 ์ ์ ์ฑ ๊ฒํ ์์ํ ์์ฌ๋ก")
        # JP special-care-required personal information - opt-in consent cannot be verified automatically
        if j == "JP" and sensitive_appi_present:
            pending.append(
                f"่ฆ้ ๆ ฎๅไบบๆ ๅ ฑ {len(set(sensitive_appi_present))}๊ฑด โ "
                "APPI ยง20 ์ฌ์ ์ตํธ์ธ ๋์ ํ๋ณด ์ฆ๋น ํ์"
            )
        out.append(ComplianceVerdict(
            jurisdiction=j,
            name=rules["name"],
            law=rules["law"],
            url=rules["url"],
            notes=rules["notes"],
            treatment_level=treatment_level,
            # The "unmapped" bucket is tallied above but left out of the report.
            counts={k: v for k, v in counts.items() if k != "unmapped"},
            untreated=sorted(set(untreated)),
            verdict=verdict,
            rationale=rationale,
            requirements_met=met,
            requirements_pending=pending,
        ))
    return out
# =========================================================================
# Main entry point - text + findings -> transformed text + evaluation
# =========================================================================
def run(
    text: str,
    findings: List[Dict],
    jurisdictions: List[str],
    treatment_level: str,
    salt: Optional[bytes] = None,
) -> Dict:
    """Transform ``text`` using PII Scanner ``findings`` and evaluate the result.

    An unrecognised ``treatment_level`` falls back to ``"pseudonymization"``;
    if none of the requested jurisdictions is known, all known ones are used.
    Each finding must provide ``"start"``, ``"end"``, ``"entity_type"`` and
    ``"text"``; ``"score"`` is optional and defaults to 0.

    Returns a dict with the effective treatment level and jurisdictions, the
    original and transformed text, the per-entity records, the per-jurisdiction
    verdicts and a demo dump of the consistency mapping.
    """
    if treatment_level != "pseudonymization" and treatment_level != "anonymization":
        treatment_level = "pseudonymization"
    valid = [code for code in jurisdictions if code in JURISDICTION] or list(JURISDICTION.keys())
    engine = Pseudonymizer(salt=salt, anonymize=treatment_level == "anonymization")

    # Sort by ascending position (higher score first on ties), then greedily
    # keep the findings that do not overlap the previously kept one.
    def _position_then_score(finding):
        return (finding["start"], -finding.get("score", 0))

    selected = []
    boundary = -1
    for finding in sorted(findings, key=_position_then_score):
        if finding["start"] < boundary:
            continue
        selected.append(finding)
        boundary = finding["end"]

    records: List[EntityRecord] = []
    for position, finding in enumerate(selected, start=1):
        kind = finding["entity_type"]
        raw = finding["text"]
        concept, concept_note = DPV_CATEGORY.get(kind, ("dpv:NonPersonalData", "DPV ๋งคํ ์์"))
        replaced, tech_id, tech_note, tech_dpv = engine.transform(kind, raw)
        labels = classify_entity(kind, valid)
        record = EntityRecord(
            index=position,
            entity_type=kind,
            original=raw,
            start=finding["start"],
            end=finding["end"],
            score=float(finding.get("score", 0)),
            dpv_concept=concept,
            dpv_note=concept_note,
            technique=tech_id,
            technique_note=tech_note,
            technique_dpv=tech_dpv,
            transformed=replaced,
            classifications=labels,
        )
        records.append(record)

    # Substitute from the end towards the front so earlier offsets stay valid.
    rewritten = text
    for rec in sorted(records, key=lambda item: item.start, reverse=True):
        head, tail = rewritten[: rec.start], rewritten[rec.end :]
        rewritten = head + rec.transformed + tail

    verdicts = evaluate_compliance(records, valid, treatment_level)

    # Mapping table (PoC demo only - never include it in a response in production).
    mapping_demo = [
        {"entity_type": pair[0], "original": pair[1], "token": token}
        for pair, token in engine.consistent.items()
    ]
    return {
        "treatment_level": treatment_level,
        "jurisdictions": valid,
        "original_text": text,
        "transformed_text": rewritten,
        "entities": [asdict(rec) for rec in records],
        "verdicts": [asdict(item) for item in verdicts],
        "mapping_demo": mapping_demo,
    }