Spaces:
Build error
Build error
| """ | |
| upif.modules.output_protection | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| The Data Loss Prevention (DLP) layer. | |
| Scans outgoing model responses for Personally Identifiable Information (PII) | |
| and Secrets (API Keys) to prevent leakage. | |
| :copyright: (c) 2025 Yash Dhone. | |
| :license: Proprietary, see LICENSE for details. | |
| """ | |
| import re | |
| from typing import Any, Optional, Dict, List | |
| from upif.core.interfaces import SecurityModule | |
class OutputShield(SecurityModule):
    """
    PII and Secret Redaction Shield.

    Replaces sensitive substrings in outgoing text with a bracketed
    placeholder such as ``[EMAIL_REDACTED]``.

    Targets (in the order they are applied):
    - Generic API Keys (sk-..., ghp_/gho_/ghu_/ghs_/ghr_..., github_pat_...)
    - Email Addresses
    - SSN (Social Security Numbers)
    - US Phone Numbers
    """

    # (placeholder name, regex source) pairs, listed in application order.
    # Secrets go first: they are the most specific pattern, and a digit run
    # inside a hyphenated key must not be rewritten by the phone pattern
    # before the key itself has been matched as a whole.
    _PATTERN_SPECS = (
        # API keys (common prefixes). "sk-" keys may contain '-' and '_' in
        # their body (e.g. "sk-proj-..."); GitHub tokens use an underscore
        # after the prefix, and the legacy '-' form is still accepted.
        (
            "API_KEY_REDACTED",
            r'\b(?:sk-[A-Za-z0-9_-]{20,}'
            r'|gh[pousr][_-][A-Za-z0-9]{20,}'
            r'|github_pat_[A-Za-z0-9_]{20,})\b',
        ),
        # Email (RFC-ish). The TLD class is letters only: a '|' inside a
        # character class is a literal pipe, not an alternation.
        (
            "EMAIL_REDACTED",
            r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
        ),
        # SSN (simple ddd-dd-dddd form).
        (
            "SSN_REDACTED",
            r'\b\d{3}-\d{2}-\d{4}\b',
        ),
        # Phone (US formats): optional +1/1 country code, area code with or
        # without parentheses, and '-', '.' or ' ' as separators.
        # (?<!\w) stands in for a leading \b, which cannot anchor before
        # '(' or '+'.
        (
            "PHONE_REDACTED",
            r'(?<!\w)(?:\+?1[-. ]?)?(?:\(\d{3}\)\s?|\d{3}[-. ]?)'
            r'\d{3}[-. ]?\d{4}\b',
        ),
    )

    def __init__(self):
        # Public list of {"name": placeholder, "regex": compiled pattern}
        # entries; scan() applies them in list order.
        self.patterns: List[Dict[str, Any]] = [
            {"name": name, "regex": re.compile(source)}
            for name, source in self._PATTERN_SPECS
        ]

    def scan(self, content: Any, metadata: Optional[Dict[str, Any]] = None) -> Any:
        """
        Redact sensitive info from the content string.

        Args:
            content (Any): Model response. Non-string values are returned
                unchanged.
            metadata (Optional[Dict[str, Any]]): Accepted for interface
                compatibility; not used by this method.

        Returns:
            Any: The redacted string (e.g., "Email: [EMAIL_REDACTED]"), or
            the original object when ``content`` is not a ``str``.
        """
        if not isinstance(content, str):
            return content

        sanitized = content
        for pattern in self.patterns:
            # Each pass rewrites every match of one pattern with "[NAME]".
            placeholder = f"[{pattern['name']}]"
            sanitized = pattern["regex"].sub(placeholder, sanitized)
        return sanitized