# UPIF-Demo/upif/modules/output_protection.py
# Author: yashsecdev
# Initial commit: UPIF v0.1.4 and Marketing Demo
# Commit: 5e56bcf
"""
upif.modules.output_protection
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The Data Loss Prevention (DLP) layer.
Scans outgoing model responses for Personally Identifiable Information (PII)
and Secrets (API Keys) to prevent leakage.
:copyright: (c) 2025 Yash Dhone.
:license: Proprietary, see LICENSE for details.
"""
import re
from typing import Any, Optional, Dict, List
from upif.core.interfaces import SecurityModule
class OutputShield(SecurityModule):
    """
    PII and Secret Redaction Shield.

    Replaces every match of a fixed set of regular expressions with a
    bracketed placeholder such as ``[EMAIL_REDACTED]``.

    Targets:
    - Email Addresses
    - US Phone Numbers (any 10-digit run, optionally split 3-3-4 by ``-``/``.``)
    - SSN (Social Security Numbers, dashed ``NNN-NN-NNNN`` form only)
    - Generic API Keys (``sk-...`` and GitHub ``ghp_``/``gho_``/``ghu_``/
      ``ghs_``/``ghr_`` tokens)
    """

    def __init__(self):
        # Patterns are compiled once per instance and applied in list order
        # by scan(). Each entry maps a placeholder name to its compiled regex.
        self.patterns: List[Dict[str, Any]] = [
            # Email (RFC-ish). The TLD class is [A-Za-z]; a '|' inside a
            # character class is a literal pipe, not alternation.
            {
                "name": "EMAIL_REDACTED",
                "regex": re.compile(
                    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
                ),
            },
            # Phone (US format mostly, simplistic): 3-3-4 digits with
            # optional '-' or '.' separators.
            {
                "name": "PHONE_REDACTED",
                "regex": re.compile(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'),
            },
            # SSN (simple): dashed 3-2-4 digits.
            {
                "name": "SSN_REDACTED",
                "regex": re.compile(r'\b\d{3}-\d{2}-\d{4}\b'),
            },
            # API keys (common prefixes).
            # - "sk-" keys may contain '-' and '_' after the prefix
            #   (e.g. "sk-proj-..."), so both are allowed in the body.
            # - GitHub tokens use an underscore after the 3-letter prefix
            #   ("ghp_..."); the hyphen form is still accepted so anything
            #   matched previously keeps matching.
            {
                "name": "API_KEY_REDACTED",
                "regex": re.compile(
                    r'\b(sk-[A-Za-z0-9_-]{20,}|gh[pousr][_-][A-Za-z0-9]{20,})\b'
                ),
            },
        ]

    def scan(self, content: Any, metadata: Optional[Dict[str, Any]] = None) -> Any:
        """
        Redact sensitive information from the content string.

        Args:
            content (Any): Model response. Only ``str`` values are scanned.
            metadata (Optional[Dict[str, Any]]): Accepted but not used by
                this method.

        Returns:
            Any: The redacted string (e.g. "Email: [EMAIL_REDACTED]") when
            ``content`` is a ``str``; otherwise ``content`` unchanged.
        """
        if not isinstance(content, str):
            # Non-text payloads are passed through untouched.
            return content

        sanitized = content
        for pattern in self.patterns:
            # One substitution pass per pattern; each pass sees the output
            # of the previous one.
            placeholder = f"[{pattern['name']}]"
            sanitized = pattern["regex"].sub(placeholder, sanitized)
        return sanitized