# core logging module — structured JSON logging with PII scrubbing.
import contextlib
import json
import logging
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
class PIIScrubber:
    """PII detection and scrubbing utilities.

    Class-level compiled regexes match common PII shapes (emails, US
    phone/SSN formats, card numbers, IPv4 addresses, and a few id
    formats); ``scrub_pii`` replaces each match with a masked placeholder.
    """

    # PII patterns (compiled once at class-definition time).
    # Bug fix: the TLD class was [A-Z|a-z] — inside a character class "|"
    # is a literal pipe, not alternation, so "|" was accepted in TLDs.
    EMAIL_PATTERN = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
    PHONE_PATTERN = re.compile(r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b")
    SSN_PATTERN = re.compile(r"\b\d{3}-\d{2}-\d{4}\b")
    CREDIT_CARD_PATTERN = re.compile(r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b")
    IP_ADDRESS_PATTERN = re.compile(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b")

    # Additional patterns for sensitive data.
    # NOTE(review): BANK_ACCOUNT_PATTERN matches ANY bare 8-17 digit run,
    # so it can over-scrub harmless ids/timestamps — confirm this
    # aggressive masking is intentional.
    BANK_ACCOUNT_PATTERN = re.compile(r"\b\d{8,17}\b")
    PASSPORT_PATTERN = re.compile(r"\b[A-Z]{1,2}\d{7,8}\b")
    DRIVER_LICENSE_PATTERN = re.compile(r"\b[A-Z]{1,2}\d{6,8}\b")

    @staticmethod
    def scrub_pii(text: str, mask_char: str = "***") -> str:
        """Return *text* with every detected PII match masked.

        Args:
            text: Input to scrub. Non-string or falsy values are returned
                unchanged (defensive: callers pass arbitrary extras).
            mask_char: Replacement token used to build the placeholders.

        Returns:
            The scrubbed string (or the original object when not a
            non-empty string).
        """
        if not text or not isinstance(text, str):
            return text

        scrubbed_text = text
        # Apply all PII patterns; specific formats (SSN, credit card)
        # run before the broad bank-account digit match.
        patterns = [
            (PIIScrubber.EMAIL_PATTERN, f"{mask_char}@{mask_char}.com"),
            (PIIScrubber.PHONE_PATTERN, f"{mask_char}-{mask_char}-{mask_char}"),
            (PIIScrubber.SSN_PATTERN, f"{mask_char}-{mask_char}-{mask_char}"),
            (
                PIIScrubber.CREDIT_CARD_PATTERN,
                f"{mask_char}-{mask_char}-{mask_char}-{mask_char}",
            ),
            (
                PIIScrubber.IP_ADDRESS_PATTERN,
                f"{mask_char}.{mask_char}.{mask_char}.{mask_char}",
            ),
            (PIIScrubber.BANK_ACCOUNT_PATTERN, mask_char * 8),
            (PIIScrubber.PASSPORT_PATTERN, f"{mask_char}{mask_char}"),
            (PIIScrubber.DRIVER_LICENSE_PATTERN, f"{mask_char}{mask_char}"),
        ]
        for pattern, replacement in patterns:
            scrubbed_text = pattern.sub(replacement, scrubbed_text)
        return scrubbed_text
class JSONFormatter(logging.Formatter):
    """Custom JSON formatter for structured logging.

    Serializes each record as a single JSON object and scrubs PII from
    every string field except the structural timestamp/level/logger.
    """

    def format(self, record: logging.LogRecord) -> str:
        # Bug fix: aware-UTC isoformat() already ends in "+00:00", so the
        # old `isoformat() + "Z"` produced the invalid "...+00:00Z".
        # Normalize to a single "Z" suffix instead.
        timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

        log_entry = {
            "timestamp": timestamp,
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }

        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)

        # Optional structured attributes attached via `extra=` by callers.
        if hasattr(record, "extra_fields"):
            log_entry.update(record.extra_fields)
        if hasattr(record, "request_id"):
            log_entry["request_id"] = record.request_id
        if hasattr(record, "user_id"):
            log_entry["user_id"] = record.user_id
        if hasattr(record, "ip_address"):
            log_entry["ip_address"] = PIIScrubber.scrub_pii(record.ip_address)

        # Scrub every string value — this covers "message" and any extras
        # — except structural fields that cannot contain PII. (The old
        # separate message scrub was redundant; the loop already does it.)
        for key, value in log_entry.items():
            if isinstance(value, str) and key not in ["timestamp", "level", "logger"]:
                log_entry[key] = PIIScrubber.scrub_pii(value)

        # default=str deliberately stringifies non-serializable extras.
        return json.dumps(log_entry, default=str)
def setup_logging(
level: str = "INFO",
format_type: str = "json",
log_file: str | None = None,
max_file_size: int = 10 * 1024 * 1024, # 10MB
backup_count: int = 5,
enable_console: bool = True,
enable_file: bool = True,
) -> logging.Logger:
"""Enhanced logging setup with better configuration options"""
# Convert string level to numeric
numeric_level = getattr(logging, level.upper(), logging.INFO)
# Get or create logger
logger = logging.getLogger("zenith")
logger.setLevel(numeric_level)
# Clear existing handlers
for h in list(logger.handlers):
logger.removeHandler(h)
# Create formatter
if format_type.lower() == "json":
formatter = JSONFormatter()
else:
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Console handler
if enable_console:
console = logging.StreamHandler(sys.stdout)
console.setLevel(numeric_level)
console.setFormatter(formatter)
logger.addHandler(console)
# File handler with rotation
if enable_file and log_file:
log_path = Path(log_file)
log_path.parent.mkdir(parents=True, exist_ok=True)
from logging.handlers import RotatingFileHandler
fh = RotatingFileHandler(
log_file, maxBytes=max_file_size, backupCount=backup_count, encoding="utf-8"
)
fh.setLevel(numeric_level)
fh.setFormatter(formatter)
logger.addHandler(fh)
logger.propagate = False
return logger
# Environment-based logging configuration (runs at import time).
# NOTE(review): configure_environment_logging() at the bottom of this file
# reconfigures the same "zenith" logger object, so these env vars only act
# as the initial defaults.
logger = setup_logging(
    level=os.getenv("LOG_LEVEL", "WARNING"),  # Default to WARNING for production
    format_type=os.getenv("LOG_FORMAT", "json"),
    log_file=os.getenv("LOG_FILE", "logs/fraud_detection.log"),
    max_file_size=int(os.getenv("LOG_MAX_SIZE_MB", "10")) * 1024 * 1024,  # MB -> bytes
    backup_count=int(os.getenv("LOG_BACKUP_COUNT", "5")),
    enable_console=os.getenv("LOG_CONSOLE", "true").lower() == "true",
    enable_file=os.getenv("LOG_FILE_ENABLED", "true").lower() == "true",
)
def log_request(
request_id: str,
method: str,
path: str,
status_code: int,
duration: float,
user_id: str | None = None,
):
"""Log HTTP request details (single call to module logger)."""
extra_fields = {
"request_id": request_id,
"method": method,
"path": path,
"status_code": status_code,
"duration_ms": round(duration * 1000, 2),
}
if user_id:
extra_fields["user_id"] = user_id
# Prefer calling the attribute on the imported module object so tests
# that patch `core.logging.logger` are observed reliably.
try:
core_mod = sys.modules.get("core.logging")
if core_mod and hasattr(core_mod, "logger"):
try:
core_mod.logger.info("HTTP request", extra=extra_fields)
return
except Exception:
pass
except Exception:
pass
try:
logger.info("HTTP request", extra=extra_fields)
except Exception:
with contextlib.suppress(Exception):
logging.getLogger("Zenith").info("HTTP request", extra=extra_fields)
def log_error(
error_type: str,
message: str,
details: dict[str, Any] | None = None,
user_id: str | None = None,
):
"""Log application errors."""
extra_fields = {"error_type": error_type, "details": details or {}}
if user_id:
extra_fields["user_id"] = user_id
try:
core_mod = sys.modules.get("core.logging")
if core_mod and hasattr(core_mod, "logger"):
try:
core_mod.logger.error(message, extra=extra_fields)
return
except Exception:
pass
except Exception:
pass
try:
logger.error(message, extra=extra_fields)
except Exception:
with contextlib.suppress(Exception):
logging.getLogger("Zenith").error(message, extra=extra_fields)
def log_security_event(
event_type: str,
user_id: str | None = None,
ip_address: str | None = None,
details: dict[str, Any] | None = None,
):
"""Log security-related events."""
extra_fields = {
"event_type": event_type,
"security_event": True,
"details": details or {},
}
if user_id:
extra_fields["user_id"] = user_id
if ip_address:
extra_fields["ip_address"] = ip_address
try:
core_mod = sys.modules.get("core.logging")
if core_mod and hasattr(core_mod, "logger"):
try:
core_mod.logger.warning("Security event", extra=extra_fields)
return
except Exception:
pass
except Exception:
pass
try:
logger.warning("Security event", extra=extra_fields)
except Exception:
with contextlib.suppress(Exception):
logging.getLogger("Zenith").warning("Security event", extra=extra_fields)
def log_performance(metric_name: str, value: float, tags: dict[str, Any] | None = None):
"""Log performance metrics."""
extra_fields = {
"metric_name": metric_name,
"metric_value": value,
"performance_metric": True,
"tags": tags or {},
}
try:
core_mod = sys.modules.get("core.logging")
if core_mod and hasattr(core_mod, "logger"):
try:
core_mod.logger.info("Performance metric", extra=extra_fields)
return
except Exception:
pass
except Exception:
pass
try:
logger.info("Performance metric", extra=extra_fields)
except Exception:
with contextlib.suppress(Exception):
logging.getLogger("Zenith").info("Performance metric", extra=extra_fields)
def configure_environment_logging():
    """Configure logging based on the ENVIRONMENT variable.

    Known environments get a tailored profile; any other value (e.g.
    "production") gets the most conservative one: WARNING-level JSON to
    file only.
    """
    env = os.getenv("ENVIRONMENT", "development").lower()

    # Per-environment keyword arguments for setup_logging.
    profiles = {
        # Development: more verbose, console only, human-readable.
        "development": {
            "level": "DEBUG",
            "format_type": "text",
            "enable_console": True,
            "enable_file": False,
        },
        # Testing: capture all logs, minimal output, structured.
        "testing": {
            "level": "DEBUG",
            "format_type": "json",
            "log_file": "logs/test.log",
            "enable_console": False,
            "enable_file": True,
        },
        # Staging: balanced logging, both console and file.
        "staging": {
            "level": "INFO",
            "format_type": "json",
            "log_file": "logs/staging.log",
            "enable_console": True,
            "enable_file": True,
        },
    }
    # Production (and any unrecognized env): minimal, structured, file only.
    production = {
        "level": "WARNING",
        "format_type": "json",
        "log_file": "logs/production.log",
        "enable_console": False,
        "enable_file": True,
    }

    setup_logging(**profiles.get(env, production))
# Initialize with environment-based configuration. This runs at import
# time and reconfigures the same "zenith" logger created above, so the
# ENVIRONMENT profile wins over the individual LOG_* defaults.
configure_environment_logging()