Spaces:
Paused
Paused
Upload core/logging.py with huggingface_hub
Browse files- core/logging.py +63 -3
core/logging.py
CHANGED
|
@@ -2,12 +2,60 @@ import contextlib
|
|
| 2 |
import json
|
| 3 |
import logging
|
| 4 |
import os
|
|
|
|
| 5 |
import sys
|
| 6 |
from datetime import datetime, timezone
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Any
|
| 9 |
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
class JSONFormatter(logging.Formatter):
|
| 12 |
"""Custom JSON formatter for structured logging"""
|
| 13 |
|
|
@@ -33,7 +81,15 @@ class JSONFormatter(logging.Formatter):
|
|
| 33 |
if hasattr(record, "user_id"):
|
| 34 |
log_entry["user_id"] = record.user_id
|
| 35 |
if hasattr(record, "ip_address"):
|
| 36 |
-
log_entry["ip_address"] = record.ip_address
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
return json.dumps(log_entry, default=str)
|
| 39 |
|
|
@@ -83,7 +139,9 @@ def setup_logging(
|
|
| 83 |
|
| 84 |
from logging.handlers import RotatingFileHandler
|
| 85 |
|
| 86 |
-
fh = RotatingFileHandler(
|
|
|
|
|
|
|
| 87 |
fh.setLevel(numeric_level)
|
| 88 |
fh.setFormatter(formatter)
|
| 89 |
logger.addHandler(fh)
|
|
@@ -239,7 +297,9 @@ def configure_environment_logging():
|
|
| 239 |
|
| 240 |
if env == "development":
|
| 241 |
# Development: more verbose, console only, human-readable
|
| 242 |
-
setup_logging(
|
|
|
|
|
|
|
| 243 |
elif env == "testing":
|
| 244 |
# Testing: capture all logs, minimal output, structured
|
| 245 |
setup_logging(
|
|
|
|
| 2 |
import json
|
| 3 |
import logging
|
| 4 |
import os
|
| 5 |
+
import re
|
| 6 |
import sys
|
| 7 |
from datetime import datetime, timezone
|
| 8 |
from pathlib import Path
|
| 9 |
from typing import Any
|
| 10 |
|
| 11 |
|
| 12 |
+
class PIIScrubber:
    """PII detection and scrubbing utilities.

    Holds a set of pre-compiled regular expressions for common kinds of
    personally identifiable information and a single entry point,
    :meth:`scrub_pii`, that masks every match in a string.
    """

    # PII patterns
    # NOTE: the TLD class is ``[A-Za-z]``; a ``|`` inside a character class is
    # a literal pipe, not alternation, and would let the match run across
    # pipe-delimited text.
    EMAIL_PATTERN = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
    PHONE_PATTERN = re.compile(r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b")
    SSN_PATTERN = re.compile(r"\b\d{3}-\d{2}-\d{4}\b")
    CREDIT_CARD_PATTERN = re.compile(r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b")
    IP_ADDRESS_PATTERN = re.compile(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b")

    # Additional patterns for sensitive data
    # Any standalone run of 8-17 digits is treated as a bank account number.
    BANK_ACCOUNT_PATTERN = re.compile(r"\b\d{8,17}\b")
    PASSPORT_PATTERN = re.compile(r"\b[A-Z]{1,2}\d{7,8}\b")
    DRIVER_LICENSE_PATTERN = re.compile(r"\b[A-Z]{1,2}\d{6,8}\b")

    @staticmethod
    def scrub_pii(text: str, mask_char: str = "***") -> str:
        """Return *text* with every PII pattern match replaced by a mask.

        Args:
            text: The string to scrub. Non-string or empty values are
                returned unchanged.
            mask_char: The mask token inserted in place of each component of
                a match (for example an e-mail becomes
                ``{mask_char}@{mask_char}.com``). It is always inserted
                literally, even if it contains backslashes.

        Returns:
            The scrubbed string, or *text* itself when it is not a non-empty
            string.
        """
        # Type check first so truthiness is never evaluated on non-strings.
        if not isinstance(text, str) or not text:
            return text

        # Patterns are applied in this order; each one runs on the output of
        # the previous substitution.
        patterns = (
            (PIIScrubber.EMAIL_PATTERN, f"{mask_char}@{mask_char}.com"),
            (PIIScrubber.PHONE_PATTERN, f"{mask_char}-{mask_char}-{mask_char}"),
            (PIIScrubber.SSN_PATTERN, f"{mask_char}-{mask_char}-{mask_char}"),
            (
                PIIScrubber.CREDIT_CARD_PATTERN,
                f"{mask_char}-{mask_char}-{mask_char}-{mask_char}",
            ),
            (
                PIIScrubber.IP_ADDRESS_PATTERN,
                f"{mask_char}.{mask_char}.{mask_char}.{mask_char}",
            ),
            (PIIScrubber.BANK_ACCOUNT_PATTERN, mask_char * 8),
            (PIIScrubber.PASSPORT_PATTERN, f"{mask_char}{mask_char}"),
            (PIIScrubber.DRIVER_LICENSE_PATTERN, f"{mask_char}{mask_char}"),
        )

        scrubbed_text = text
        for pattern, replacement in patterns:
            # A callable replacement is used verbatim; a plain string would be
            # parsed as a template, so a mask containing "\\" could raise
            # re.error or expand to an unintended escape/group reference.
            scrubbed_text = pattern.sub(
                lambda _match, _repl=replacement: _repl, scrubbed_text
            )

        return scrubbed_text
|
| 57 |
+
|
| 58 |
+
|
| 59 |
class JSONFormatter(logging.Formatter):
|
| 60 |
"""Custom JSON formatter for structured logging"""
|
| 61 |
|
|
|
|
| 81 |
if hasattr(record, "user_id"):
|
| 82 |
log_entry["user_id"] = record.user_id
|
| 83 |
if hasattr(record, "ip_address"):
|
| 84 |
+
log_entry["ip_address"] = PIIScrubber.scrub_pii(record.ip_address)
|
| 85 |
+
|
| 86 |
+
# Scrub the message itself
|
| 87 |
+
log_entry["message"] = PIIScrubber.scrub_pii(log_entry["message"])
|
| 88 |
+
|
| 89 |
+
# Scrub extra fields
|
| 90 |
+
for key, value in log_entry.items():
|
| 91 |
+
if isinstance(value, str) and key not in ["timestamp", "level", "logger"]:
|
| 92 |
+
log_entry[key] = PIIScrubber.scrub_pii(value)
|
| 93 |
|
| 94 |
return json.dumps(log_entry, default=str)
|
| 95 |
|
|
|
|
| 139 |
|
| 140 |
from logging.handlers import RotatingFileHandler
|
| 141 |
|
| 142 |
+
fh = RotatingFileHandler(
|
| 143 |
+
log_file, maxBytes=max_file_size, backupCount=backup_count, encoding="utf-8"
|
| 144 |
+
)
|
| 145 |
fh.setLevel(numeric_level)
|
| 146 |
fh.setFormatter(formatter)
|
| 147 |
logger.addHandler(fh)
|
|
|
|
| 297 |
|
| 298 |
if env == "development":
|
| 299 |
# Development: more verbose, console only, human-readable
|
| 300 |
+
setup_logging(
|
| 301 |
+
level="DEBUG", format_type="text", enable_console=True, enable_file=False
|
| 302 |
+
)
|
| 303 |
elif env == "testing":
|
| 304 |
# Testing: capture all logs, minimal output, structured
|
| 305 |
setup_logging(
|