"""Logging configuration for Voice Text Processor.

This module sets up the logging system with proper formatting, levels,
and file output. It also includes a filter to prevent sensitive information
from being logged.

Requirements: 10.5, 9.5
"""
import logging
import re
from typing import Optional
from pathlib import Path
from contextvars import ContextVar
# Context variable holding the request_id for the current execution context,
# so the value follows a request across async calls. It defaults to None,
# meaning no request ID has been set.
request_id_var: ContextVar[Optional[str]] = ContextVar('request_id', default=None)
class RequestIdFilter(logging.Filter):
    """Logging filter that stamps every record with the active request ID.

    The ID is read from the ``request_id_var`` context variable and stored
    on the record as ``request_id`` so that format strings can reference
    ``%(request_id)s``.

    Requirements: 9.5
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Attach ``request_id`` to *record* and let the record through.

        Args:
            record: Log record to annotate.

        Returns:
            bool: Always True; records are annotated, never rejected.
        """
        current_id = request_id_var.get()
        # A missing (None) or empty ID is rendered as the placeholder '-'.
        record.request_id = current_id if current_id else '-'
        return True
class SensitiveDataFilter(logging.Filter):
    """Filter that masks secrets in log records before they are emitted.

    API keys, bearer tokens, passwords and Authorization header values are
    replaced with ``***REDACTED***`` while the label in front of them is
    kept, so the log line stays readable.

    Masking is applied to the fully formatted message (``msg % args``), not
    to ``msg`` and ``args`` separately. This matters for the usual call
    shape ``logger.info("api_key=%s", key)``: the label lives in ``msg`` and
    the secret in ``args``, so neither matches a pattern on its own.

    NOTE: only the message is scanned; exception/traceback text attached to
    a record is not.

    Requirements: 10.5
    """

    # Replacement keeps the label (group 1) and hides the value (group 2).
    _REDACTED = r'\1***REDACTED***'
    # Characters allowed between a label and its value. Single quotes are
    # included so that Python dict reprs ({'api_key': '...'}) are covered
    # as well as JSON and key=value text.
    _SEP = r"""["'\s:=]+"""

    # (compiled pattern, replacement) pairs, applied in order.
    SENSITIVE_PATTERNS = [
        # API keys (various formats). '.' is part of the value so dotted
        # "<id>.<secret>" keys are masked in full.
        (re.compile(r'(api[_-]?key' + _SEP + r')([a-zA-Z0-9._-]{10,})', re.IGNORECASE), _REDACTED),
        (re.compile(r'(zhipu[_-]?api[_-]?key' + _SEP + r')([a-zA-Z0-9._-]{10,})', re.IGNORECASE), _REDACTED),
        # Bearer tokens: the value class follows the RFC 6750 b64token
        # alphabet so JWTs (three dot-separated parts) are masked whole.
        (re.compile(r'(bearer\s+)([a-zA-Z0-9._~+/=-]{10,})', re.IGNORECASE), _REDACTED),
        # Passwords
        (re.compile(r'(password' + _SEP + r')([^\s"]+)', re.IGNORECASE), _REDACTED),
        # Authorization headers: consume an optional auth scheme together
        # with the credential so "Basic <creds>" does not leak <creds>.
        (
            re.compile(
                r'(authorization' + _SEP + r')'
                r'((?:(?:basic|bearer|digest|negotiate|token)\s+)?[^\s"]+)',
                re.IGNORECASE,
            ),
            _REDACTED,
        ),
    ]

    def filter(self, record: logging.LogRecord) -> bool:
        """Mask sensitive data in *record*.

        The record's message is rendered with its arguments, masked, and
        stored back as ``record.msg`` with ``record.args`` set to None (the
        same merge that ``logging.handlers.QueueHandler.prepare`` performs).
        Running the filter again on an already-masked record is harmless.

        Args:
            record: Log record to filter.

        Returns:
            bool: Always True (records are modified, never rejected).
        """
        try:
            rendered = record.getMessage()
        except Exception:
            # Broken format string/args (or a failing __str__): the record
            # cannot be rendered here, so fall back to best-effort masking
            # of the raw pieces and let logging report the format error.
            self._mask_unformatted(record)
            return True

        record.msg = self._mask_sensitive_data(rendered)
        # Arguments are already merged into msg; clearing them prevents a
        # second %-formatting pass over the masked text.
        record.args = None
        return True

    def _mask_unformatted(self, record: logging.LogRecord) -> None:
        """Mask ``msg`` and string ``args`` separately, without formatting."""
        msg = getattr(record, 'msg', None)
        if isinstance(msg, str):
            record.msg = self._mask_sensitive_data(msg)

        args = getattr(record, 'args', None)
        if isinstance(args, dict):
            record.args = {
                key: self._mask_sensitive_data(value) if isinstance(value, str) else value
                for key, value in args.items()
            }
        elif isinstance(args, tuple):
            record.args = tuple(
                self._mask_sensitive_data(arg) if isinstance(arg, str) else arg
                for arg in args
            )

    def _mask_sensitive_data(self, text: str) -> str:
        """Mask sensitive data in text using regex patterns.

        Args:
            text: Text to mask.

        Returns:
            str: Text with every ``SENSITIVE_PATTERNS`` match redacted.
        """
        for pattern, replacement in self.SENSITIVE_PATTERNS:
            text = pattern.sub(replacement, text)
        return text
def setup_logging(
    log_level: str = "INFO",
    log_file: Optional[Path] = None,
    log_format: Optional[str] = None
) -> None:
    """Set up logging configuration for the application.

    Reconfigures the root logger: existing handlers are removed and closed,
    a console handler is installed, and, when ``log_file`` is given, a UTF-8
    file handler is added as well. Every handler gets a ``RequestIdFilter``
    followed by a ``SensitiveDataFilter``.

    Args:
        log_level: Logging level name (DEBUG, INFO, WARNING, ERROR,
            CRITICAL); case-insensitive.
        log_file: Optional path to log file. If None, logs only to console.
            Missing parent directories are created.
        log_format: Optional custom log format string. The default format
            includes timestamp, level, request_id, logger name and message.

    Raises:
        AttributeError: If ``log_level`` does not name an attribute of the
            ``logging`` module.

    Requirements: 10.5, 9.5
    """
    if log_format is None:
        log_format = "[%(asctime)s] [%(levelname)s] [%(request_id)s] [%(name)s] %(message)s"
    formatter = logging.Formatter(log_format, datefmt="%Y-%m-%d %H:%M:%S")

    root_logger = logging.getLogger()
    root_logger.setLevel(getattr(logging, log_level.upper()))

    # Detach AND close previous handlers: merely clearing the list would
    # leave earlier FileHandlers holding their file descriptors open when
    # setup_logging() is called more than once.
    for stale_handler in list(root_logger.handlers):
        root_logger.removeHandler(stale_handler)
        stale_handler.close()

    # One filter instance of each kind is shared by all handlers.
    request_id_filter = RequestIdFilter()
    sensitive_filter = SensitiveDataFilter()

    def _install(handler: logging.Handler) -> None:
        """Apply the common formatter/filters and attach to the root logger."""
        handler.setFormatter(formatter)
        # request_id must be set before the record is formatted.
        handler.addFilter(request_id_filter)
        handler.addFilter(sensitive_filter)
        root_logger.addHandler(handler)

    # Console handler
    _install(logging.StreamHandler())

    # File handler (if log file specified)
    if log_file:
        log_path = Path(log_file)
        # FileHandler opens the file immediately and fails if the target
        # directory is missing, so create it first.
        log_path.parent.mkdir(parents=True, exist_ok=True)
        _install(logging.FileHandler(log_path, encoding="utf-8"))

    # Log startup message
    logger = logging.getLogger(__name__)
    logger.info("Logging initialized at level %s", log_level)
    if log_file:
        logger.info("Logging to file: %s", log_file)
def get_logger(name: str) -> logging.Logger:
    """Return the logger registered under *name*.

    Thin convenience wrapper around ``logging.getLogger``.

    Args:
        name: Logger name (typically ``__name__`` of the calling module).

    Returns:
        logging.Logger: The logger for that name.
    """
    named_logger = logging.getLogger(name)
    return named_logger
def set_request_id(request_id: str) -> None:
    """Bind *request_id* to the current context.

    Call this at the start of each request so that log records emitted
    while handling it carry the request's ID.

    Args:
        request_id: Unique identifier for the request.

    Requirements: 9.5
    """
    # The reset token returned by ContextVar.set() is deliberately unused.
    request_id_var.set(request_id)
def clear_request_id() -> None:
    """Drop the request_id from the current context.

    Call this at the end of each request; afterwards the context variable
    holds None again.
    """
    request_id_var.set(None)
|