teoat committed on
Commit
71af2e7
·
verified ·
1 Parent(s): caf23fd

Upload core/logging.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. core/logging.py +63 -3
core/logging.py CHANGED
@@ -2,12 +2,60 @@ import contextlib
2
  import json
3
  import logging
4
  import os
 
5
  import sys
6
  from datetime import datetime, timezone
7
  from pathlib import Path
8
  from typing import Any
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  class JSONFormatter(logging.Formatter):
12
  """Custom JSON formatter for structured logging"""
13
 
@@ -33,7 +81,15 @@ class JSONFormatter(logging.Formatter):
33
  if hasattr(record, "user_id"):
34
  log_entry["user_id"] = record.user_id
35
  if hasattr(record, "ip_address"):
36
- log_entry["ip_address"] = record.ip_address
 
 
 
 
 
 
 
 
37
 
38
  return json.dumps(log_entry, default=str)
39
 
@@ -83,7 +139,9 @@ def setup_logging(
83
 
84
  from logging.handlers import RotatingFileHandler
85
 
86
- fh = RotatingFileHandler(log_file, maxBytes=max_file_size, backupCount=backup_count, encoding="utf-8")
 
 
87
  fh.setLevel(numeric_level)
88
  fh.setFormatter(formatter)
89
  logger.addHandler(fh)
@@ -239,7 +297,9 @@ def configure_environment_logging():
239
 
240
  if env == "development":
241
  # Development: more verbose, console only, human-readable
242
- setup_logging(level="DEBUG", format_type="text", enable_console=True, enable_file=False)
 
 
243
  elif env == "testing":
244
  # Testing: capture all logs, minimal output, structured
245
  setup_logging(
 
2
  import json
3
  import logging
4
  import os
5
+ import re
6
  import sys
7
  from datetime import datetime, timezone
8
  from pathlib import Path
9
  from typing import Any
10
 
11
 
12
class PIIScrubber:
    """Regex-based detection and masking of PII in free-form text.

    Each class attribute is a pre-compiled pattern for one category of
    sensitive data. ``scrub_pii`` replaces every match of every pattern
    with a placeholder built from the caller-supplied mask.
    """

    # PII patterns
    # The TLD class is ``[A-Za-z]``: a ``|`` inside a character class is a
    # literal pipe (not alternation) and would let a match run across
    # pipe-delimited fields such as ``user@example.com|next``.
    EMAIL_PATTERN = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
    PHONE_PATTERN = re.compile(r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b")
    SSN_PATTERN = re.compile(r"\b\d{3}-\d{2}-\d{4}\b")
    CREDIT_CARD_PATTERN = re.compile(r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b")
    IP_ADDRESS_PATTERN = re.compile(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b")

    # Additional patterns for sensitive data
    # NOTE: BANK_ACCOUNT_PATTERN matches any standalone run of 8-17 digits.
    BANK_ACCOUNT_PATTERN = re.compile(r"\b\d{8,17}\b")
    PASSPORT_PATTERN = re.compile(r"\b[A-Z]{1,2}\d{7,8}\b")
    DRIVER_LICENSE_PATTERN = re.compile(r"\b[A-Z]{1,2}\d{6,8}\b")

    @staticmethod
    def scrub_pii(text: str, mask_char: str = "***") -> str:
        """Return *text* with every recognised PII match masked.

        Args:
            text: The string to scrub. Falsy or non-``str`` values are
                returned unchanged.
            mask_char: Mask token used to build each placeholder (for
                example ``"***@***.com"`` for an e-mail address). It is
                always inserted literally.

        Returns:
            The scrubbed string, or *text* itself when it is empty or not
            a string.
        """
        if not text or not isinstance(text, str):
            return text

        # Patterns are applied in this order; each pass sees the output of
        # the previous one.
        patterns = (
            (PIIScrubber.EMAIL_PATTERN, f"{mask_char}@{mask_char}.com"),
            (PIIScrubber.PHONE_PATTERN, f"{mask_char}-{mask_char}-{mask_char}"),
            (PIIScrubber.SSN_PATTERN, f"{mask_char}-{mask_char}-{mask_char}"),
            (
                PIIScrubber.CREDIT_CARD_PATTERN,
                f"{mask_char}-{mask_char}-{mask_char}-{mask_char}",
            ),
            (
                PIIScrubber.IP_ADDRESS_PATTERN,
                f"{mask_char}.{mask_char}.{mask_char}.{mask_char}",
            ),
            (PIIScrubber.BANK_ACCOUNT_PATTERN, mask_char * 8),
            (PIIScrubber.PASSPORT_PATTERN, f"{mask_char}{mask_char}"),
            (PIIScrubber.DRIVER_LICENSE_PATTERN, f"{mask_char}{mask_char}"),
        )

        scrubbed_text = text
        for pattern, replacement in patterns:
            # A callable replacement is inserted verbatim. A plain string
            # would be parsed as a template, so a mask containing a
            # backslash or ``\g<0>`` could raise or re-emit the matched PII.
            scrubbed_text = pattern.sub(lambda _match: replacement, scrubbed_text)

        return scrubbed_text
57
+
58
+
59
  class JSONFormatter(logging.Formatter):
60
  """Custom JSON formatter for structured logging"""
61
 
 
81
  if hasattr(record, "user_id"):
82
  log_entry["user_id"] = record.user_id
83
  if hasattr(record, "ip_address"):
84
+ log_entry["ip_address"] = PIIScrubber.scrub_pii(record.ip_address)
85
+
86
+ # Scrub the message itself
87
+ log_entry["message"] = PIIScrubber.scrub_pii(log_entry["message"])
88
+
89
+ # Scrub extra fields
90
+ for key, value in log_entry.items():
91
+ if isinstance(value, str) and key not in ["timestamp", "level", "logger"]:
92
+ log_entry[key] = PIIScrubber.scrub_pii(value)
93
 
94
  return json.dumps(log_entry, default=str)
95
 
 
139
 
140
  from logging.handlers import RotatingFileHandler
141
 
142
+ fh = RotatingFileHandler(
143
+ log_file, maxBytes=max_file_size, backupCount=backup_count, encoding="utf-8"
144
+ )
145
  fh.setLevel(numeric_level)
146
  fh.setFormatter(formatter)
147
  logger.addHandler(fh)
 
297
 
298
  if env == "development":
299
  # Development: more verbose, console only, human-readable
300
+ setup_logging(
301
+ level="DEBUG", format_type="text", enable_console=True, enable_file=False
302
+ )
303
  elif env == "testing":
304
  # Testing: capture all logs, minimal output, structured
305
  setup_logging(