TranscriptWriting / logger.py
jmisak's picture
Upload 57 files
52d0298 verified
raw
history blame
8.16 kB
"""
Structured Logging System for TranscriptorAI
Replaces scattered print() statements with proper logging infrastructure.
Supports different log levels, prevents PII leakage, and provides clean output.
"""
import logging
import os
import re
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Optional
class SafeFormatter(logging.Formatter):
    """Formatter that redacts common PII patterns from the rendered log line.

    Redaction is applied to the string returned by
    ``logging.Formatter.format``, i.e. after %-style arguments have been
    interpolated into the message.

    Args:
        *args: Forwarded unchanged to ``logging.Formatter``.
        sanitize_pii: When False, ``format`` returns the line untouched.
        **kwargs: Forwarded unchanged to ``logging.Formatter``.
    """

    # (pattern, replacement) pairs, compiled once and applied in this order.
    _REDACTIONS = (
        # Email addresses. The TLD class is [A-Za-z]; writing it as
        # [A-Z|a-z] would also accept a literal "|" and swallow whatever
        # follows a pipe into the redaction.
        (
            re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'),
            '[EMAIL]',
        ),
        # 10-digit phone numbers with optional "-", "." or whitespace separators
        (re.compile(r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b'), '[PHONE]'),
        # 3-2-4 digit groups (SSN layout) with optional "-" or whitespace
        (re.compile(r'\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b'), '[SSN]'),
        # "hf_" tokens: keep the prefix + first 4 and the last 4 characters
        # so a token can still be recognised while debugging
        (
            re.compile(r'\b(hf_[a-zA-Z0-9]{4})[a-zA-Z0-9]+([a-zA-Z0-9]{4})\b'),
            r'\1****\2',
        ),
    )

    def __init__(self, *args, sanitize_pii: bool = True, **kwargs):
        super().__init__(*args, **kwargs)
        self.sanitize_pii = sanitize_pii

    def format(self, record):
        """Render ``record`` and, if enabled, redact PII from the result.

        Args:
            record: The ``logging.LogRecord`` to render.

        Returns:
            The formatted line, with matches of each redaction pattern
            replaced when ``sanitize_pii`` is true.
        """
        line = super().format(record)
        if not self.sanitize_pii:
            return line
        for pattern, replacement in self._REDACTIONS:
            line = pattern.sub(replacement, line)
        return line
class TranscriptorLogger:
    """Central logging system for TranscriptorAI.

    A singleton wrapper around the ``"TranscriptorAI"`` logger. Handlers and
    levels are configured on the first construction only; later
    constructions return the same, already configured instance.

    Environment variables read on first construction (each is compared
    case-insensitively against ``"true"``):
        DEBUG_MODE: default "False". Enables DEBUG level and the verbose
            console format.
        SANITIZE_LOGS: default "True". Passed to ``SafeFormatter`` as
            ``sanitize_pii``.
        LOG_TO_FILE: default "False". Adds a file handler under ``logs/``.
    """

    _instance = None
    _initialized = False

    # Verbose layout used by the debug console handler and the file handler
    _VERBOSE_FORMAT = (
        '%(asctime)s - %(name)s - [%(levelname)s] - '
        '%(funcName)s:%(lineno)d - %(message)s'
    )

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every TranscriptorLogger() call; configure once.
        if TranscriptorLogger._initialized:
            return

        self.logger = logging.getLogger("TranscriptorAI")
        self.debug_mode = os.getenv("DEBUG_MODE", "False").lower() == "true"
        self.sanitize_logs = os.getenv("SANITIZE_LOGS", "True").lower() == "true"

        # Set log level based on debug mode
        if self.debug_mode:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        # Remove existing handlers to avoid duplicates
        self.logger.handlers.clear()

        # Create formatters
        if self.debug_mode:
            console_format = SafeFormatter(
                self._VERBOSE_FORMAT,
                sanitize_pii=self.sanitize_logs
            )
        else:
            console_format = SafeFormatter(
                '%(levelname)s: %(message)s',
                sanitize_pii=self.sanitize_logs
            )

        # Single console handler: every level is written to stdout
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(console_format)
        console_handler.setLevel(logging.DEBUG if self.debug_mode else logging.INFO)
        self.logger.addHandler(console_handler)

        # File handler for production (optional)
        log_to_file = os.getenv("LOG_TO_FILE", "False").lower() == "true"
        if log_to_file:
            self._add_file_handler()

        TranscriptorLogger._initialized = True

    def _add_file_handler(self):
        """Add a file handler writing to ``logs/transcriptor_YYYYMMDD.log``.

        The ``logs`` directory is created relative to the current working
        directory if it does not exist.
        """
        log_dir = Path("logs")
        log_dir.mkdir(exist_ok=True)
        log_file = log_dir / f"transcriptor_{datetime.now().strftime('%Y%m%d')}.log"

        file_format = SafeFormatter(
            self._VERBOSE_FORMAT,
            sanitize_pii=self.sanitize_logs
        )
        # success()/progress() emit non-ASCII prefixes; pin UTF-8 so the
        # file does not depend on the platform's default encoding.
        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setFormatter(file_format)
        file_handler.setLevel(logging.DEBUG)
        self.logger.addHandler(file_handler)

    # Convenience methods
    #
    # Each method defaults ``stacklevel`` to 2 so that %(funcName)s and
    # %(lineno)d describe the code that called the wrapper instead of the
    # wrapper itself. An explicit ``stacklevel`` from the caller is kept
    # as given and is counted from the wrapper's own frame.

    def debug(self, msg: str, *args, **kwargs):
        """Log debug message (only shown in debug mode)"""
        kwargs.setdefault("stacklevel", 2)
        self.logger.debug(msg, *args, **kwargs)

    def info(self, msg: str, *args, **kwargs):
        """Log info message"""
        kwargs.setdefault("stacklevel", 2)
        self.logger.info(msg, *args, **kwargs)

    def warning(self, msg: str, *args, **kwargs):
        """Log warning message"""
        kwargs.setdefault("stacklevel", 2)
        self.logger.warning(msg, *args, **kwargs)

    def error(self, msg: str, *args, **kwargs):
        """Log error message"""
        kwargs.setdefault("stacklevel", 2)
        self.logger.error(msg, *args, **kwargs)

    def critical(self, msg: str, *args, **kwargs):
        """Log critical error message"""
        kwargs.setdefault("stacklevel", 2)
        self.logger.critical(msg, *args, **kwargs)

    def success(self, msg: str, *args, **kwargs):
        """Log success message (displayed as INFO with ✓ prefix)"""
        kwargs.setdefault("stacklevel", 2)
        self.logger.info(f"✓ {msg}", *args, **kwargs)

    def progress(self, msg: str, *args, **kwargs):
        """Log progress update (displayed as INFO with ⏳ prefix)"""
        kwargs.setdefault("stacklevel", 2)
        self.logger.info(f"⏳ {msg}", *args, **kwargs)

    def step(self, step_num: int, total: int, msg: str):
        """Log step progress as ``[step_num/total] msg`` at INFO level"""
        self.logger.info(f"[{step_num}/{total}] {msg}", stacklevel=2)

    def section(self, title: str):
        """Log section header: the title between two 60-character rules"""
        separator = "=" * 60
        self.logger.info(separator, stacklevel=2)
        self.logger.info(title, stacklevel=2)
        self.logger.info(separator, stacklevel=2)
# Global logger instance
_global_logger: Optional[TranscriptorLogger] = None


def get_logger() -> TranscriptorLogger:
    """Return the module-wide logger, constructing it on the first call."""
    global _global_logger
    if _global_logger is not None:
        return _global_logger
    _global_logger = TranscriptorLogger()
    return _global_logger
# Convenience functions for backward compatibility
def log_debug(msg: str):
    """Log a debug message through the shared logger.

    Args:
        msg: Text to log.
    """
    # stacklevel=3 skips this function and TranscriptorLogger.debug, so
    # %(funcName)s / %(lineno)d identify the code that called log_debug.
    get_logger().debug(msg, stacklevel=3)
def log_info(msg: str):
    """Log an info message through the shared logger.

    Args:
        msg: Text to log.
    """
    # stacklevel=3 skips this function and TranscriptorLogger.info, so
    # %(funcName)s / %(lineno)d identify the code that called log_info.
    get_logger().info(msg, stacklevel=3)
def log_warning(msg: str):
    """Log a warning message through the shared logger.

    Args:
        msg: Text to log.
    """
    # stacklevel=3 skips this function and TranscriptorLogger.warning, so
    # %(funcName)s / %(lineno)d identify the code that called log_warning.
    get_logger().warning(msg, stacklevel=3)
def log_error(msg: str):
    """Log an error message through the shared logger.

    Args:
        msg: Text to log.
    """
    # stacklevel=3 skips this function and TranscriptorLogger.error, so
    # %(funcName)s / %(lineno)d identify the code that called log_error.
    get_logger().error(msg, stacklevel=3)
def log_success(msg: str):
    """Log a success message through the shared logger.

    Args:
        msg: Text to log; TranscriptorLogger.success adds the "✓" prefix.
    """
    # stacklevel=3 skips this function and TranscriptorLogger.success, so
    # %(funcName)s / %(lineno)d identify the code that called log_success.
    get_logger().success(msg, stacklevel=3)
def log_progress(msg: str):
    """Log a progress message through the shared logger.

    Args:
        msg: Text to log; TranscriptorLogger.progress adds the "⏳" prefix.
    """
    # stacklevel=3 skips this function and TranscriptorLogger.progress, so
    # %(funcName)s / %(lineno)d identify the code that called log_progress.
    get_logger().progress(msg, stacklevel=3)
def log_section(title: str):
    """Emit a section header for ``title`` via the shared logger."""
    shared = get_logger()
    shared.section(title)
def log_step(step_num: int, total: int, msg: str):
    """Report step ``step_num`` of ``total`` with ``msg`` via the shared logger."""
    shared = get_logger()
    shared.step(step_num, total, msg)
# Context manager for logging operations
class LogContext:
    """Context manager that logs the start and the outcome of an operation.

    On entry it logs ``Starting: <operation>``. On exit it logs either
    ``Completed: <operation> (<seconds>s)`` through ``logger.success`` or
    ``Failed: <operation> - <detail>`` through ``logger.error``. Exceptions
    raised inside the ``with`` body are never suppressed.

    Args:
        operation: Label used in the log messages.
        logger: Logger to report through; when omitted (or falsy) the
            shared logger from ``get_logger()`` is used.
    """

    def __init__(self, operation: str, logger: Optional[TranscriptorLogger] = None):
        self.operation = operation
        self.logger = logger or get_logger()
        # Wall-clock time of entry; None until __enter__ runs
        self.start_time = None
        # Monotonic reading taken on entry, used only to compute elapsed time
        self._started_at = None

    def __enter__(self):
        self.start_time = datetime.now()
        self._started_at = time.perf_counter()
        self.logger.info(f"Starting: {self.operation}")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Measure with a monotonic clock: a wall-clock adjustment during the
        # operation would otherwise skew (or even negate) the duration.
        elapsed = time.perf_counter() - self._started_at
        if exc_type is None:
            self.logger.success(f"Completed: {self.operation} ({elapsed:.2f}s)")
        else:
            # Exceptions raised without a message stringify to ""; fall back
            # to the exception type name so the log line still says what failed.
            detail = str(exc_val) or exc_type.__name__
            self.logger.error(f"Failed: {self.operation} - {detail}")
        return False  # Don't suppress exceptions
# Example usage
if __name__ == "__main__":
    import time

    # Manual smoke test: exercise each logging helper once
    demo = get_logger()
    demo.section("Test Logging System")
    demo.info("This is an info message")
    demo.debug("This is a debug message (only in debug mode)")
    demo.warning("This is a warning")
    demo.error("This is an error")
    demo.success("Operation completed successfully")
    demo.progress("Processing transcripts...")
    demo.step(1, 5, "Extracting text")

    # Messages carrying PII-like content, to observe the sanitizer's output
    pii_samples = (
        "User email: john.doe@example.com",
        "Phone: 555-123-4567",
        "Token: hf_abcdefghijklmnopqrstuvwxyz1234567890",
    )
    for sample in pii_samples:
        demo.info(sample)

    # Start/complete messages around a short piece of work
    with LogContext("Sample Operation"):
        demo.info("Doing some work...")
        time.sleep(0.1)