""" Structured logging configuration for the application. This module sets up consistent logging across all components with support for different log levels, formatters, and handlers. """ import logging import sys from pathlib import Path from typing import Optional from datetime import datetime # ANSI color codes for console output class LogColors: """ANSI color codes for terminal output.""" RESET = "\033[0m" BOLD = "\033[1m" RED = "\033[91m" GREEN = "\033[92m" YELLOW = "\033[93m" BLUE = "\033[94m" MAGENTA = "\033[95m" CYAN = "\033[96m" GRAY = "\033[90m" class ColoredFormatter(logging.Formatter): """Custom formatter with colors for different log levels.""" FORMATS = { logging.DEBUG: LogColors.GRAY + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s", logging.INFO: LogColors.GREEN + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s", logging.WARNING: LogColors.YELLOW + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s", logging.ERROR: LogColors.RED + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s", logging.CRITICAL: LogColors.BOLD + LogColors.RED + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s", } def format(self, record): log_fmt = self.FORMATS.get(record.levelno) formatter = logging.Formatter(log_fmt, datefmt="%Y-%m-%d %H:%M:%S") return formatter.format(record) class FileFormatter(logging.Formatter): """File formatter with timestamps and detailed information.""" def __init__(self): super().__init__( fmt="%(asctime)s | %(levelname)-8s | %(name)s:%(lineno)d | %(message)s", datefmt="%Y-%m-%d %H:%M:%S" ) def setup_logging( log_level: str = "INFO", log_file: Optional[str] = None, log_to_console: bool = True, ) -> None: """ Set up logging configuration for the application. Args: log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) log_file: Optional path to log file. If None, logs only to console log_to_console: Whether to log to console (default: True) """ # Convert log level string to logging constant numeric_level = getattr(logging, log_level.upper(), logging.INFO) # Get root logger root_logger = logging.getLogger() root_logger.setLevel(numeric_level) # Remove existing handlers root_logger.handlers.clear() # Console handler if log_to_console: console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(numeric_level) console_handler.setFormatter(ColoredFormatter()) root_logger.addHandler(console_handler) # File handler if log_file: log_path = Path(log_file) log_path.parent.mkdir(parents=True, exist_ok=True) file_handler = logging.FileHandler(log_file, encoding="utf-8") file_handler.setLevel(numeric_level) file_handler.setFormatter(FileFormatter()) root_logger.addHandler(file_handler) # Suppress overly verbose third-party loggers logging.getLogger("chromadb").setLevel(logging.WARNING) logging.getLogger("sentence_transformers").setLevel(logging.WARNING) logging.getLogger("urllib3").setLevel(logging.WARNING) logging.getLogger("httpx").setLevel(logging.WARNING) logging.getLogger("httpcore").setLevel(logging.WARNING) logging.getLogger("openai").setLevel(logging.WARNING) logging.getLogger("anthropic").setLevel(logging.WARNING) root_logger.info(f"Logging initialized at {log_level} level") def get_logger(name: str) -> logging.Logger: """ Get a logger instance with the specified name. Args: name: Logger name (typically __name__ of the module) Returns: Logger instance """ return logging.getLogger(name) def log_function_call(logger: logging.Logger): """ Decorator to log function calls with arguments and return values. Usage: @log_function_call(logger) def my_function(arg1, arg2): return result """ def decorator(func): def wrapper(*args, **kwargs): func_name = func.__name__ logger.debug(f"Calling {func_name} with args={args}, kwargs={kwargs}") try: result = func(*args, **kwargs) logger.debug(f"{func_name} completed successfully") return result except Exception as e: logger.error(f"{func_name} failed with error: {e}", exc_info=True) raise return wrapper return decorator # Structured logging helpers def log_pdf_processing(logger: logging.Logger, filename: str, pages: int, chunks: int): """Log PDF processing completion.""" logger.info(f"Processed PDF: {filename} | Pages: {pages} | Chunks: {chunks}") def log_retrieval(logger: logging.Logger, query: str, num_results: int, duration_ms: float): """Log retrieval operation.""" logger.info(f"Retrieved {num_results} chunks for query in {duration_ms:.2f}ms") logger.debug(f"Query: {query[:100]}...") def log_llm_call(logger: logging.Logger, model: str, tokens_in: int, tokens_out: int, duration_s: float): """Log LLM API call.""" logger.info( f"LLM call: {model} | In: {tokens_in} tokens | Out: {tokens_out} tokens | Duration: {duration_s:.2f}s" ) def log_embedding_generation(logger: logging.Logger, num_chunks: int, duration_s: float): """Log embedding generation.""" chunks_per_sec = num_chunks / duration_s if duration_s > 0 else 0 logger.info(f"Generated embeddings for {num_chunks} chunks in {duration_s:.2f}s ({chunks_per_sec:.1f} chunks/s)") def log_cache_hit(logger: logging.Logger, cache_type: str, key: str): """Log cache hit.""" logger.debug(f"Cache hit: {cache_type} | Key: {key[:50]}") def log_cache_miss(logger: logging.Logger, cache_type: str, key: str): """Log cache miss.""" logger.debug(f"Cache miss: {cache_type} | Key: {key[:50]}") def log_error(logger: logging.Logger, operation: str, error: Exception): """Log error with context.""" logger.error(f"Error in {operation}: {str(error)}", exc_info=True)