zeta / src /utils /logging.py
rodrigo-moonray
Deploy zeta-only embeddings (NV-Embed-v2 + E5-small)
9b457ed
"""
Structured logging configuration for the application.
This module sets up consistent logging across all components with support for
different log levels, formatters, and handlers.
"""
import functools
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional
# ANSI color codes for console output
class LogColors:
    """
    ANSI escape sequences used to style terminal log output.
    Every attribute is a plain string. Wrap text as ``COLOR + text + RESET``
    so the styling does not carry over into whatever is printed next.
    """

    # Attribute controls
    RESET = "\033[0m"  # switches all styling back off
    BOLD = "\033[1m"

    # Foreground colors, in escape-code order (90-96)
    GRAY = "\033[90m"
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    MAGENTA = "\033[95m"
    CYAN = "\033[96m"
class ColoredFormatter(logging.Formatter):
    """
    Console formatter that colors the level name according to severity.
    Every record is rendered as ``LEVEL | logger name | message``; only the
    color wrapped around the level name changes between levels. Records whose
    level has no entry in ``FORMATS`` (for example custom levels) are rendered
    with the same layout, without color.
    """

    # Per-level layouts: colored level name, then logger name and message.
    FORMATS = {
        logging.DEBUG: LogColors.GRAY + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.INFO: LogColors.GREEN + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.WARNING: LogColors.YELLOW + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.ERROR: LogColors.RED + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.CRITICAL: LogColors.BOLD + LogColors.RED + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
    }

    # Uncolored layout used when a record's level is not listed in FORMATS.
    _FALLBACK_FORMAT = "%(levelname)s | %(name)s | %(message)s"

    # Passed to every delegate formatter; only takes effect if a layout
    # contains %(asctime)s, which none of the layouts above do.
    _DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as ``logging.Formatter``."""
        super().__init__(*args, **kwargs)
        # Delegate formatters keyed by layout string, created on first use so
        # that a new Formatter is not allocated for every single record.
        self._formatters = {}

    def format(self, record):
        """
        Render ``record`` using the layout registered for its level.
        Args:
            record: The ``logging.LogRecord`` to render.
        Returns:
            The formatted log line.
        """
        log_fmt = self.FORMATS.get(record.levelno, self._FALLBACK_FORMAT)
        formatter = self._formatters.get(log_fmt)
        if formatter is None:
            formatter = logging.Formatter(log_fmt, datefmt=self._DATE_FORMAT)
            self._formatters[log_fmt] = formatter
        return formatter.format(record)
class FileFormatter(logging.Formatter):
    """
    Formatter for log files.
    Each line carries a timestamp, the level name padded to eight characters,
    the logger name with the emitting line number, and the message.
    """

    def __init__(self):
        """Configure the fixed record and timestamp layouts."""
        record_layout = "%(asctime)s | %(levelname)-8s | %(name)s:%(lineno)d | %(message)s"
        timestamp_layout = "%Y-%m-%d %H:%M:%S"
        super().__init__(fmt=record_layout, datefmt=timestamp_layout)
def setup_logging(
    log_level: str = "INFO",
    log_file: Optional[str] = None,
    log_to_console: bool = True,
) -> None:
    """
    Set up logging configuration for the application.
    Reconfigures the root logger: handlers that were attached before the call
    are removed and closed, then a console and/or file handler is installed.
    A fixed set of chatty third-party loggers is capped at WARNING.
    Args:
        log_level: Logging level name (DEBUG, INFO, WARNING, ERROR, CRITICAL),
            case-insensitive. Unrecognized names fall back to INFO.
        log_file: Optional path to log file. If None, no file handler is
            installed. Missing parent directories are created.
        log_to_console: Whether to log to stdout (default: True)
    """
    # Accept only attributes that are real numeric levels. A bare getattr also
    # returns non-level module attributes (e.g. logging.BASIC_FORMAT), which
    # would make setLevel() raise instead of falling back to INFO.
    numeric_level = getattr(logging, log_level.upper(), None)
    if not isinstance(numeric_level, int):
        numeric_level = logging.INFO

    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Remove AND close the previous handlers: merely clearing the list would
    # leave file handlers open, leaking a descriptor on every reconfiguration.
    for stale_handler in root_logger.handlers[:]:
        root_logger.removeHandler(stale_handler)
        stale_handler.close()

    # Console handler
    if log_to_console:
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setLevel(numeric_level)
        console_handler.setFormatter(ColoredFormatter())
        root_logger.addHandler(console_handler)

    # File handler
    if log_file:
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(log_path, encoding="utf-8")
        file_handler.setLevel(numeric_level)
        file_handler.setFormatter(FileFormatter())
        root_logger.addHandler(file_handler)

    # Suppress overly verbose third-party loggers
    noisy_loggers = (
        "chromadb",
        "sentence_transformers",
        "urllib3",
        "httpx",
        "httpcore",
        "openai",
        "anthropic",
    )
    for noisy_name in noisy_loggers:
        logging.getLogger(noisy_name).setLevel(logging.WARNING)

    # Report the level actually in effect, which differs from the argument
    # when an unrecognized name fell back to INFO.
    root_logger.info(
        "Logging initialized at %s level", logging.getLevelName(numeric_level)
    )
def get_logger(name: str) -> logging.Logger:
    """
    Return the logger registered under ``name``.
    Thin wrapper around ``logging.getLogger`` so callers only need to import
    this module.
    Args:
        name: Logger name (typically the calling module's ``__name__``)
    Returns:
        The ``logging.Logger`` for that name
    """
    named_logger = logging.getLogger(name)
    return named_logger
def log_function_call(logger: logging.Logger):
    """
    Decorator to log function calls with arguments and outcome.
    A DEBUG record is emitted before the call and after a successful return.
    If the function raises, an ERROR record with the traceback is emitted and
    the exception is re-raised unchanged. The wrapped function keeps its
    ``__name__``, ``__doc__`` and other metadata.
    Usage:
        @log_function_call(logger)
        def my_function(arg1, arg2):
            return result
    Args:
        logger: Logger that receives the call records
    Returns:
        A decorator that wraps a function with call logging
    """
    def decorator(func):
        func_name = func.__name__

        # wraps() copies the metadata so introspection, docs and stacked
        # decorators still see the original function rather than "wrapper".
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Lazy %-style arguments: the (possibly large) argument reprs are
            # only built when DEBUG is actually enabled for this logger.
            logger.debug(
                "Calling %s with args=%s, kwargs=%s", func_name, args, kwargs
            )
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                logger.error(
                    "%s failed with error: %s", func_name, e, exc_info=True
                )
                raise
            logger.debug("%s completed successfully", func_name)
            return result

        return wrapper

    return decorator
# Structured logging helpers
def log_pdf_processing(logger: logging.Logger, filename: str, pages: int, chunks: int):
    """
    Emit an INFO record summarizing a processed PDF.
    Args:
        logger: Logger that receives the record
        filename: Name of the processed PDF
        pages: Page count to report
        chunks: Chunk count to report
    """
    summary = f"Processed PDF: {filename} | Pages: {pages} | Chunks: {chunks}"
    logger.info(summary)
def log_retrieval(logger: logging.Logger, query: str, num_results: int, duration_ms: float):
    """
    Log a retrieval operation.
    Emits an INFO record with the result count and duration, followed by a
    DEBUG record with a preview of the query text.
    Args:
        logger: Logger that receives the records
        query: Query text; only its first 100 characters are logged
        num_results: Number of chunks retrieved
        duration_ms: Duration of the retrieval in milliseconds
    """
    logger.info(f"Retrieved {num_results} chunks for query in {duration_ms:.2f}ms")

    # Add the ellipsis only when text was actually cut off, so a short query
    # is not presented as if it had been truncated.
    preview_limit = 100
    preview = query[:preview_limit]
    if len(query) > preview_limit:
        preview += "..."
    logger.debug(f"Query: {preview}")
def log_llm_call(logger: logging.Logger, model: str, tokens_in: int, tokens_out: int, duration_s: float):
    """
    Emit an INFO record describing one LLM API call.
    Args:
        logger: Logger that receives the record
        model: Model identifier to report
        tokens_in: Input token count to report
        tokens_out: Output token count to report
        duration_s: Call duration in seconds (logged with two decimals)
    """
    call_summary = f"LLM call: {model} | In: {tokens_in} tokens | Out: {tokens_out} tokens | Duration: {duration_s:.2f}s"
    logger.info(call_summary)
def log_embedding_generation(logger: logging.Logger, num_chunks: int, duration_s: float):
    """
    Emit an INFO record with embedding throughput.
    Args:
        logger: Logger that receives the record
        num_chunks: Number of chunks that were embedded
        duration_s: Time taken in seconds
    """
    # A non-positive duration cannot yield a meaningful rate; report zero
    # rather than dividing by it.
    if duration_s > 0:
        throughput = num_chunks / duration_s
    else:
        throughput = 0
    logger.info(f"Generated embeddings for {num_chunks} chunks in {duration_s:.2f}s ({throughput:.1f} chunks/s)")
def log_cache_hit(logger: logging.Logger, cache_type: str, key: str):
    """
    Emit a DEBUG record for a cache hit.
    Args:
        logger: Logger that receives the record
        cache_type: Label of the cache that was consulted
        key: Cache key; only its first 50 characters are logged
    """
    shortened_key = key[:50]
    logger.debug(f"Cache hit: {cache_type} | Key: {shortened_key}")
def log_cache_miss(logger: logging.Logger, cache_type: str, key: str):
    """
    Emit a DEBUG record for a cache miss.
    Args:
        logger: Logger that receives the record
        cache_type: Label of the cache that was consulted
        key: Cache key; only its first 50 characters are logged
    """
    shortened_key = key[:50]
    logger.debug(f"Cache miss: {cache_type} | Key: {shortened_key}")
def log_error(logger: logging.Logger, operation: str, error: Exception):
    """
    Log an error with context and the traceback of ``error``.
    Args:
        logger: Logger that receives the ERROR record
        operation: Name of the operation that failed
        error: The exception to report
    """
    # Pass the exception itself instead of exc_info=True: True captures
    # whatever exception is currently being handled, which is nothing when
    # this is called outside an ``except`` block and may be a different
    # exception inside one. If ``error`` is not an exception instance,
    # logging falls back to the currently handled exception, as before.
    logger.error(f"Error in {operation}: {error}", exc_info=error)