Spaces:

retcheto
/

zeta

Running

rodrigo-moonray

Deploy zeta-only embeddings (NV-Embed-v2 + E5-small)

9b457ed about 1 month ago

6.26 kB

	"""
	Structured logging configuration for the application.

	This module sets up consistent logging across all components with support for
	different log levels, formatters, and handlers.
	"""

	import logging
	import sys
	from pathlib import Path
	from typing import Optional
	from datetime import datetime


	# ANSI color codes for console output
	class LogColors:
	    """Namespace of ANSI escape sequences used to style terminal text."""

	    # Attribute controls.
	    RESET = "\033[0m"  # appended after colored text to end the styling
	    BOLD = "\033[1m"

	    # Foreground colors, listed in ascending escape-code order (90-96).
	    GRAY = "\033[90m"
	    RED = "\033[91m"
	    GREEN = "\033[92m"
	    YELLOW = "\033[93m"
	    BLUE = "\033[94m"
	    MAGENTA = "\033[95m"
	    CYAN = "\033[96m"


	class ColoredFormatter(logging.Formatter):
	    """Console formatter that colors the level name according to severity.

	    Each record is rendered as ``LEVEL | logger name | message``. Only the
	    level name is wrapped in ANSI codes; the rest of the line is left plain.
	    """

	    # Uncolored tail shared by every layout.
	    _TAIL = " | %(name)s | %(message)s"

	    # Format string per standard level; the level name is colored and then
	    # reset so the color does not bleed into the rest of the line.
	    FORMATS = {
	        logging.DEBUG: LogColors.GRAY + "%(levelname)s" + LogColors.RESET + _TAIL,
	        logging.INFO: LogColors.GREEN + "%(levelname)s" + LogColors.RESET + _TAIL,
	        logging.WARNING: LogColors.YELLOW + "%(levelname)s" + LogColors.RESET + _TAIL,
	        logging.ERROR: LogColors.RED + "%(levelname)s" + LogColors.RESET + _TAIL,
	        logging.CRITICAL: LogColors.BOLD + LogColors.RED + "%(levelname)s" + LogColors.RESET + _TAIL,
	    }

	    # Layout for levels that have no entry in FORMATS (e.g. custom levels):
	    # same shape, no color, so level and logger name are still shown.
	    _FALLBACK_FORMAT = "%(levelname)s" + _TAIL

	    def __init__(self, *args, **kwargs):
	        """Accept the same arguments as ``logging.Formatter``."""
	        super().__init__(*args, **kwargs)
	        # Delegate formatters keyed by format string, built on first use so
	        # a new Formatter is not constructed for every record.
	        self._formatters = {}

	    def format(self, record):
	        """Render ``record`` with the layout registered for its level.

	        Args:
	            record: The ``logging.LogRecord`` to render.

	        Returns:
	            The formatted log line.
	        """
	        log_fmt = self.FORMATS.get(record.levelno, self._FALLBACK_FORMAT)
	        formatter = self._formatters.get(log_fmt)
	        if formatter is None:
	            # datefmt only takes effect if a layout references %(asctime)s;
	            # none of the layouts above do.
	            formatter = logging.Formatter(log_fmt, datefmt="%Y-%m-%d %H:%M:%S")
	            self._formatters[log_fmt] = formatter
	        return formatter.format(record)


	class FileFormatter(logging.Formatter):
	    """Plain-text formatter for log files.

	    Each line has the form
	    ``timestamp | LEVEL | logger name:line number | message``, with the
	    level name left-aligned and padded to eight characters.
	    """

	    def __init__(self):
	        """Configure the fixed file layout and timestamp format."""
	        super().__init__(
	            # Plain "|" separators: "\|" is not a valid escape sequence and
	            # would write a literal backslash into every log line.
	            fmt="%(asctime)s | %(levelname)-8s | %(name)s:%(lineno)d | %(message)s",
	            datefmt="%Y-%m-%d %H:%M:%S",
	        )


	def setup_logging(
	    log_level: str = "INFO",
	    log_file: Optional[str] = None,
	    log_to_console: bool = True,
	) -> None:
	    """
	    Set up logging configuration for the application.

	    Reconfigures the root logger: existing handlers are removed and closed,
	    then a console and/or file handler is attached as requested. A fixed set
	    of third-party loggers is raised to WARNING.

	    Args:
	        log_level: Logging level name (DEBUG, INFO, WARNING, ERROR, CRITICAL),
	            case-insensitive. Unrecognized names fall back to INFO.
	        log_file: Optional path to a log file. Missing parent directories are
	            created. If None, no file handler is attached.
	        log_to_console: Whether to attach a stdout handler (default: True).
	    """
	    # Resolve the level name. The isinstance check rejects names that exist
	    # on the logging module but are not levels (e.g. "BASIC_FORMAT"), which
	    # would otherwise make setLevel() raise.
	    numeric_level = getattr(logging, log_level.upper(), None)
	    if not isinstance(numeric_level, int):
	        numeric_level = logging.INFO

	    root_logger = logging.getLogger()
	    root_logger.setLevel(numeric_level)

	    # Remove AND close existing handlers: merely clearing the list would
	    # leave file handles from a previous setup_logging() call open.
	    for stale_handler in root_logger.handlers[:]:
	        root_logger.removeHandler(stale_handler)
	        stale_handler.close()

	    # Console handler
	    if log_to_console:
	        console_handler = logging.StreamHandler(sys.stdout)
	        console_handler.setLevel(numeric_level)
	        console_handler.setFormatter(ColoredFormatter())
	        root_logger.addHandler(console_handler)

	    # File handler
	    if log_file:
	        log_path = Path(log_file)
	        log_path.parent.mkdir(parents=True, exist_ok=True)

	        file_handler = logging.FileHandler(log_path, encoding="utf-8")
	        file_handler.setLevel(numeric_level)
	        file_handler.setFormatter(FileFormatter())
	        root_logger.addHandler(file_handler)

	    # Suppress overly verbose third-party loggers
	    for noisy_name in (
	        "chromadb",
	        "sentence_transformers",
	        "urllib3",
	        "httpx",
	        "httpcore",
	        "openai",
	        "anthropic",
	    ):
	        logging.getLogger(noisy_name).setLevel(logging.WARNING)

	    # Report the level actually applied, not the raw (possibly invalid) input.
	    root_logger.info(f"Logging initialized at {logging.getLevelName(numeric_level)} level")


	def get_logger(name: str) -> logging.Logger:
	    """
	    Look up a logger by name via the standard logging module.

	    Args:
	        name: Logger name (typically __name__ of the calling module)

	    Returns:
	        The ``logging.Logger`` registered under ``name``
	    """
	    named_logger = logging.getLogger(name)
	    return named_logger


	def log_function_call(logger: logging.Logger):
	    """
	    Decorator factory that logs calls, completion and failures of a function.

	    The call (with its arguments) and successful completion are logged at
	    DEBUG. An exception raised by the function is logged at ERROR with its
	    traceback and then re-raised unchanged.

	    Args:
	        logger: Logger that receives the messages.

	    Returns:
	        A decorator that wraps the target function.

	    Usage:
	        @log_function_call(logger)
	        def my_function(arg1, arg2):
	            return result
	    """
	    # Imported locally so this block does not depend on the file's import list.
	    import functools

	    def decorator(func):
	        # Callables such as functools.partial objects have no __name__.
	        func_name = getattr(func, "__name__", repr(func))

	        @functools.wraps(func)  # keep the wrapped function's name/docstring
	        def wrapper(*args, **kwargs):
	            logger.debug(f"Calling {func_name} with args={args}, kwargs={kwargs}")
	            try:
	                result = func(*args, **kwargs)
	            except Exception as e:
	                # logger.exception == logger.error(..., exc_info=True)
	                logger.exception(f"{func_name} failed with error: {e}")
	                raise
	            logger.debug(f"{func_name} completed successfully")
	            return result

	        return wrapper

	    return decorator


	# Structured logging helpers
	def log_pdf_processing(logger: logging.Logger, filename: str, pages: int, chunks: int):
	    """
	    Log PDF processing completion at INFO level.

	    Args:
	        logger: Logger that receives the message.
	        filename: Name of the processed PDF.
	        pages: Page count to report.
	        chunks: Chunk count to report.
	    """
	    # Plain "|" separators: "\|" is an invalid escape sequence and would
	    # put a literal backslash into the message.
	    logger.info(f"Processed PDF: {filename} | Pages: {pages} | Chunks: {chunks}")


	def log_retrieval(logger: logging.Logger, query: str, num_results: int, duration_ms: float):
	    """
	    Record a retrieval operation.

	    Emits an INFO summary (result count and duration in milliseconds with
	    two decimals), followed by a DEBUG line containing the first 100
	    characters of the query with "..." appended.
	    """
	    summary = "Retrieved {} chunks for query in {:.2f}ms".format(num_results, duration_ms)
	    logger.info(summary)

	    # The ellipsis is appended unconditionally, even for short queries.
	    preview = query[:100]
	    logger.debug("Query: {}...".format(preview))


	def log_llm_call(logger: logging.Logger, model: str, tokens_in: int, tokens_out: int, duration_s: float):
	    """
	    Log an LLM API call at INFO level.

	    Args:
	        logger: Logger that receives the message.
	        model: Model name to report.
	        tokens_in: Input token count to report.
	        tokens_out: Output token count to report.
	        duration_s: Call duration in seconds, shown with two decimals.
	    """
	    # Plain "|" separators: "\|" is an invalid escape sequence and would
	    # put a literal backslash into the message.
	    logger.info(
	        f"LLM call: {model} | In: {tokens_in} tokens | Out: {tokens_out} tokens | Duration: {duration_s:.2f}s"
	    )


	def log_embedding_generation(logger: logging.Logger, num_chunks: int, duration_s: float):
	    """
	    Record an embedding run at INFO level.

	    The message reports the chunk count, the duration in seconds and the
	    resulting throughput in chunks per second.
	    """
	    # A non-positive duration cannot yield a rate; report 0 instead of dividing.
	    if duration_s > 0:
	        rate = num_chunks / duration_s
	    else:
	        rate = 0

	    message = "Generated embeddings for {} chunks in {:.2f}s ({:.1f} chunks/s)".format(
	        num_chunks, duration_s, rate
	    )
	    logger.info(message)


	def log_cache_hit(logger: logging.Logger, cache_type: str, key: str):
	    """
	    Log a cache hit at DEBUG level.

	    Args:
	        logger: Logger that receives the message.
	        cache_type: Label of the cache that was hit.
	        key: Cache key; only its first 50 characters are logged.
	    """
	    # Plain "|" separator: "\|" is an invalid escape sequence and would
	    # put a literal backslash into the message.
	    logger.debug(f"Cache hit: {cache_type} | Key: {key[:50]}")


	def log_cache_miss(logger: logging.Logger, cache_type: str, key: str):
	    """
	    Log a cache miss at DEBUG level.

	    Args:
	        logger: Logger that receives the message.
	        cache_type: Label of the cache that was missed.
	        key: Cache key; only its first 50 characters are logged.
	    """
	    # Plain "|" separator: "\|" is an invalid escape sequence and would
	    # put a literal backslash into the message.
	    logger.debug(f"Cache miss: {cache_type} | Key: {key[:50]}")


	def log_error(logger: logging.Logger, operation: str, error: Exception):
	    """
	    Log an error with context at ERROR level.

	    Args:
	        logger: Logger that receives the message.
	        operation: Name of the operation that failed.
	        error: The exception to report; its text goes into the message and
	            the exception itself is attached to the log record.
	    """
	    # Pass the exception itself rather than exc_info=True: True makes logging
	    # read sys.exc_info(), which is empty when this helper is called outside
	    # an ``except`` block and would attach "NoneType: None" to the record.
	    logger.error(f"Error in {operation}: {str(error)}", exc_info=error)