File size: 6,263 Bytes
9b457ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
"""
Structured logging configuration for the application.

This module sets up consistent logging across all components with support for
different log levels, formatters, and handlers.
"""

import functools
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional


# ANSI color codes for console output
class LogColors:
    """Namespace of ANSI escape sequences used to style terminal text."""

    # Attribute controls: code 0 clears styling, code 1 switches on bold.
    RESET = "\x1b[0m"
    BOLD = "\x1b[1m"

    # Foreground colors, listed in ascending escape-code order (90-96).
    GRAY = "\x1b[90m"
    RED = "\x1b[91m"
    GREEN = "\x1b[92m"
    YELLOW = "\x1b[93m"
    BLUE = "\x1b[94m"
    MAGENTA = "\x1b[95m"
    CYAN = "\x1b[96m"


class ColoredFormatter(logging.Formatter):
    """Custom formatter with colors for different log levels.

    Every record is rendered as ``LEVEL | logger name | message``; the level
    name is wrapped in the color that ``FORMATS`` assigns to the record's
    level. A record whose level has no entry in ``FORMATS`` (for example a
    custom level) is rendered with the same layout but without color, so the
    level and logger name are never dropped from the output.
    """

    # Uncolored layout used for any level that FORMATS does not list.
    _DEFAULT_FORMAT = "%(levelname)s | %(name)s | %(message)s"
    _DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

    # Level number -> format string with the level name colorized.
    FORMATS = {
        logging.DEBUG: LogColors.GRAY + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.INFO: LogColors.GREEN + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.WARNING: LogColors.YELLOW + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.ERROR: LogColors.RED + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.CRITICAL: LogColors.BOLD + LogColors.RED + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
    }

    def __init__(self, *args, **kwargs):
        """Accept the standard ``logging.Formatter`` arguments unchanged."""
        super().__init__(*args, **kwargs)
        # Delegate formatters keyed by format string, created on first use so
        # a Formatter is not rebuilt for every single record.
        self._delegates = {}

    def format(self, record):
        """Return ``record`` rendered with the layout for its level.

        Args:
            record: The ``logging.LogRecord`` to render.

        Returns:
            The formatted log line as a string.
        """
        log_fmt = self.FORMATS.get(record.levelno, self._DEFAULT_FORMAT)
        delegate = self._delegates.get(log_fmt)
        if delegate is None:
            delegate = logging.Formatter(log_fmt, datefmt=self._DATE_FORMAT)
            self._delegates[log_fmt] = delegate
        return delegate.format(record)


class FileFormatter(logging.Formatter):
    """Formatter for log files: timestamp, level, logger name with line number, message."""

    def __init__(self):
        # Fields are separated by " | "; the level name is left-aligned and
        # padded to 8 characters so the columns line up.
        line_layout = "%(asctime)s | %(levelname)-8s | %(name)s:%(lineno)d | %(message)s"
        timestamp_layout = "%Y-%m-%d %H:%M:%S"
        super().__init__(line_layout, timestamp_layout)


def setup_logging(
    log_level: str = "INFO",
    log_file: Optional[str] = None,
    log_to_console: bool = True,
) -> None:
    """
    Set up logging configuration for the application.

    Reconfigures the root logger: every handler currently attached to it is
    detached and closed, then a console handler and/or a file handler is
    installed. Several third-party loggers are capped at WARNING, and an
    INFO record announcing the effective level is emitted last.

    Args:
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL),
            case-insensitive. Anything that does not name a numeric level
            falls back to INFO.
        log_file: Optional path to log file. If None, logs only to console.
            Missing parent directories are created.
        log_to_console: Whether to log to console (default: True)
    """
    # Convert log level string to logging constant. The lookup is done on the
    # logging module, so guard against names that resolve to something other
    # than a numeric level (e.g. "basic_format" -> logging.BASIC_FORMAT).
    numeric_level = getattr(logging, log_level.upper(), logging.INFO)
    if not isinstance(numeric_level, int):
        numeric_level = logging.INFO

    # Get root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Remove existing handlers and close them, so that calling this function
    # again does not leave the previous log file open. Iterate over a copy
    # because removeHandler mutates the list.
    for stale_handler in list(root_logger.handlers):
        root_logger.removeHandler(stale_handler)
        stale_handler.close()

    # Console handler
    if log_to_console:
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setLevel(numeric_level)
        console_handler.setFormatter(ColoredFormatter())
        root_logger.addHandler(console_handler)

    # File handler
    if log_file:
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)

        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setLevel(numeric_level)
        file_handler.setFormatter(FileFormatter())
        root_logger.addHandler(file_handler)

    # Suppress overly verbose third-party loggers
    for noisy_logger in (
        "chromadb",
        "sentence_transformers",
        "urllib3",
        "httpx",
        "httpcore",
        "openai",
        "anthropic",
    ):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)

    # Report the level actually in effect, which differs from the requested
    # one when the fallback to INFO was taken.
    root_logger.info("Logging initialized at %s level", logging.getLevelName(numeric_level))


def get_logger(name: str) -> logging.Logger:
    """
    Look up the logger registered under ``name``.

    Args:
        name: Logger name (typically __name__ of the calling module)

    Returns:
        The ``logging.Logger`` that ``logging.getLogger`` yields for ``name``
    """
    named_logger = logging.getLogger(name)
    return named_logger


def log_function_call(logger: logging.Logger):
    """
    Decorator to log function calls and their outcome.

    A DEBUG record with the positional and keyword arguments is emitted
    before the call and another DEBUG record after a successful return. If
    the function raises an ``Exception``, an ERROR record with the traceback
    is emitted and the exception is re-raised unchanged. Return values are
    not logged.

    The wrapper keeps the decorated function's metadata (``__name__``,
    ``__doc__``, ...).

    Args:
        logger: Logger that receives the records.

    Returns:
        A decorator to apply to the function being traced.

    Usage:
        @log_function_call(logger)
        def my_function(arg1, arg2):
            return result
    """
    def decorator(func):
        # Callables without __name__ (e.g. functools.partial objects) are
        # identified by their repr instead of failing.
        func_name = getattr(func, "__name__", repr(func))

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # %-style arguments defer building the (possibly large) argument
            # representation until a handler actually emits the record.
            logger.debug("Calling %s with args=%s, kwargs=%s", func_name, args, kwargs)
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                logger.error("%s failed with error: %s", func_name, e, exc_info=True)
                raise
            logger.debug("%s completed successfully", func_name)
            return result
        return wrapper
    return decorator


# Structured logging helpers
def log_pdf_processing(logger: logging.Logger, filename: str, pages: int, chunks: int):
    """Emit an INFO record summarizing a processed PDF (file name, page count, chunk count)."""
    summary = f"Processed PDF: {filename} | Pages: {pages} | Chunks: {chunks}"
    logger.info(summary)


def log_retrieval(logger: logging.Logger, query: str, num_results: int, duration_ms: float):
    """Log retrieval operation.

    Emits an INFO record with the number of results and the duration in
    milliseconds, followed by a DEBUG record previewing the query. The
    preview is limited to the first 100 characters, and an ellipsis is
    appended only when the query was actually cut.
    """
    logger.info(f"Retrieved {num_results} chunks for query in {duration_ms:.2f}ms")
    preview = query[:100]
    # A trailing "..." on a short query would wrongly suggest missing text.
    ellipsis = "..." if len(query) > len(preview) else ""
    logger.debug(f"Query: {preview}{ellipsis}")


def log_llm_call(logger: logging.Logger, model: str, tokens_in: int, tokens_out: int, duration_s: float):
    """Emit an INFO record for one LLM API call: model, token counts in/out, duration in seconds."""
    call_summary = f"LLM call: {model} | In: {tokens_in} tokens | Out: {tokens_out} tokens | Duration: {duration_s:.2f}s"
    logger.info(call_summary)


def log_embedding_generation(logger: logging.Logger, num_chunks: int, duration_s: float):
    """Emit an INFO record with the chunk count, elapsed seconds and chunks-per-second rate."""
    # The rate is undefined for a non-positive duration; report 0 in that case.
    if duration_s > 0:
        throughput = num_chunks / duration_s
    else:
        throughput = 0
    logger.info(f"Generated embeddings for {num_chunks} chunks in {duration_s:.2f}s ({throughput:.1f} chunks/s)")


def log_cache_hit(logger: logging.Logger, cache_type: str, key: str):
    """Emit a DEBUG record for a cache hit, showing at most the first 50 characters of the key."""
    shortened_key = key[:50]
    logger.debug(f"Cache hit: {cache_type} | Key: {shortened_key}")


def log_cache_miss(logger: logging.Logger, cache_type: str, key: str):
    """Emit a DEBUG record for a cache miss, showing at most the first 50 characters of the key."""
    shortened_key = key[:50]
    logger.debug(f"Cache miss: {cache_type} | Key: {shortened_key}")


def log_error(logger: logging.Logger, operation: str, error: Exception):
    """Log error with context.

    Emits an ERROR record naming the failed operation and the error text,
    with the exception details of ``error`` attached.

    Args:
        logger: Logger that receives the record.
        operation: Short description of what was being attempted.
        error: The exception to report.
    """
    # Hand the exception object itself to exc_info. exc_info=True would read
    # the exception currently being handled, which is empty when this helper
    # is called outside an ``except`` block and may be a different exception
    # when called inside one.
    logger.error(f"Error in {operation}: {str(error)}", exc_info=error)