Spaces:

satyakimitra
/

QuerySphere

Running

App Files Files Community

QuerySphere / utils /error_handler.py

satyakimitra

first commit

0a4529c about 2 months ago

raw

history blame contribute delete

15.3 kB

	# DEPENDENCIES
	import traceback
	from enum import Enum
	from typing import Any
	from typing import Dict
	from typing import Optional
	from config.models import ErrorResponse
	from config.logging_config import get_logger


	# Module-level logger for this file, obtained from the project's get_logger helper and named after this module
	logger = get_logger(__name__)


	class ErrorCode(str, Enum):
	    """
	    Standardized error codes

	    Every value has the form ``<PREFIX>_<NUMBER>``; the thousands digit of the number identifies the category.
	    Because the enum mixes in ``str``, each member compares equal to its raw string value.
	    """
	    # File errors (1xxx)
	    FILE_NOT_FOUND = "FILE_1001"
	    FILE_TOO_LARGE = "FILE_1002"
	    INVALID_FILE_TYPE = "FILE_1003"
	    FILE_CORRUPTED = "FILE_1004"
	    FILE_UPLOAD_FAILED = "FILE_1005"

	    # Parsing errors (2xxx)
	    PARSE_ERROR = "PARSE_2001"
	    PDF_PARSE_ERROR = "PARSE_2002"
	    DOCX_PARSE_ERROR = "PARSE_2003"
	    TEXT_ENCODING_ERROR = "PARSE_2004"

	    # Processing errors (3xxx)
	    CHUNKING_ERROR = "PROC_3001"
	    EMBEDDING_ERROR = "PROC_3002"
	    INDEXING_ERROR = "PROC_3003"

	    # LLM errors (4xxx)
	    LLM_ERROR = "LLM_4001"
	    OLLAMA_NOT_AVAILABLE = "LLM_4002"
	    GENERATION_TIMEOUT = "LLM_4003"
	    CONTEXT_TOO_LONG = "LLM_4004"

	    # Validation errors (5xxx)
	    VALIDATION_ERROR = "VAL_5001"
	    INVALID_INPUT = "VAL_5002"
	    MISSING_REQUIRED_FIELD = "VAL_5003"

	    # System errors (6xxx)
	    SYSTEM_ERROR = "SYS_6001"
	    DATABASE_ERROR = "SYS_6002"
	    CACHE_ERROR = "SYS_6003"
	    CONFIGURATION_ERROR = "SYS_6004"

	    # Retrieval errors (7xxx)
	    VECTOR_SEARCH_ERROR = "RETR_7001"
	    KEYWORD_SEARCH_ERROR = "RETR_7002"
	    RERANKING_ERROR = "RETR_7003"
	    CITATION_ERROR = "RETR_7004"
	    CONTEXT_ASSEMBLY_ERROR = "RETR_7005"
	    HYBRID_RETRIEVAL_ERROR = "RETR_7006"
	    # Referenced by NoResultsFoundError; without this member that exception cannot be constructed
	    NO_RESULTS_FOUND = "RETR_7007"

	    # Generation errors (8xxx)
	    TOKEN_MANAGEMENT_ERROR = "GEN_8001"
	    TEMPERATURE_CONTROL_ERROR = "GEN_8002"
	    CITATION_FORMATTING_ERROR = "GEN_8003"
	    PROMPT_BUILDING_ERROR = "GEN_8004"
	    LLM_CLIENT_ERROR = "GEN_8005"
	    RESPONSE_GENERATION_ERROR = "GEN_8006"

	    # Generic fallback
	    UNKNOWN_ERROR = "ERR_9999"


	class RAGException(Exception):
	    """
	    Root of the RAG exception hierarchy

	    Carries a human-readable message, a standardized error code, a details mapping and,
	    optionally, the lower-level exception that led to this one.
	    """
	    def __init__(self, message: str, error_code: ErrorCode = ErrorCode.UNKNOWN_ERROR, details: Optional[Dict[str, Any]] = None, original_error: Optional[Exception] = None):
	        super().__init__(message)

	        self.message = message
	        self.error_code = error_code
	        self.original_error = original_error
	        # A falsy `details` (None or an empty mapping) is replaced by a fresh dict
	        self.details = details if details else {}


	    def to_dict(self) -> dict:
	        """
	        Return a plain-dict view of the exception

	        The `original_error` key is present only when an original error was supplied.
	        """
	        payload = dict(error = self.error_code.value,
	                       message = self.message,
	                       details = self.details,
	                       )

	        if self.original_error:
	            payload["original_error"] = str(self.original_error)

	        return payload


	    def to_error_response(self) -> ErrorResponse:
	        """
	        Build the ErrorResponse model for this exception; empty details are sent as None
	        """
	        detail_payload = self.details or None

	        return ErrorResponse(error = self.error_code.value,
	                             message = self.message,
	                             detail = detail_payload,
	                             )


	# Specific Exceptions
	class FileException(RAGException):
	    """
	    Base class for file-related errors

	    Adds no behaviour of its own; construction arguments are those of RAGException.
	    """


	class FileNotFoundError(FileException):
	    """
	    Raised when a file cannot be found at the given path

	    NOTE(review): this class shadows Python's builtin FileNotFoundError within this module.
	    """
	    def __init__(self, file_path: str, **kwargs):
	        not_found_message = f"File not found: {file_path}"
	        path_details = {"file_path": file_path}

	        super().__init__(message = not_found_message,
	                         error_code = ErrorCode.FILE_NOT_FOUND,
	                         details = path_details,
	                         **kwargs
	                         )


	class FileTooLargeError(FileException):
	    """
	    Raised when a file is larger than the permitted maximum (both sizes in bytes)
	    """
	    def __init__(self, file_size: int, max_size: int, **kwargs):
	        size_message = f"File size {file_size} bytes exceeds maximum {max_size} bytes"
	        size_details = {"file_size": file_size, "max_size": max_size}

	        super().__init__(message = size_message,
	                         error_code = ErrorCode.FILE_TOO_LARGE,
	                         details = size_details,
	                         **kwargs
	                         )


	class InvalidFileTypeError(FileException):
	    """
	    Raised when a file's type is not among the allowed types

	    The allowed types are joined with ", " in the message, so they must be strings.
	    """
	    def __init__(self, file_type: str, allowed_types: list, **kwargs):
	        type_message = f"Invalid file type '{file_type}'. Allowed: {', '.join(allowed_types)}"
	        type_details = {"file_type": file_type, "allowed_types": allowed_types}

	        super().__init__(message = type_message,
	                         error_code = ErrorCode.INVALID_FILE_TYPE,
	                         details = type_details,
	                         **kwargs
	                         )


	class ParsingException(RAGException):
	    """
	    Base class for document parsing errors

	    Adds no behaviour of its own; construction arguments are those of RAGException.
	    """


	class PDFParseError(ParsingException):
	    """
	    Raised when a PDF file could not be parsed
	    """
	    def __init__(self, file_path: str, **kwargs):
	        parse_message = f"Failed to parse PDF: {file_path}"
	        parse_details = {"file_path": file_path}

	        super().__init__(message = parse_message,
	                         error_code = ErrorCode.PDF_PARSE_ERROR,
	                         details = parse_details,
	                         **kwargs
	                         )


	class DOCXParseError(ParsingException):
	    """
	    Raised when a DOCX file could not be parsed
	    """
	    def __init__(self, file_path: str, **kwargs):
	        parse_message = f"Failed to parse DOCX: {file_path}"
	        parse_details = {"file_path": file_path}

	        super().__init__(message = parse_message,
	                         error_code = ErrorCode.DOCX_PARSE_ERROR,
	                         details = parse_details,
	                         **kwargs
	                         )


	class TextEncodingError(ParsingException):
	    """
	    Raised when a file could not be decoded with the given encoding
	    """
	    def __init__(self, file_path: str, encoding: str, **kwargs):
	        decode_message = f"Failed to decode file {file_path} with encoding {encoding}"
	        decode_details = {"file_path": file_path, "encoding": encoding}

	        super().__init__(message = decode_message,
	                         error_code = ErrorCode.TEXT_ENCODING_ERROR,
	                         details = decode_details,
	                         **kwargs
	                         )


	class OCRException(RAGException):
	    """
	    Exception type for OCR-specific failures

	    Adds no behaviour of its own; construction arguments are those of RAGException.
	    """


	class ArchiveException(RAGException):
	    """
	    Exception type for archive-specific failures

	    Adds no behaviour of its own; construction arguments are those of RAGException.
	    """


	class ProcessingException(RAGException):
	    """
	    Base class for processing errors

	    Adds no behaviour of its own; construction arguments are those of RAGException.
	    """


	class ChunkingError(ProcessingException):
	    """
	    Raised when a document could not be chunked
	    """
	    def __init__(self, document_id: str, **kwargs):
	        chunk_message = f"Failed to chunk document: {document_id}"
	        chunk_details = {"document_id": document_id}

	        super().__init__(message = chunk_message,
	                         error_code = ErrorCode.CHUNKING_ERROR,
	                         details = chunk_details,
	                         **kwargs
	                         )


	class EmbeddingError(ProcessingException):
	    """
	    Raised when embedding generation failed; records the length of the offending text
	    """
	    def __init__(self, text_length: int, **kwargs):
	        embed_message = f"Failed to generate embeddings for text of length {text_length}"
	        embed_details = {"text_length": text_length}

	        super().__init__(message = embed_message,
	                         error_code = ErrorCode.EMBEDDING_ERROR,
	                         details = embed_details,
	                         **kwargs
	                         )


	class IndexingError(ProcessingException):
	    """
	    Raised when indexing into the named index type failed
	    """
	    def __init__(self, index_type: str, **kwargs):
	        index_message = f"Failed to index into {index_type}"
	        index_details = {"index_type": index_type}

	        super().__init__(message = index_message,
	                         error_code = ErrorCode.INDEXING_ERROR,
	                         details = index_details,
	                         **kwargs
	                         )


	class VectorSearchError(RAGException):
	    """
	    Exception type for vector search failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class KeywordSearchError(RAGException):
	    """
	    Exception type for keyword search failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class RerankingError(RAGException):
	    """
	    Exception type for reranking failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class CitationError(RAGException):
	    """
	    Exception type for citation tracking failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class ContextAssemblyError(RAGException):
	    """
	    Exception type for context assembly failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class HybridRetrievalError(RAGException):
	    """
	    Exception type for hybrid retrieval failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class TokenManagementError(RAGException):
	    """
	    Exception type for token management failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class TemperatureControlError(RAGException):
	    """
	    Exception type for temperature control failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class CitationFormattingError(RAGException):
	    """
	    Exception type for citation formatting failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class PromptBuildingError(RAGException):
	    """
	    Exception type for prompt building failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class LLMClientError(RAGException):
	    """
	    Exception type for LLM client failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class ResponseGenerationError(RAGException):
	    """
	    Exception type for response generation failures

	    Defines no constructor of its own, so the error code is whatever the caller passes to RAGException.
	    """


	class NoResultsFoundError(RAGException):
	    """
	    Raised when a query produced no results

	    NOTE(review): construction looks up ErrorCode.NO_RESULTS_FOUND, so that member must exist on ErrorCode.
	    """
	    def __init__(self, query: str, **kwargs):
	        empty_message = f"No results found for query: {query}"
	        query_details = {"query": query}

	        super().__init__(message = empty_message,
	                         error_code = ErrorCode.NO_RESULTS_FOUND,
	                         details = query_details,
	                         **kwargs
	                         )


	class LLMException(RAGException):
	    """
	    Base class for LLM errors

	    Adds no behaviour of its own; construction arguments are those of RAGException.
	    """


	class OllamaNotAvailableError(LLMException):
	    """
	    Raised when the Ollama service is not available at the given base URL
	    """
	    def __init__(self, base_url: str, **kwargs):
	        unavailable_message = f"Ollama service not available at {base_url}"
	        url_details = {"base_url": base_url}

	        super().__init__(message = unavailable_message,
	                         error_code = ErrorCode.OLLAMA_NOT_AVAILABLE,
	                         details = url_details,
	                         **kwargs
	                         )


	class GenerationTimeoutError(LLMException):
	    """
	    Raised when LLM generation timed out; the timeout is reported in seconds
	    """
	    def __init__(self, timeout: int, **kwargs):
	        timeout_message = f"LLM generation timed out after {timeout} seconds"
	        timeout_details = {"timeout": timeout}

	        super().__init__(message = timeout_message,
	                         error_code = ErrorCode.GENERATION_TIMEOUT,
	                         details = timeout_details,
	                         **kwargs
	                         )


	class ContextTooLongError(LLMException):
	    """
	    Raised when the context length exceeds the maximum length
	    """
	    def __init__(self, context_length: int, max_length: int, **kwargs):
	        length_message = f"Context length {context_length} exceeds maximum {max_length}"
	        length_details = {"context_length": context_length, "max_length": max_length}

	        super().__init__(message = length_message,
	                         error_code = ErrorCode.CONTEXT_TOO_LONG,
	                         details = length_details,
	                         **kwargs
	                         )


	class ValidationException(RAGException):
	    """
	    Base class for validation errors

	    Adds no behaviour of its own; construction arguments are those of RAGException.
	    """


	# Error Handler Decorators
	def handle_errors(error_type: type = RAGException, log_error: bool = True, reraise: bool = True):
	    """
	    Decorator to handle errors in functions

	    Arguments:
	    ----------
	        error_type { type } : Exception type to catch (default: RAGException)

	        log_error  { bool } : Whether to log the error

	        reraise    { bool } : Whether to reraise after handling

	    Returns:
	    --------
	        { callable } : Decorator; the wrapped function returns its normal result, or None when a
	                       caught error is not re-raised
	    """
	    # Imported locally so this function does not depend on a change to the module's import block
	    from functools import wraps

	    def decorator(func):
	        # wraps() keeps the decorated function's __name__ / __doc__ intact
	        @wraps(func)
	        def wrapper(*args, **kwargs):
	            # Forward positional AND keyword arguments untouched to the wrapped function
	            try:
	                return func(*args, **kwargs)

	            except error_type as e:
	                if log_error:
	                    func_name = getattr(func, "__name__", repr(func))
	                    logger.error(f"Error in {func_name}: {str(e)}", exc_info = True)

	                if reraise:
	                    raise

	                # Error swallowed on request: signal failure with None
	                return None

	        return wrapper

	    return decorator


	def safe_execute(func, *args, default = None, log_errors: bool = True, **kwargs):
	    """
	    Safely execute a function with error handling

	    Arguments:
	    ----------
	        func       : Function to execute

	        *args      : Positional arguments forwarded to func

	        default    : Value returned when func raises (keyword-only)

	        log_errors : Whether to log errors (keyword-only)

	        **kwargs   : Keyword arguments forwarded to func

	    Returns:
	    --------
	        Function result, or `default` when func raises any Exception
	    """
	    try:
	        return func(*args, **kwargs)

	    except Exception as e:
	        if log_errors:
	            # Callables such as functools.partial objects have no __name__
	            func_name = getattr(func, "__name__", repr(func))
	            logger.error(f"Error in {func_name}: {str(e)}", exc_info = True)

	        return default


	class ErrorContext:
	    """
	    Context manager that records, optionally logs and optionally suppresses an exception raised in its body

	    Extra keyword arguments given to the constructor are kept in `context_data` and passed as `extra` when
	    the error is logged.

	    NOTE(review): if `logger` is a standard logging.Logger, context keys that match LogRecord attributes
	    (e.g. `message`, `name`) would be rejected by logging -- confirm against callers.
	    """

	    def __init__(self, operation: str, raise_on_error: bool = True, log_on_error: bool = True, **context_data):
	        self.error: Optional[Exception] = None
	        self.context_data = context_data
	        self.log_on_error = log_on_error
	        self.raise_on_error = raise_on_error
	        self.operation = operation


	    def __enter__(self):
	        logger.debug(f"Starting: {self.operation}")

	        return self


	    def __exit__(self, exc_type, exc_val, exc_tb):
	        # Clean exit: nothing to record, nothing to suppress
	        if exc_type is None:
	            return False

	        self.error = exc_val

	        if self.log_on_error:
	            logger.error(f"Error in {self.operation}: {exc_val}",
	                         extra = self.context_data,
	                         exc_info = True,
	                         )

	        # Returning True suppresses the exception; that happens only when raising is disabled
	        return not self.raise_on_error


	def log_and_raise(error: Exception, message: str, **context):
	    """
	    Log error and raise

	    Arguments:
	    ----------
	        error     : Exception to raise

	        message   : Log message

	        **context : Additional context to log (passed to the logger as `extra`)

	    Raises:
	    -------
	        Always raises `error`
	    """
	    # Imported locally so this function does not depend on a change to the module's import block
	    import sys

	    # Inside an `except` block keep logging the exception currently being handled; outside one,
	    # exc_info=True has nothing to report, so attach the exception that is about to be raised
	    handling_exception = sys.exc_info()[0] is not None
	    logger.error(message, extra = context, exc_info = True if handling_exception else error)

	    raise error


	def format_error_message(error: Exception, include_traceback: bool = False) -> str:
	    """
	    Render an exception as a display string

	    Arguments:
	    ----------
	        error             : Exception to format

	        include_traceback : Append the formatted traceback of `error`

	    Returns:
	    --------
	        { str } : "[<code>] <message>" (plus a details line when details are non-empty) for a
	                  RAGException, "<TypeName>: <text>" for any other exception
	    """
	    if not isinstance(error, RAGException):
	        formatted = f"{type(error).__name__}: {str(error)}"

	    else:
	        formatted = f"[{error.error_code.value}] {error.message}"

	        if error.details:
	            formatted += f"\nDetails: {error.details}"

	    if include_traceback:
	        frames = traceback.format_tb(error.__traceback__)
	        formatted += f"\n\nTraceback:\n{''.join(frames)}"

	    return formatted


	class EvaluationError(RAGException):
	    """
	    Custom exception for evaluation-related errors

	    The error code is left at RAGException's default. The error type name and the status code 500
	    are recorded in `details`.
	    """
	    def __init__(self, message: str, original_error: Optional[Exception] = None):
	        # RAGException.__init__ accepts only message, error_code, details and original_error.
	        # Passing `error_type` / `status_code` as keywords raised TypeError on every instantiation,
	        # so that information is carried in `details` instead.
	        super().__init__(message = message,
	                         details = {"error_type": "EvaluationError", "status_code": 500},
	                         original_error = original_error,
	                         )

	        # Filled in later through with_context()
	        self.evaluation_context: Optional[Dict[str, Any]] = None


	    def with_context(self, context: Dict[str, Any]) -> 'EvaluationError':
	        """
	        Add evaluation context to the error and return the same instance, so the call can be chained
	        """
	        self.evaluation_context = context

	        return self