"""
Utility functions for the Business Intelligence Dashboard.

This module contains helper functions for data type detection,
validation, and common operations.
"""

from typing import List, Optional, Tuple

import pandas as pd
import numpy as np


def detect_column_types(df: pd.DataFrame) -> Tuple[List[str], List[str], List[str]]:
    """
    Detect column types in a DataFrame.

    Each column is placed in exactly one bucket: datetime columns first,
    then numeric columns, and everything else is treated as categorical.

    Args:
        df: Input DataFrame

    Returns:
        Tuple of (numerical_columns, categorical_columns, date_columns)
    """
    numerical = []
    categorical = []
    date_columns = []

    for col in df.columns:
        series = df[col]
        # Datetime is tested before numeric so date columns never fall
        # into the numerical bucket.
        if pd.api.types.is_datetime64_any_dtype(series):
            date_columns.append(col)
        # NOTE(review): pandas reports bool dtype as numeric, so boolean
        # columns land in `numerical` -- confirm this is intended.
        elif pd.api.types.is_numeric_dtype(series):
            numerical.append(col)
        else:
            categorical.append(col)

    return numerical, categorical, date_columns


def validate_dataframe(df: Optional[pd.DataFrame]) -> Tuple[bool, Optional[str]]:
    """
    Validate that DataFrame is not empty and has valid structure.

    Args:
        df: DataFrame to validate (None is accepted and reported as invalid)

    Returns:
        Tuple of (is_valid, error_message); error_message is None when valid
    """
    if df is None:
        return False, "DataFrame is empty or None"

    # This check must run before `df.empty`: a frame without columns is
    # always "empty", so testing emptiness first made this branch
    # unreachable and hid the more specific message.
    if len(df.columns) == 0:
        return False, "DataFrame has no columns"

    if df.empty:
        return False, "DataFrame is empty or None"

    return True, None


def format_number(value: float, decimals: int = 2) -> str:
    """
    Format a number with thousands separators and fixed decimal places.

    Args:
        value: Number to format
        decimals: Number of decimal places

    Returns:
        Formatted string, or "N/A" when the value is missing (NaN/None)
    """
    if pd.isna(value):
        return "N/A"
    return f"{value:,.{decimals}f}"


def safe_divide(numerator: float, denominator: float) -> float:
    """
    Safely divide two numbers, returning 0 if denominator is 0 or missing.

    Args:
        numerator: Numerator value
        denominator: Denominator value

    Returns:
        Division result, or 0.0 when the denominator is zero or NaN/None
    """
    if denominator == 0 or pd.isna(denominator):
        return 0.0
    return numerator / denominator


def get_missing_value_summary(df: pd.DataFrame) -> pd.DataFrame:
    """
    Get summary of missing values in DataFrame.

    Args:
        df: Input DataFrame

    Returns:
        DataFrame with columns 'Column', 'Missing_Count' and
        'Missing_Percentage' (share of rows, 0-100), restricted to columns
        that have at least one missing value and sorted by 'Missing_Count'
        in descending order.
    """
    missing = df.isnull().sum()
    # Percentage is relative to the number of rows in the frame.
    missing_pct = (missing / len(df)) * 100

    summary = pd.DataFrame({
        'Column': missing.index,
        'Missing_Count': missing.values,
        'Missing_Percentage': missing_pct.values
    })

    # Only columns that actually have gaps are reported.
    has_missing = summary['Missing_Count'] > 0
    return summary[has_missing].sort_values('Missing_Count', ascending=False)