Spaces:
Sleeping
Sleeping
| """ | |
| Utility functions for the Business Intelligence Dashboard. | |
| This module contains helper functions for data type detection, | |
| validation, and common operations. | |
| """ | |
| from typing import List, Optional, Tuple | |
| import pandas as pd | |
| import numpy as np | |
def detect_column_types(df: pd.DataFrame) -> Tuple[List[str], List[str], List[str]]:
    """
    Sort the columns of a DataFrame into numerical, categorical and date groups.

    Each column is tested first for a datetime dtype, then for a numeric
    dtype; anything matching neither test is treated as categorical.

    Args:
        df: Input DataFrame

    Returns:
        Tuple of (numerical_columns, categorical_columns, date_columns)
    """
    is_datetime = pd.api.types.is_datetime64_any_dtype
    is_numeric = pd.api.types.is_numeric_dtype

    buckets = {"numerical": [], "categorical": [], "date": []}
    for name in df.columns:
        series = df[name]
        # The datetime test runs first, matching the precedence of the checks.
        if is_datetime(series):
            kind = "date"
        elif is_numeric(series):
            kind = "numerical"
        else:
            kind = "categorical"
        buckets[kind].append(name)

    return buckets["numerical"], buckets["categorical"], buckets["date"]
def validate_dataframe(df: pd.DataFrame) -> Tuple[bool, Optional[str]]:
    """
    Validate that DataFrame is not empty and has valid structure.

    The checks run from most to least specific so that each error message
    can actually be produced.

    Args:
        df: DataFrame to validate (may be None)

    Returns:
        Tuple of (is_valid, error_message); error_message is None when the
        DataFrame is valid.
    """
    if df is None:
        return False, "DataFrame is empty or None"
    # `DataFrame.empty` is True whenever *any* axis has length 0, so a frame
    # without columns must be detected before the generic emptiness check;
    # otherwise this branch could never be reached.
    if len(df.columns) == 0:
        return False, "DataFrame has no columns"
    if df.empty:
        return False, "DataFrame is empty or None"
    return True, None
def format_number(value: float, decimals: int = 2) -> str:
    """
    Render a number with thousands separators and a fixed number of decimals.

    Args:
        value: Number to format
        decimals: Number of decimal places

    Returns:
        Formatted string, or "N/A" when the value is missing (as judged by
        `pd.isna`)
    """
    is_missing = pd.isna(value)
    # Build the format spec separately, e.g. ",.2f" for two decimal places.
    spec = f",.{decimals}f"
    return "N/A" if is_missing else format(value, spec)
def safe_divide(numerator: float, denominator: float) -> float:
    """
    Divide two numbers, falling back to 0 when the denominator is unusable.

    Args:
        numerator: Numerator value
        denominator: Denominator value

    Returns:
        numerator / denominator, or 0.0 when the denominator is zero or
        missing (as judged by `pd.isna`)
    """
    # Only divide when the denominator is both non-zero and not missing.
    if denominator != 0 and not pd.isna(denominator):
        return numerator / denominator
    return 0.0
def get_missing_value_summary(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarize how many values are missing in each column of a DataFrame.

    Args:
        df: Input DataFrame

    Returns:
        DataFrame with the columns 'Column', 'Missing_Count' and
        'Missing_Percentage', restricted to columns that have at least one
        missing value and ordered by 'Missing_Count' descending
    """
    row_total = len(df)
    null_counts = df.isnull().sum()
    null_share = (null_counts / row_total) * 100

    report = pd.DataFrame({
        'Column': null_counts.index,
        'Missing_Count': null_counts.values,
        'Missing_Percentage': null_share.values,
    })

    # Drop columns without any missing values before ranking the rest.
    has_missing = report['Missing_Count'] > 0
    affected = report[has_missing]
    return affected.sort_values('Missing_Count', ascending=False)