# BI-dashboard / utils.py
# Lohith Venkat Chamakura
# Initial commit
# 48909ac
"""
Utility functions for the Business Intelligence Dashboard.
This module contains helper functions for data type detection,
validation, and common operations.
"""
from typing import List, Optional, Tuple
import pandas as pd
import numpy as np
def detect_column_types(df: pd.DataFrame) -> Tuple[List[str], List[str], List[str]]:
    """
    Split the columns of a DataFrame into numerical, categorical and date groups.

    Each column is tested first for a datetime dtype, then for a numeric
    dtype; anything matching neither is treated as categorical. Column
    order within each group follows the order in ``df.columns``.

    Args:
        df: Input DataFrame

    Returns:
        Tuple of (numerical_columns, categorical_columns, date_columns)
    """
    is_datetime = pd.api.types.is_datetime64_any_dtype
    is_numeric = pd.api.types.is_numeric_dtype

    numerical: List[str] = []
    categorical: List[str] = []
    date_columns: List[str] = []

    for name in df.columns:
        series = df[name]
        # Datetime takes precedence over numeric, numeric over categorical.
        if is_datetime(series):
            bucket = date_columns
        elif is_numeric(series):
            bucket = numerical
        else:
            bucket = categorical
        bucket.append(name)

    return numerical, categorical, date_columns
def validate_dataframe(df: pd.DataFrame) -> Tuple[bool, Optional[str]]:
    """
    Validate that DataFrame is not empty and has valid structure.

    The checks run from most to least specific so that each failure gets
    its own message: a missing object, a frame without columns, and a
    frame that has columns but no rows.

    Args:
        df: DataFrame to validate (may be None)

    Returns:
        Tuple of (is_valid, error_message); error_message is None when
        the DataFrame is valid.
    """
    if df is None:
        return False, "DataFrame is empty or None"
    # A frame without columns also reports ``df.empty`` as True, so this
    # check has to come before the emptiness check to be reachable.
    if len(df.columns) == 0:
        return False, "DataFrame has no columns"
    if df.empty:
        return False, "DataFrame is empty or None"
    return True, None
def format_number(value: float, decimals: int = 2) -> str:
    """
    Render a number as text with thousands separators and fixed decimals.

    Args:
        value: Number to format
        decimals: Number of decimal places

    Returns:
        The formatted string, or "N/A" when the value is missing
        (as judged by ``pd.isna``).
    """
    if not pd.isna(value):
        # Comma-grouped, fixed-point spec, e.g. ",.2f" for two decimals.
        spec = f",.{decimals}f"
        return format(value, spec)
    return "N/A"
def safe_divide(numerator: float, denominator: float) -> float:
    """
    Divide numerator by denominator without raising on a bad denominator.

    Args:
        numerator: Numerator value
        denominator: Denominator value

    Returns:
        The quotient, or 0.0 when the denominator is zero or missing
        (as judged by ``pd.isna``).
    """
    # Zero is tested before the missing-value check, as in the guard's
    # original evaluation order.
    if not (denominator == 0 or pd.isna(denominator)):
        return numerator / denominator
    return 0.0
def get_missing_value_summary(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarise the missing values of each column in a DataFrame.

    Args:
        df: Input DataFrame

    Returns:
        DataFrame with columns 'Column', 'Missing_Count' and
        'Missing_Percentage', restricted to columns that have at least
        one missing value and sorted by 'Missing_Count' descending.
    """
    null_counts = df.isnull().sum()
    # Share of rows that are null, expressed as a percentage.
    null_share = (null_counts / len(df)) * 100

    report = pd.DataFrame({
        'Column': null_counts.index,
        'Missing_Count': null_counts.values,
        'Missing_Percentage': null_share.values,
    })

    has_missing = report['Missing_Count'] > 0
    return report[has_missing].sort_values('Missing_Count', ascending=False)