File size: 2,701 Bytes
48909ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
Utility functions for the Business Intelligence Dashboard.

This module contains helper functions for data type detection,
validation, and common operations.
"""

from typing import List, Optional, Tuple
import pandas as pd
import numpy as np


def detect_column_types(df: pd.DataFrame) -> Tuple[List[str], List[str], List[str]]:
    """
    Split the columns of a DataFrame into numerical, categorical and date groups.

    Each column is tested first for a datetime dtype, then for a numeric
    dtype; any column that matches neither test is reported as categorical.
    Column order within each group follows the order of ``df.columns``.

    Args:
        df: DataFrame whose columns should be classified

    Returns:
        Tuple of (numerical_columns, categorical_columns, date_columns)
    """
    dtype_checks = pd.api.types
    numeric_cols: List[str] = []
    other_cols: List[str] = []
    datetime_cols: List[str] = []

    for name in df.columns:
        series = df[name]
        # The datetime test runs first so date columns never reach the numeric test.
        if dtype_checks.is_datetime64_any_dtype(series):
            bucket = datetime_cols
        elif dtype_checks.is_numeric_dtype(series):
            bucket = numeric_cols
        else:
            bucket = other_cols
        bucket.append(name)

    return numeric_cols, other_cols, datetime_cols


def validate_dataframe(df: pd.DataFrame) -> Tuple[bool, Optional[str]]:
    """
    Validate that DataFrame is not empty and has valid structure.

    The checks run in this order: ``None``, no columns, no data. The first
    failing check determines the error message.

    Args:
        df: DataFrame to validate (``None`` is accepted and reported as invalid)

    Returns:
        Tuple of (is_valid, error_message); error_message is None when valid
    """
    if df is None:
        return False, "DataFrame is empty or None"

    # This must come before the `.empty` test: a frame without columns is
    # always `.empty`, so testing `.empty` first would make this more specific
    # message unreachable.
    if len(df.columns) == 0:
        return False, "DataFrame has no columns"

    if df.empty:
        return False, "DataFrame is empty or None"

    return True, None


def format_number(value: float, decimals: int = 2) -> str:
    """
    Render a number as text with thousands separators and fixed decimals.

    Args:
        value: Number to render; missing values (as judged by ``pd.isna``)
            are rendered as "N/A"
        decimals: How many digits to keep after the decimal point

    Returns:
        The formatted number, or "N/A" when the value is missing
    """
    if not pd.isna(value):
        # "," inserts thousands separators; ".<decimals>f" fixes the precision.
        return format(value, f",.{decimals}f")
    return "N/A"


def safe_divide(numerator: float, denominator: float) -> float:
    """
    Divide two numbers without failing on an unusable denominator.

    Args:
        numerator: Value to be divided
        denominator: Value to divide by

    Returns:
        ``numerator / denominator``, or 0.0 when the denominator is zero
        or missing (as judged by ``pd.isna``)
    """
    # The zero test comes first so the missing-value test only runs for non-zero input.
    unusable = denominator == 0 or pd.isna(denominator)
    return 0.0 if unusable else numerator / denominator


def get_missing_value_summary(df: pd.DataFrame) -> pd.DataFrame:
    """
    Report how many values are missing in each column of a DataFrame.

    Only columns with at least one missing value are included, ordered from
    most to fewest missing values.

    Args:
        df: DataFrame to inspect

    Returns:
        DataFrame with the columns 'Column', 'Missing_Count' and
        'Missing_Percentage' (share of rows that are missing, in percent)
    """
    null_counts = df.isnull().sum()
    null_share = (null_counts / len(df)) * 100

    report = pd.DataFrame({
        'Column': null_counts.index,
        'Missing_Count': null_counts.values,
        'Missing_Percentage': null_share.values
    })

    # Drop fully populated columns before ranking the rest.
    has_gaps = report['Missing_Count'] > 0
    incomplete = report[has_gaps]
    return incomplete.sort_values('Missing_Count', ascending=False)