"""
Utility functions and constants for the Business Intelligence Dashboard.
Contains helper functions for data validation, formatting, and common operations.
Works with ANY dataset - no hardcoded column names.
"""

import pandas as pd
import numpy as np
from typing import List, Dict, Any, Optional, Tuple

# Constants
SUPPORTED_FILE_EXTENSIONS = ['.csv', '.xlsx', '.xls']
MAX_PREVIEW_ROWS = 100
DEFAULT_PREVIEW_ROWS = 10
DATE_FORMATS = ['%Y-%m-%d %H:%M:%S', '%Y-%m-%d', '%d/%m/%Y', '%m/%d/%Y', '%d-%m-%Y']
NUMERIC_DTYPES = ['int64', 'int32', 'float64', 'float32']
AGGREGATION_METHODS = ['sum', 'mean', 'median', 'count', 'min', 'max']


def validate_file_extension(filename: str) -> Tuple[bool, str]:
    """
    Validate if the uploaded file has a supported extension.
    
    Args:
        filename: Name of the uploaded file
        
    Returns:
        Tuple of (is_valid, message)
    """
    if filename is None:
        return False, "No file uploaded. Please upload a CSV or Excel file."
    
    ext = '.' + filename.split('.')[-1].lower() if '.' in filename else ''
    
    if ext not in SUPPORTED_FILE_EXTENSIONS:
        return False, f"Unsupported file format '{ext}'. Please upload CSV or Excel files."
    
    return True, "File format is valid."


def format_number(value: float, decimals: int = 2) -> str:
    """
    Format a number with thousands separator and decimal places.
    
    Args:
        value: Number to format
        decimals: Number of decimal places
        
    Returns:
        Formatted string representation
    """
    if pd.isna(value):
        return "N/A"
    
    if abs(value) >= 1_000_000:
        return f"{value/1_000_000:,.{decimals}f}M"
    elif abs(value) >= 1_000:
        return f"{value/1_000:,.{decimals}f}K"
    else:
        return f"{value:,.{decimals}f}"


def get_column_type(dtype) -> str:
    """
    Determine the general type of a column based on its dtype.
    
    Args:
        dtype: pandas dtype of the column
        
    Returns:
        String indicating 'numeric', 'categorical', or 'datetime'
    """
    dtype_str = str(dtype)
    
    if 'int' in dtype_str or 'float' in dtype_str:
        return 'numeric'
    elif 'datetime' in dtype_str:
        return 'datetime'
    else:
        return 'categorical'


def detect_date_columns(df: pd.DataFrame) -> List[str]:
    """
    Detect columns that likely contain date information.
    
    Args:
        df: pandas DataFrame to analyze
        
    Returns:
        List of column names that appear to be dates
    """
    date_columns = []
    
    for col in df.columns:
        if df[col].dtype == 'datetime64[ns]':
            date_columns.append(col)
        elif df[col].dtype == 'object':
            sample = df[col].dropna().head(100)
            if len(sample) > 0:
                try:
                    pd.to_datetime(sample)
                    date_columns.append(col)
                except (ValueError, TypeError):
                    pass
    
    return date_columns


def calculate_percentage(part: float, whole: float) -> float:
    """
    Calculate percentage safely, handling division by zero.
    
    Args:
        part: Numerator value
        whole: Denominator value
        
    Returns:
        Percentage value or 0 if whole is 0
    """
    if whole == 0 or pd.isna(whole):
        return 0.0
    return (part / whole) * 100


def truncate_string(text: str, max_length: int = 50) -> str:
    """
    Truncate a string to a maximum length with ellipsis.
    
    Args:
        text: String to truncate
        max_length: Maximum allowed length
        
    Returns:
        Truncated string with ellipsis if needed
    """
    if pd.isna(text):
        return ""
    text = str(text)
    if len(text) <= max_length:
        return text
    return text[:max_length-3] + "..."


def get_dataframe_memory_usage(df: pd.DataFrame) -> str:
    """
    Get human-readable memory usage of a DataFrame.
    
    Args:
        df: pandas DataFrame
        
    Returns:
        Formatted string of memory usage
    """
    bytes_used = df.memory_usage(deep=True).sum()
    
    if bytes_used >= 1_073_741_824:  # 1 GB
        return f"{bytes_used / 1_073_741_824:.2f} GB"
    elif bytes_used >= 1_048_576:  # 1 MB
        return f"{bytes_used / 1_048_576:.2f} MB"
    elif bytes_used >= 1024:  # 1 KB
        return f"{bytes_used / 1024:.2f} KB"
    else:
        return f"{bytes_used} bytes"


def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
    """
    Perform division safely, returning default if denominator is zero.
    
    Args:
        numerator: The dividend
        denominator: The divisor
        default: Value to return if division is not possible
        
    Returns:
        Result of division or default value
    """
    if denominator == 0 or pd.isna(denominator) or pd.isna(numerator):
        return default
    return numerator / denominator


def create_info_message(title: str, content: str, msg_type: str = "info") -> str:
    """
    Create a formatted information message.
    
    Args:
        title: Message title
        content: Message content
        msg_type: Type of message ('info', 'warning', 'error', 'success')
        
    Returns:
        Formatted markdown string
    """
    icons = {
        "info": "ℹ️",
        "warning": "⚠️",
        "error": "❌",
        "success": "✅"
    }
    icon = icons.get(msg_type, "ℹ️")
    return f"### {icon} {title}\n\n{content}"


def get_numeric_columns(df: pd.DataFrame) -> List[str]:
    """
    Get list of numeric columns in a DataFrame.
    
    Args:
        df: pandas DataFrame
        
    Returns:
        List of numeric column names
    """
    return df.select_dtypes(include=[np.number]).columns.tolist()


def get_categorical_columns(df: pd.DataFrame) -> List[str]:
    """
    Get list of categorical columns in a DataFrame.
    
    Args:
        df: pandas DataFrame
        
    Returns:
        List of categorical column names
    """
    return df.select_dtypes(include=['object', 'category']).columns.tolist()


def get_datetime_columns(df: pd.DataFrame) -> List[str]:
    """
    Get list of datetime columns in a DataFrame.
    
    Args:
        df: pandas DataFrame
        
    Returns:
        List of datetime column names
    """
    return df.select_dtypes(include=['datetime64']).columns.tolist()